-
Notifications
You must be signed in to change notification settings - Fork 6
/
Copy pathExFWeightedImplementation.R
268 lines (260 loc) · 11.3 KB
/
ExFWeightedImplementation.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
## The heart of the algorithm is to iterate over all possible
## infection clusters of size two, storing their FI (force of
## infection, or, more exactly, the sum of the weighted degree
## of the cluster). The computed FI value is then scaled by the
## probability that it can form, by normalizing the probability
## that the path taken was actually chosen. The return value is
## the entropy of the normalized FI values.
##
## This file contains two implementation of the ExF:
## ExFW and
## ExFWD
## The first is for weighted graphs, the second is for weighted and directed
## graphs.
## To use:
## source("ExpectedForceWeighted.R") ## loads the functions into your current workspace
## ExFvals <- sapply(1:vcount(gr),ExFW, gr) ## computes the ExF of each node in the graph
## or you can call it on one specific node, i.e.
## val <- ExF(qnode, gr) ## where qnode is the query node id
ExFW <- function(qnode,graph){
if(! "weight" %in% list.edge.attributes(graph)){
stop("Error: graph edges must have a weight attribute.") }
.FI <- function(graph,clust){
fi <- sum(E(graph)[adj(clust)]$weight) -
sum(E(graph)[clust %--% clust]$weight)
##cat("clust",clust,"\t degree:",fi,"\n")
return(fi) }
graph <- simplify(graph) ## remove self edges
## Get all neighbors of the querry node at distance one and two:
neigh <- graph.bfs(graph,qnode,order=FALSE,
dist=TRUE,unreachable=FALSE,
callback=function(graph,data,extra){
data["dist"]==3})$dist
## vector of nodes at distance one
d.one.nodes <- which(neigh==1)
n.d.one <- length(d.one.nodes)
w.d.one <- sum(E(graph)[adj(qnode)]$weight)
##cat("w.d.one:",w.d.one,"\n")
## vector of nodes at distance two
d.two.nodes <- which(neigh==2)
## SAFETY CHECKS
if(length(E(graph)[adj(qnode)]) == 0){
## cat("no outbound edges\n")
return(-1)
}
if(length(d.two.nodes)==0){
## cat("no nodes at distance two\n")
return(-2)
}
## pre-allocate the vector of FI values
guestimated.numFI <- 2*sum(n.d.one*length(d.two.nodes))
allFI <- numeric(guestimated.numFI+5)
numFI <- 0; totalFI <- 0
## The iteration is over all nodes at distance one from the source,
## within this loop we consider both all remaining d.one.nodes
## and all d.two.nodes reachable from the current d.one.node.
for(i in 1:n.d.one){
## we will need the following edge weight (sums) to scale the FI:
w.q.i <- E(graph, c(qnode,d.one.nodes[i]))$weight
w.d.i <- sum(E(graph)[adj(d.one.nodes[i])]$weight)
##cat(" *w.q.i:",w.q.i,"\n")
##cat(" w.d.i:",w.d.i,"\n")
if(i<n.d.one){ ## all remaining d.one.nodes
for(j in (i+1):n.d.one){
## Increase storage, if necessary: (code for optimization only)
if(numFI>guestimated.numFI){
guestimated.numFI <- round(1.5 * guestimated.numFI)
foo <- allFI
allFI <- vector(mode="numeric",length=guestimated.numFI+5)
allFI[1:numFI] <- foo[1:numFI]
} ## END increase storage
## compute cluster FI
clustFI <- .FI(graph, c(qnode, d.one.nodes[c(i,j)]))
## store once for each way it can form, scaling by the probability
## of this path; we need the following edge weight (sums)
w.q.j <- E(graph, c(qnode,d.one.nodes[j]))$weight
w.d.j <- sum(E(graph)[adj(d.one.nodes[j])]$weight)
##cat(" w.q.j:",w.q.j,"\n")
##cat(" w.d.j:",w.d.j,"\n")
## way 1: qnode -- i, qnode -- j
wmult <- w.q.i/w.d.one * w.q.j/(w.d.one + w.d.i - 2*w.q.i)
numFI <- numFI+1
allFI[numFI] <- clustFI*wmult
totalFI <- totalFI + clustFI*wmult
## way 2: qnode -- j, qnode -- i
wmult <- w.q.j/w.d.one * w.q.i/(w.d.one + w.d.j - 2*w.q.j)
numFI <- numFI+1
allFI[numFI] <- clustFI*wmult
totalFI <- totalFI + clustFI*wmult
## if an i -- j edge, two more ways:
if(d.one.nodes[j] %in% neighborhood(graph,d.one.nodes[i],
order=1)[[1]]){
## one more edge weight needed
w.i.j <- E(graph, c(d.one.nodes[i],d.one.nodes[j]))$weight
##cat(" w.i.j:",w.i.j,"\n")
## way 3: qnode -- i, i -- j
wmult <- w.q.i/w.d.one * w.i.j/(w.d.one + w.d.i - 2*w.q.i)
numFI <- numFI+1
allFI[numFI] <- clustFI*wmult
totalFI <- totalFI + clustFI*wmult
## way 4: qnode -- j, j -- i
wmult <- w.q.j/w.d.one * w.i.j/(w.d.one + w.d.j - 2*w.q.j)
numFI <- numFI+1
allFI[numFI] <- clustFI*wmult
totalFI <- totalFI + clustFI*wmult
}}} ## end all remaining d.one.nodes
for(dtn in d.two.nodes){ ## all d.two.nodes
if(dtn %in% neighborhood(graph,d.one.nodes[i],order=1)[[1]]){
## If an edge to the current d.one.node
## increase storage, if necessary: (code for optimization only)
if(numFI>guestimated.numFI){
guestimated.numFI <- round(1.5 * guestimated.numFI)
foo <- allFI
allFI <- vector(mode="numeric",length=guestimated.numFI+5)
allFI[1:numFI] <- foo[1:numFI]
} ## END increase storage
## compute cluster FI
clustFI <- .FI(graph, c(qnode, d.one.nodes[i], dtn))
## one more edge weight needed
w.i.j <- E(graph, c(d.one.nodes[i],dtn))$weight
##cat(" w.i.j:",w.i.j,"\n")
wmult <- w.q.i/w.d.one * w.i.j/(w.d.one + w.d.i - 2*w.q.i)
numFI <- numFI+1
allFI[numFI] <- clustFI * wmult
totalFI <- totalFI + clustFI * wmult
}}
} ## end looping over all nodes at distance one
## calculate the entropy, note that this clips allFI at numFI
norm <- allFI[1:numFI]/totalFI
##cat("clust degs:",round(allFI[1:numFI],digits=5),"\n")
##cat("num clusts:",numFI,"\n")
return(-sum(norm*log(norm)))
}
ExFWD <- function(qnode,graph){
if(! is.directed(graph)){
stop("Caution: This function expects a directed graph and may produce inconsistent results for undirected graphs.")
}
if(! "weight" %in% list.edge.attributes(graph)){
stop("Error: graph edges must have a weight attribute.") }
graph <- simplify(graph) ## remove self edges
.FI <- function(graph,clust){
fi <- sum(E(graph)[from(clust)]$weight) -
sum(E(graph)[clust %->% clust]$weight)
##cat("clust",clust,"\t degree:",fi,"\n")
return(fi) }
## Get all neighbors of the querry node at distance one and two:
neigh <- graph.bfs(graph,qnode,order=FALSE,dist=TRUE,
neimode="out",unreachable=FALSE,
callback=function(graph,data,extra){
data["dist"]==3})$dist
## vector of nodes at distance one
d.one.nodes <- which(neigh==1)
n.d.one <- length(d.one.nodes)
w.d.one <- sum(E(graph)[from(qnode)]$weight)
##cat("w.d.one:",w.d.one,"\n")
## vector of nodes at distance two
d.two.nodes <- which(neigh==2)
## cat("node:",qnode,'\n')
## cat('\t',d.one.nodes,'\n')
## cat('\t',d.two.nodes,'\n')
if(length(E(graph)[from(qnode)]) == 0){
## cat("no outbound edges\n")
return(-1)
}
if(length(d.two.nodes)==0){
## cat("no nodes at distance two\n")
return(0)
}
## pre-allocate the vector of FI values
guestimated.numFI <- 2*sum(n.d.one*length(d.two.nodes))
allFI <- numeric(guestimated.numFI+5)
numFI <- 0; totalFI <- 0
## The iteration is over all nodes at distance one from the source,
## within this loop we consider both all remaining d.one.nodes
## and all d.two.nodes reachable from the current d.one.node.
for(i in 1:n.d.one){
## we will need the following edge weight (sums) to scale the FI:
w.q.i <- E(graph, c(qnode,d.one.nodes[i]))$weight
if(are.connected(graph,d.one.nodes[i],qnode)){
w.i.q <- E(graph, c(d.one.nodes[i],qnode))$weight
} else {w.i.q <- 0 }
w.d.i <- sum(E(graph)[from(d.one.nodes[i])]$weight)
##cat(" *w.q.i:",w.q.i,"\n")
##cat(" w.d.i:",w.d.i,"\n")
if(i<n.d.one){ ## all remaining d.one.nodes
for(j in (i+1):n.d.one){
## Increase storage, if necessary: (code for optimization only)
if(numFI>guestimated.numFI){
guestimated.numFI <- round(1.5 * guestimated.numFI)
foo <- allFI
allFI <- vector(mode="numeric",length=guestimated.numFI+5)
allFI[1:numFI] <- foo[1:numFI]
} ## END increase storage
## compute cluster FI
clustFI <- .FI(graph, c(qnode, d.one.nodes[c(i,j)]))
## store once for each way it can form, scaling by the probability
## of this path; we need the following edge weight (sums)
w.q.j <- E(graph, c(qnode,d.one.nodes[j]))$weight
if(are.connected(graph,d.one.nodes[j],qnode)){
w.j.q <- E(graph, c(d.one.nodes[j],qnode))$weight
} else { w.j.q <- 0 }
w.d.j <- sum(E(graph)[from(d.one.nodes[j])]$weight)
##cat(" w.q.j:",w.q.j,"\n")
##cat(" w.d.j:",w.d.j,"\n")
## way 1: qnode -- i, qnode -- j
wmult <- w.q.i/w.d.one * w.q.j/(w.d.one - w.q.i + w.d.i - w.i.q)
numFI <- numFI+1
allFI[numFI] <- clustFI*wmult
totalFI <- totalFI + clustFI*wmult
## way 2: qnode -- j, qnode -- i
wmult <- w.q.j/w.d.one * w.q.i/(w.d.one - w.q.j + w.d.j - w.j.q)
numFI <- numFI+1
allFI[numFI] <- clustFI*wmult
totalFI <- totalFI + clustFI*wmult
## way 3: qnode -- i, i -- j
if(are.connected(graph,d.one.nodes[i],d.one.nodes[j])){
## one more edge weight needed
w.i.j <- E(graph, c(d.one.nodes[i],d.one.nodes[j]))$weight
wmult <- w.q.i/w.d.one * w.i.j/(w.d.one - w.q.i + w.d.i - w.i.q)
numFI <- numFI+1
allFI[numFI] <- clustFI*wmult
totalFI <- totalFI + clustFI*wmult
}
## way 4: qnode -- j, j -- i
if(are.connected(graph,d.one.nodes[j],d.one.nodes[i])){
w.j.i <- E(graph, c(d.one.nodes[j],d.one.nodes[i]))$weight
wmult <- w.q.j/w.d.one * w.j.i/(w.d.one - w.q.j + w.d.j - w.j.i)
numFI <- numFI+1
allFI[numFI] <- clustFI*wmult
totalFI <- totalFI + clustFI*wmult
}
}} ## end all remaining d.one.nodes
for(dtn in d.two.nodes){ ## all d.two.nodes
##if(dtn %in% neighborhood(graph,d.one.nodes[i],order=1)[[1]]){
## If an edge to the current d.one.node
if(are.connected(graph,d.one.nodes[i],dtn)){
## increase storage, if necessary: (code for optimization only)
if(numFI>guestimated.numFI){
guestimated.numFI <- round(1.5 * guestimated.numFI)
foo <- allFI
allFI <- vector(mode="numeric",length=guestimated.numFI+5)
allFI[1:numFI] <- foo[1:numFI]
} ## END increase storage
## compute cluster FI
clustFI <- .FI(graph, c(qnode, d.one.nodes[i], dtn))
## one more edge weight needed
w.i.j <- E(graph, c(d.one.nodes[i],dtn))$weight
##cat(" w.i.j:",w.i.j,"\n")
wmult <- w.q.i/w.d.one * w.i.j/(w.d.one - w.q.i + w.d.i - w.i.q)
numFI <- numFI+1
allFI[numFI] <- clustFI * wmult
totalFI <- totalFI + clustFI * wmult
}}
} ## end looping over all nodes at distance one
## calculate the entropy, note that this clips allFI at numFI
norm <- allFI[1:numFI]/totalFI
##cat("clust degs:",round(allFI[1:numFI],digits=5),"\n")
##cat("num clusts:",numFI,"\n")
##cat(round(allFI[1:numFI],digits=2),'\n')
return(-sum(norm*log(norm)))
}