-
Notifications
You must be signed in to change notification settings - Fork 24
/
SocialNetworkExample.scala
170 lines (134 loc) · 5.73 KB
/
SocialNetworkExample.scala
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
/**
* @author Aapo Kyrola <[email protected]>
* @version 1.0
*
* @section LICENSE
*
* Copyright [2014] [Aapo Kyrola / Carnegie Mellon University]
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
* Publication to cite: http://arxiv.org/abs/1403.0701
*/
package edu.cmu.graphchidb.examples
import edu.cmu.graphchidb.{GraphChiDatabase, GraphChiDatabaseAdmin}
import scala.util.Random
import edu.cmu.graphchi.GraphChiEnvironment
import scala.io.Source
import java.io.File
import edu.cmu.graphchidb.Util._
import edu.cmu.graphchidb.examples.computation.ConnectedComponentsLabelProp
import edu.cmu.graphchidb.queries.Queries
import edu.cmu.graphchidb.compute.Pagerank
/**
* Example application for social network graph.
* We create fake timestamp and weight attributes for each edge (with random values). This is just for demonstration.
* @author Aapo Kyrola
*/
object SocialNetworkExample {
/**
* Run in scala console
import edu.cmu.graphchidb.examples.SocialNetworkExample._
// To initialize DB (you need to do this only on your first session)
startIngest
// Some testing
recommendFriends(8737)
recommendFriends(2419)
recommendFriendsLimited(2419)
DB.queryIn(DB.originalToInternalId(2409), 0)
DB.queryOut(DB.originalToInternalId(8737), 0)
// To run connected components
connectedComponents()
// After a while, you can ask
ccAlgo.printStats
// To get a vertex component label (which might not be yet the final one)
ccComponentColumn.get(DB.originalToInternalId(8737)).get
// To get pagerank of a vertex (note, that it is being continuously updated), so this
// just looks up the value.
pagerankCol.get(DB.originalToInternalId(8737)).get
*
*/
val sourceFile = System.getProperty("user.home") + "/graphs/soc-LiveJournal1.txt"
val baseFilename = System.getProperty("user.home") + "/graphs/DB/livejournal/lj"
GraphChiDatabaseAdmin.createDatabaseIfNotExists(baseFilename, numShards = 16)
implicit val DB = new GraphChiDatabase(baseFilename, numShards = 16)
/* Create edge columns */
val timestampColumn = DB.createLongColumn("timestamp", DB.edgeIndexing)
val weightColumn = DB.createFloatColumn("weight", DB.edgeIndexing)
/* Pagerank -- run in background continuously */
val pagerankComputation = new Pagerank(DB)
val pagerankCol = DB.column("pagerank", DB.vertexIndexing).get
/* Connected components (run connectedComponents()) */
val ccAlgo = new ConnectedComponentsLabelProp(DB)
val ccComponentColumn = ccAlgo.vertexDataColumn.get
DB.initialize()
/* Start pagerank */
DB.runIteration(pagerankComputation, continuous=true)
def startIngest() {
async {
var i = 0
val r = new Random
val t = System.currentTimeMillis()
timed("ingest", {
val ingestMeter = GraphChiEnvironment.metrics.meter("edgeingest")
Source.fromFile(new File(sourceFile)).getLines().foreach( ln => {
if (!ln.startsWith("#")) {
val toks = ln.split("\t")
val from = Integer.parseInt(toks(0))
val to = Integer.parseInt(toks(1))
val timestamp = System.currentTimeMillis() - r.nextLong() % 1000000
val weight = r.nextFloat()
DB.addEdgeOrigId(0, from, to, timestamp, weight)
i += 1
if (i % 1000 == 0) ingestMeter.mark(1000)
if (i % 1000000 == 0) println((System.currentTimeMillis - t) / 1000 + " s. : Processed: %d".format(i) + " ;" + ingestMeter.getOneMinuteRate + " / sec"
+ "; mean=" + ingestMeter.getMeanRate + " edges/sec")
}
})
DB.flushAllBuffers()
})
}
}
def connectedComponents() {
async {
println("Running connected components in background...")
DB.runGraphChiComputation(ccAlgo, 100, enableScheduler=true)
}
}
/**
* Example: finds the friends-of-friends of user, that are not her friends, and groups them based
* on how many user's friends are friends of them. Note: this can be very slow when user has many friends.
* See function recommendFriendsLimited() for a more efficient, but approximate version.
* Returns top 20 friend of friends that are not my friends */
def recommendFriends(userIdOrigId: Long) = {
val userId = DB.originalToInternalId(userIdOrigId)
val friendsOfFriendsNotMyFriends = Queries.friendsOfFriendsExcl(userId, 0)
friendsOfFriendsNotMyFriends.toSeq.sortBy(-_._2).take(20).map(tup => (DB.internalToOriginalId(tup._1), tup._2))
}
def recommendFriendsLimited(userIdOrigId: Long) = {
val userId = DB.originalToInternalId(userIdOrigId)
val friendsOfFriendsNotMyFriends = Queries.friendsOfFriendsExclWithLimit(userId, 0, maxFriends = 50)
friendsOfFriendsNotMyFriends.toSeq.sortBy(-_._2).take(20).map(tup => (DB.internalToOriginalId(tup._1), tup._2))
}
/**
* Find shortest path between two users
*/
def shortestPath(userFromOrigId: Long, userToOrigId: Long) = {
val path = Queries.shortestPath(DB.originalToInternalId(userFromOrigId), DB.originalToInternalId(userToOrigId), maxDepth=5, edgeType=0)
path.map { id => DB.internalToOriginalId(id) }
}
/* Example of join */
def queryOutWithTimestamps(vertexOrigId: Long) = {
DB.queryOut(DB.originalToInternalId(vertexOrigId), 0).join(timestampColumn)
}
}