-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathread_DGL_benchmark.py
60 lines (51 loc) · 1.68 KB
/
read_DGL_benchmark.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
"""
Random Walk test on Benchmark Datasets
"""
import numpy as np
import pandas as pd
# system and other
import gc
import os
import time
import random
# MTX file reader
from scipy.io import mmread
import networkx as nx
import dgl
from dgl.sampling import random_walk, pack_traces
import torch as th
def read_dgl(datafile):
    """Load a Matrix Market file and build a DGL graph from its entries.

    Every stored entry ``(row, col)`` of the matrix becomes one edge from
    node ``row`` to node ``col``. The entry values are not used.

    Args:
        datafile: Source handed unchanged to ``scipy.io.mmread``.

    Returns:
        The graph produced by ``dgl.graph`` with ``idtype=th.int32``.
    """
    # Only the coordinate arrays are needed, so the matrix is used exactly as
    # read; converting its values to floating point would copy them for
    # nothing (and that cost would land inside the caller's load timing).
    # NOTE(review): relies on mmread returning a COO sparse matrix, i.e. a
    # coordinate-format file; a dense result has no .row/.col - confirm inputs.
    M = mmread(datafile)
    src_ids = th.tensor(M.row)
    dst_ids = th.tensor(M.col)
    return dgl.graph((src_ids, dst_ids), idtype=th.int32)
def run_dgl_rw(_G, _seeds, _depth):
    """Time a single ``random_walk`` call.

    Args:
        _G: Graph passed to ``random_walk``.
        _seeds: Start nodes, forwarded as ``nodes``.
        _depth: Walk length, forwarded as ``length``.

    Returns:
        Elapsed time of the call in seconds, as a float.
    """
    # perf_counter is monotonic and high-resolution; time.time() follows the
    # system clock, which may be adjusted mid-measurement and can be coarse.
    start = time.perf_counter()
    # The sampled walks are discarded: only the duration is of interest.
    random_walk(_G, nodes=_seeds, length=_depth)
    return time.perf_counter() - start
# Names of the benchmark graphs; each one is read from ./data/<name>.mtx.
data = [
    'preferentialAttachment',
    'as-Skitter',
    'citationCiteseer',
    'caidaRouterLevel',
    'coAuthorsDBLP',
    'coPapersDBLP',
]

# some parameters
# They do not depend on the dataset, so they are built once rather than on
# every loop iteration.
num_seeds_ = [1000, 3000, 5000, 10000, 20000, 40000, 75000, 100000, 150000, 200000, 250000, 300000]
max_depth_ = np.arange(2, 2**7 + 1, 2)  # even walk lengths 2, 4, ..., 128

for file_name in data:
    # dgl RW
    # Time the graph load with a monotonic, high-resolution clock and print
    # the elapsed seconds.
    t1 = time.perf_counter()
    G_dgl = read_dgl('./data/' + file_name + '.mtx')
    t2 = time.perf_counter() - t1
    print(t2)

    # NOTE(review): the random-walk sweep below is disabled (kept as a bare
    # string literal, so it never runs). As written it would, for every
    # (max_depth, num_seeds) pair, time 11 walks from freshly drawn random
    # seeds and append the timings as one CSV row. The nesting shown here is
    # reconstructed - confirm it before re-enabling.
    '''
    for max_depth in max_depth_:
        for num_seeds in num_seeds_:
            print('number of seeds:', num_seeds)
            print('RW length:', max_depth)
            t_dgl = []
            for i in range(11):
                seeds = th.randint(0, G_dgl.num_nodes(), (num_seeds, ), dtype=th.int32)
                t = run_dgl_rw(G_dgl, seeds, max_depth)
                t_dgl.append(t)
            df_t_dgl = pd.DataFrame([t_dgl])
            df_t_dgl.to_csv('./RW_dgl_' + file_name + '_' + str(num_seeds) + '_.csv', mode='a', index=False, header=None)
            print(' ')
    '''