-
Notifications
You must be signed in to change notification settings - Fork 0
/
run_node2vec_spoke_2021.py
34 lines (31 loc) · 1.38 KB
/
run_node2vec_spoke_2021.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
# TODO: try this with a smaller graph?
from node2vec import random_walks, run_word2vec
import spoke_loader
import numpy as np
from scipy import sparse
import scipy.io
import umap
# load graph
if __name__ == '__main__':
    # Pipeline: load the SPOKE graph, learn node2vec embeddings from random
    # walks over it, project the embeddings with UMAP, then write the
    # projection, an interactive scatter plot, and the node table to disk.

    # NOTE(review): the edge_matrix returned by load_spoke is replaced on the
    # very next statement by the matrix read from the .mtx.gz file; only the
    # node and type tables from this call are used below. Presumably both
    # files describe the same node ordering -- confirm.
    nodes, edges, node_types, edge_types, edge_matrix = spoke_loader.load_spoke(
        'spoke_2021.jsonl.gz', remove_unused_nodes=True)
    edge_matrix = scipy.io.mmread('spoke_2021.mtx.gz')
    print('matrix loaded')
    edge_matrix = spoke_loader.symmetrize_matrix(edge_matrix)
    # LIL format exposes, via `.rows`, one list of column indices per row;
    # that per-row list structure is what random_walks receives.
    edge_matrix = sparse.lil_matrix(edge_matrix)

    print('calculating random walks...')
    walks = random_walks(edge_matrix.rows, r=5, l=25, verbose=True)
    # NOTE(review): positional arguments 8 and 50 are passed through as-is;
    # presumably 50 is the vector size (it matches the "_50" suffix of the
    # saved model name) -- confirm against node2vec.run_word2vec.
    n2v_model = run_word2vec(walks, 8, 50)
    n2v_model.save('spoke_2021_node2vec_gensim_50')

    # fit_transform returns the fitted embedding, so keep the return value
    # instead of discarding it and reading it back from the estimator.
    # NOTE(review): everything below assumes row i of n2v_model.wv.vectors
    # belongs to nodes[i] -- verify the vector ordering produced by
    # run_word2vec before trusting the plot colours/hover text.
    um = umap.UMAP()
    embedding = um.fit_transform(n2v_model.wv.vectors)
    np.savetxt('spoke_umap.txt', embedding)

    # Make a 2d plot of spoke nodes, colored by type.
    import plotly.express as px

    # Each node is indexed as n[1] (shown on hover) and n[2] (key into
    # node_types); the type labels are built once and shared by the hover
    # text and the colour mapping.
    type_labels = [node_types[n[2]] for n in nodes]
    hover_labels = [(n[1], label) for n, label in zip(nodes, type_labels)]
    fig = px.scatter(
        x=embedding[:, 0],
        y=embedding[:, 1],
        hover_data=[hover_labels],
        color=type_labels,
    )
    fig.update_traces(marker=dict(size=1))
    html = fig.to_html()
    # Write the page as UTF-8 explicitly: the platform default encoding may
    # be unable to represent characters in the generated HTML.
    with open('spoke.html', 'w', encoding='utf-8') as f:
        f.write(html)

    # Dump the node table as tab-separated text, one node per line.
    # NOTE(review): np.savetxt encodes text as latin1 unless told otherwise;
    # pass encoding='utf-8' if node fields can contain characters outside
    # that range.
    np.savetxt(
        'spoke_small_nodes.txt',
        np.array([[str(x) for x in n] for n in nodes]),
        fmt='%s',
        delimiter='\t',
    )