-
Notifications
You must be signed in to change notification settings - Fork 4
/
_3_rank_nodes.py
34 lines (26 loc) · 875 Bytes
/
_3_rank_nodes.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
# -*- coding: utf-8 -*-
"""
@author: georg
Take the top 3000 nodes ranked based on each feature and store it to be used for NETRATE
Take the top 100 based on each feature and store it as seed set.
"""
import os
import pandas as pd
"""
Main
"""
os.chdir("path\\to\\Data")
dat = pd.read_csv("node_features.csv")
#------ Take the top 0.5%~=3000 seeds based on degree and store it in each column of top
perc = 3000
top = pd.DataFrame(columns=dat.columns)
col="Degree"
top[col] = dat.nlargest(perc,col)["Nodes"].values
top = top.drop(["Nodes"], axis=1)
#------ Store the node ids of the top 3000 nodes for each feature
top.to_csv("top_nodes.csv",index=False)
#------ Store as seed sets the top 100 of each feature
c = "Kcores"
f = open("Seeds\\centr_"+c.lower()+"_seeds.txt","w")
f.write(" ".join([str(x) for x in list(top.loc[0:100,c].values)]))
f.close()