-
Notifications
You must be signed in to change notification settings - Fork 1
/
build_cisplatin_network.py
98 lines (66 loc) · 2.78 KB
/
build_cisplatin_network.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
import random
import datetime
from subprocess import call
import collections
import os
import sys
import cPickle
import scipy
from numpy import dot
import numpy
from operator import itemgetter
import threading
from Queue import *
import copy
import time
import networkx as nx
# Global LINK folder location
LINKROOT="../../"
sys.path.append(LINKROOT+"/helpers")
sys.path.append(LINKROOT+"/model")
from link_logger import logger
import STRING_graph
import reconstruction_algorithms as recalg
import helpers
from IPython.core.debugger import Tracer; debug_here = Tracer()
import hsa_model as docmodel
# Parameters
from manuscript_parameters import *
## Overwrite MS parameters with smaller test Corpus variant.
## These assignments come after `from manuscript_parameters import *`, so any
## same-named values imported from there are replaced.
lsi_dims = 100
with_genia = 0
with_mesh = False
with_stemmer = True
pid_np_only = False

# LSI model: loaded only when no `lsi` global exists yet, so re-running the
# script in a session that already holds the model skips the load.
# NOTE(review): the original indentation of this section was lost. Only the
# two statements that create `lsi` are placed under the guard here; confirm
# whether the STRING / background loads below were meant to be guarded too.
if "lsi" not in globals():
    logger.info("loading LSI")
    lsi = docmodel.load_hprd_corpus(
        num_topics=lsi_dims,
        with_genia=with_genia,
        with_mesh=with_mesh,
        with_stemmer=with_stemmer,
        pid_np_only=pid_np_only,
    )

STRING = STRING_graph.load_string("human", "v9.0")
background = docmodel.AnnotatedGraph.build_HPRDNPInteractome()

logger.info("Using LSI Model:%s" % (lsi.name))

# Passed to recalg.rocSimGraph as `intermediate_graph_threshold` further down.
INTERMEDIATE_THR = [
    20, 40, 41, 46, 47, 50, 60, 70, 77, 80, 85, 90,
    100, 107, 110, 112, 120, 150, 200, 250, 300,
]
## Cisplatin synonyms? (result is kept for inspection; nothing below reads it)
cispl_cor = lsi.word_corelations("Cisplatin")

# Build a one-token "cisplatin" document: turn the token into a bag-of-words
# with the model's dictionary, then index the LSI model with it.
ddict = lsi._GLOBALDICTIONARY
cispl_bow = ddict.doc2bow(['cisplatin'])
cispl_doc = lsi.lsi[cispl_bow]

# Similarity of publications to that document.
# presumably an iterable of (publication, similarity) pairs, since it is
# passed straight to dict() below -- confirm against hsa_model.
doc_sims = lsi.publication_by_similarity_to_vec(cispl_doc)

# Fresh copy of the background interactome whose edges get scored with the
# cisplatin document similarities.
background_cispl = docmodel.AnnotatedGraph.build_HPRDNPInteractome()
doc_sims_d = dict(doc_sims)
background_cispl.score_edges_with_doc_sim(doc_sims_d)
# Reconstruct a network around "ATM": the seed graph is the subgraph of the
# cisplatin-scored background restricted to that single node, and the
# reference pathway is an empty graph.
seed_graph = background_cispl.subgraph(["ATM"])
empty_graph = docmodel.AnnotatedGraph()
atm_related = recalg.rocSimGraph(
    simModel=lsi,
    seed=[],
    seed_graph=seed_graph,
    reference_pathway=empty_graph,
    background=background_cispl,
    stop_at=-1,
    niter=5,
    bunch_size=20,
    neighborhood=4,
    use_graph=None,
    combine_graph=None,
    combine_weight=1.0,
    force_nodes=[],
    verbose=False,
    MERGE_COMPLEXES=False,
    DOC_SIM=doc_sims_d,
    AGGREGATE_WITH=max,
    intermediate_graph_threshold=INTERMEDIATE_THR,
    add_edges_to_seed_graph=True,
    score_all_background=False,
    SCAFFOLD=None,
    build_seed_from_references=False,
)

# Element 0 of the result is the graph that gets reported and exported.
cisplatin_net = atm_related[0]
net_summary = (
    cisplatin_net.number_of_nodes(),
    cisplatin_net.number_of_edges(),
    len(cisplatin_net.references()),
)
logger.info("Cisplatin network contains %d nodes, %d edges, %d references" % net_summary)
# Output in GEXF and GML format.
# NOTE(review): the export graph is built from `edges()` without `data=True`,
# so it presumably carries only edge endpoints (no confidence/refs attributes,
# no edge-less nodes) -- confirm this is intended.
gexf_graph = nx.Graph()
gexf_graph.add_edges_from(atm_related[0].edges())
nx.write_gexf(gexf_graph, path="cisplatin_network.gexf")
nx.write_gml(gexf_graph, path="cisplatin_network.gml")

# Tab-separated edge list, one edge per line:
#   source, target, confidence, ';'-joined references.
# The `with` block closes the file even if an edge lacks 'confidence'/'refs'
# or a write fails part-way through.
with open("cisplatin_network.tsv", 'w') as f:
    for src, tgt, mdata in atm_related[0].edges(data=True):
        refs = ";".join([str(x) for x in mdata['refs']])
        f.write("%s\t%s\t%f\t%s\n" % (src, tgt, mdata['confidence'], refs))