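"""Aspect-term extraction over dependency parses.

For each input sentence this script runs the Stanford lexicalized parser,
picks noun aspect-term candidates from the dependency relations, and pairs
each candidate with the adjectives (or, failing that, verbs) linked to it.
Results are written to aspect_term_out.py as a Python literal.
"""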
import os
import sys

import nltk
from nltk.tokenize import word_tokenize

from parser import getSentences, getAspectTermsDict


def detect_quality(term, data, JJ_list, mydict, depth, key):
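    """Collect opinion words linked to `term` in the dependency relations.

    Scans `data` (tokenized Stanford dependency triples) for relations that
    connect `term` to a word in `JJ_list`, appends matches to `mydict[key]`,
    and recurses one level through each matched word (stopping at depth 2).
    Returns `mydict` with duplicates removed.
    """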
    if depth == 2:
        mydict[key] = list(set(mydict[key]))
        return mydict
    for relation in data:
        if len(relation) >= 5:
            if relation[2].split('-')[0] == term:
                if relation[4].split('-')[0] in JJ_list:
                    mydict[key].append(relation[4].split('-')[0])
                    detect_quality(relation[4].split('-')[0], data, JJ_list, mydict, depth + 1, key)
            elif relation[4].split('-')[0] == term:
                if relation[2].split('-')[0] in JJ_list:
                    mydict[key].append(relation[2].split('-')[0])
                    detect_quality(relation[2].split('-')[0], data, JJ_list, mydict, depth + 1, key)
    mydict[key] = list(set(mydict[key]))
    return mydict


def findvblist(sentence):
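    """Return the verbs of `sentence` (stopwords removed), identified by the
    NLTK POS tags VB/VBD/VBG/VBN/VBP/VBZ; used as a fallback opinion-word
    list when a sentence has no tagged adjectives."""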
    sent = ""
    token = sentence.split(" ")
    # Load the stopword list, one word per line.
    with open('stopwords.txt', 'r') as f:
        stopwords = [line.replace('\n', '') for line in f]
    for word in token:
        if word not in stopwords:
            sent = sent + word + " "
    sent = sent.strip()  # str.strip() returns a new string; the result must be assigned
    token = nltk.word_tokenize(sent)
    tagged_list = nltk.pos_tag(token)
    vblist = []
    for tags in tagged_list:
        if tags[1] in ('VB', 'VBD', 'VBG', 'VBN', 'VBP', 'VBZ'):
            vblist.append(tags[0])
    return vblist


def detect_terms(temp, sentence, nnlist):
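    """Pick candidate aspect terms from the dependency relations in `temp`.

    A word qualifies if it heads an adjectival modifier (amod) or is the
    dependent of a complement/object/subject relation. Candidates absent
    from `nnlist` (the sentence's nouns) are dropped, then conjuncts of the
    survivors are added back.
    """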
    aspect_term = []
    for relation in temp:
        if relation[0] == 'amod':
            aspect_term.append(relation[2].split('-')[0])
        if relation[0] in ('acomp', 'xcomp', 'nmod', 'dobj', 'nsubj',
                           'nsubjpass', 'pobj', 'abbrev'):
            aspect_term.append(relation[4].split('-')[0])
    aspect_term = list(set(aspect_term))
    # Build a new list instead of removing items while iterating,
    # which would skip elements.
    aspect_term = [term for term in aspect_term if term in nnlist]
    for relation in temp:
        if relation[0] == 'conj' and relation[2].split('-')[0] in aspect_term:
            aspect_term.append(relation[4].split('-')[0])
    aspect_term = list(set(aspect_term))
    return aspect_term


def main():
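    """Entry point. Usage: python aspect_term1.py <input_file>

    Writes the sentence list to all_sentences_out.py and the per-sentence
    aspect-term/opinion-word pairs to aspect_term_out.py as a Python
    literal named NN_JJ.
    """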
    aspect_term = getAspectTermsDict(sys.argv[1])
    sentence_list = getSentences(sys.argv[1])
    # Dump the raw sentence list as an importable Python literal.
    fp1 = open('all_sentences_out.py', 'w')
    fp1.write("all_sentences = ")
    fp1.write(str(sentence_list))
    fp1.close()
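
    # Parse each sentence with the Stanford parser and pair every detected
    # aspect term with the opinion words attached to it.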
    fp = open('aspect_term_out.py', 'w')
    aspect_terms = []
    for i in range(len(sentence_list)):
        sentence = sentence_list[i][1]
        pos_list = aspect_term[i]
        temp = []
        terms = []
        # Run the Stanford lexparser on the sentence via a temp file.
        # Note: a sentence containing a single quote will break this shell command.
        os.popen("echo '" + sentence + "' > ./stanford-parser-full/stanfordtemp.txt")
        parser_out = os.popen("./stanford-parser-full/lexparser.sh ./stanford-parser-full/stanfordtemp.txt")
        # Keep each non-empty dependency line as a token list,
        # e.g. ['amod', '(', 'food-2', ',', 'great-3', ')'].
        for relation in parser_out:
            relation = word_tokenize(relation)
            if len(relation) != 0:
                temp.append(relation)
        x = detect_terms(temp, sentence, pos_list[0])
        JJ_list = pos_list[1]
        if len(JJ_list) == 0:
            # No adjectives tagged for this sentence: fall back to verbs.
            JJ_list = findvblist(sentence)
        for term in x:
            mydict = {term: []}
            terms.append(detect_quality(term, temp, JJ_list, mydict, 0, term))
        aspect_terms.append(terms)
fp.write("NN_JJ = [")
for item in range(len(aspect_terms)):
fp.write(str(aspect_terms[item])+",")
fp.write("]")
fp.close()
return aspect_terms


if __name__ == '__main__':
    main()