-
Notifications
You must be signed in to change notification settings - Fork 1
/
WRITER4.py
258 lines (241 loc) · 7.73 KB
/
WRITER4.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
# Use this to write data to an ARFF file. Each changed character is added as its own attribute (unlike writer 5, which combines all changed letters into one attribute). Please note that this will overwrite the current contents of the ARFF file and will also rewrite the JSON files that are read to populate it.
import json
import codecs
from sys import argv
# Exactly one command-line argument is expected: the path of the ARFF file to
# write. Exit with a usage message rather than an opaque tuple-unpacking error
# when the argument count is wrong.
if len(argv) != 2:
    raise SystemExit("Usage: python %s <arff_file>" % argv[0])
script, arff_file = argv
import unicodedata
# Pause so the user can abort (Ctrl-C) before the file is truncated; raw_input
# returns once Enter is pressed and the typed text itself is not inspected.
answer = raw_input("\nWARNING: \nTHIS WILL OVERWRITE THE %s FILE! \nPress any key to continue..." % arff_file)
# Mode 'w' truncates the file immediately. The handle is kept open because the
# header and the data rows are written to it later in the script.
txt2 = open(arff_file, 'w')
text = ''
print("\n%s file cleared..." % arff_file)
# Write the ARFF header: the relation name, the fixed per-commit attributes,
# one numeric attribute per tracked ASCII character, the class attribute and
# finally the @data marker.
attributes = [
    "\n@relation python_mistakes",
    "\n",
    "\n@attribute commit_message string",
    "\n@attribute additions numeric",
    "\n@attribute deletions numeric",
    "\n@attribute changes numeric",
]
# The three tracked whitespace characters get hand-written attribute lines;
# every other tracked character is named after its Unicode character name.
whitespace_attributes = {
    9: "\n@attribute attribute_TAB numeric",
    10: "\n@attribute attribute_NEWLINE numeric",
    13: "\n@attribute attribute_CARRIAGE_RETURN numeric",
}
for code in range(127):
    if code in whitespace_attributes:
        attributes.append(whitespace_attributes[code])
    elif code > 32:
        # Codes 33..126: e.g. "LATIN SMALL LETTER A" -> "LATIN_SMALL_LETTER_A".
        label = unicodedata.name(unicode(chr(code))).replace(" ", "_")
        attributes.append("\n@attribute attribute_%s numeric" % label)
attributes.extend([
    "\n@attribute class {yes, no}",
    "\n",
    "\n@data",
])
for text in attributes:
    txt2.write(text)
# Default input files. Every filename must contain 'yes' or 'no': later in the
# script the class label of each row is derived from whether 'yes' appears in
# the name of the file it came from.
filenames = ['monicano.json', 'monicayes.json', 'eno.json', 'eyes.json', 'NEWyes.json', 'NEWno.json']
print("\nThe current list of files to write from is:")
for i in filenames:
    print(i)
answer = raw_input("\nDo you want to change this list? y/n ")
if answer == 'y':
    # Replace the default list with names typed by the user until 'DONE'.
    filenames = []
    print("Please type 'DONE' when you are done adding files.")
    while True:
        answer = raw_input("\nType the name of the file you want to write from (or type DONE): ")
        # Test the sentinel first: 'DONE' contains neither 'yes' nor 'no', so
        # applying the filename rule first would report it as an invalid
        # filename just before leaving the loop.
        if answer == 'DONE':
            break
        if 'yes' not in answer and 'no' not in answer:
            print("\nINVALID FILENAME: The filename needs to have the word 'yes' or 'no' in it.")
        else:
            filenames.append(answer)
    print("\nThe current list of files to write from is:")
    for i in filenames:
        print(i)
print("\nPreparing to write to these files...")
# Edit the keys to make them compatible
answer = raw_input("\nWARNING: \nThe dictionary keys Mistake, Tags, Length, and Why Not will be removed from all of these files to make them compatible with this program. \nKeys will also be modified to add colons if missing. \nPress any key to continue...")

# (old key, new key) pairs, applied in this order. Singular spellings are
# folded into the plural, colon-terminated form. Some targets are dropped
# again below; they are still renamed first so the progress messages printed
# for each rename stay the same.
KEY_RENAMES = [
    ("Additions", "Additions:"),
    ("Deletions", "Deletions:"),
    ("Changes", "Changes:"),
    ("Length", "Length:"),
    ("Commit Corrections", "Commit Corrections:"),
    ("Commit Mistakes", "Commit Mistakes:"),
    ("Commit Mistake", "Commit Mistakes:"),
    ("Tags", "Tags:"),
    ("Mistakes", "Mistakes:"),
    ("Mistake", "Mistakes:"),
    ("Why Not", "Why Not:"),
]
# Keys removed from every entry once renaming is done.
KEYS_TO_DROP = ["Keyword:", "Mistakes:", "Mistake:", "Why Not:", "Length:", "Tags:"]
# Exactly these keys must remain in every entry afterwards.
REQUIRED_KEYS = [
    "Additions:",
    "Deletions:",
    "Changes:",
    "Commit Mistakes:",
    "Commit Corrections:",
    "Message:",
]

for eachfile in filenames:
    with open(eachfile) as data_file:
        data = json.load(data_file)

    # Rename key by key (outer loop) so the messages come out grouped per key.
    # Membership is tested with `in` rather than `.get(...) != None` so that a
    # key whose value is JSON null is renamed as well.
    for old_key, new_key in KEY_RENAMES:
        for entry in data:
            if old_key in entry:
                entry[new_key] = entry.pop(old_key)
                print("Changed one %s to %s" % (old_key, new_key))

    for entry in data:
        for key in KEYS_TO_DROP:
            entry.pop(key, None)

    # Validate before touching the file on disk: every entry must now hold the
    # required keys and nothing else. Raised explicitly (not via assert) so the
    # check still runs under `python -O` and reports what is wrong.
    for entry in data:
        missing = [key for key in REQUIRED_KEYS if key not in entry]
        unexpected = [key for key in entry if key not in REQUIRED_KEYS]
        if missing or unexpected:
            raise ValueError(
                "%s: entry is not in the expected format (missing keys: %r, unexpected keys: %r)"
                % (eachfile, missing, unexpected)
            )

    # Overwrite the input file with the normalized entries.
    with open(eachfile, 'w') as outfile:
        json.dump(data, outfile, indent=2)
print("\nFiles have been modified..")
# Add the data to the arfffile, but keep track of the links so there are no duplicates.

# ASCII codes that get a column in every data row, in column order: TAB,
# NEWLINE, CARRIAGE RETURN, then '!' (33) through '~' (126). This matches the
# per-character attributes declared in the ARFF header earlier in the script.
TRACKED_CODES = [9, 10, 13] + list(range(33, 127))
# Characters allowed to remain in the commit message (compared lower-cased).
ALLOWED_MESSAGE_CHARS = set("abcdefghijklmnopqrstuvwxyz1234567890 ")


def _count_ascii(lines):
    """Return a 128-slot list with the number of occurrences of each ASCII
    character across all strings in *lines*; non-ASCII characters are ignored.
    """
    counts = [0] * 128
    for line in lines:
        for ch in line:
            code = ord(ch)
            if code < 128:
                counts[code] += 1
    return counts


def _clean_message(message):
    """Return *message* reduced to ASCII letters, digits and spaces.

    A missing (None) message is treated as empty. After filtering, every
    occurrence of the word "class" is removed as well.
    NOTE(review): presumably to avoid confusion with the ARFF 'class'
    attribute -- confirm.
    """
    ascii_message = (message or u"").encode('ascii', 'ignore')
    kept = "".join(ch for ch in ascii_message if ch.lower() in ALLOWED_MESSAGE_CHARS)
    return kept.replace("class", "")


# Kept as a list: membership only needs ==, so this works whatever type the
# "Commit Corrections:" values have.
links = []
yes = 0
no = 0
for filename in filenames:
    print("\nAdding from %r file.." % filename)
    # The file is only read here, so open it read-only and close it promptly.
    with open(filename) as txt:
        data = json.load(txt)
    # The class label of every row comes from the name of its source file.
    chicken = "yes" if 'yes' in filename else "no"
    for thing in data:
        commitlink = thing.get("Commit Corrections:")
        if commitlink in links:
            print("This is a duplicate entry, so I am skipping it.")
            continue
        links.append(commitlink)

        # Split the change lines into added and removed text by their marker.
        additions = []
        deletions = []
        for change in thing.get("Changes:") or []:
            marker = change[:1]  # slice, so an empty line yields '' instead of raising
            if marker == '+':
                additions.append(change[1:])
            elif marker == '-':
                deletions.append(change[1:])
            else:
                print("I didn't recognize %s as a + or a -" % marker)

        added = _count_ascii(additions)
        removed = _count_ascii(deletions)
        # Per-character absolute difference for codes 0..126. Code 127 (DEL)
        # is left out of both the total and the columns; the header declares
        # no attribute for it.
        chardiff = [abs(removed[code] - added[code]) for code in range(127)]
        c = sum(chardiff)

        m = _clean_message(thing.get("Message:"))
        a = thing.get("Additions:")
        d = thing.get("Deletions:")

        # One row: quoted message, additions, deletions, total character
        # difference, one value per tracked character, then the class label.
        fields = ["'%s', %s, %s, %s" % (m, a, d, c)]
        fields.extend(str(chardiff[code]) for code in TRACKED_CODES)
        fields.append(chicken)
        txt2.write("\n" + ", ".join(fields))

        if chicken == 'yes':
            yes += 1
        else:
            no += 1
txt2.close()
print("\nAll done! I have added %d yes entries and %d no entries. \nThank you for using %s" % (yes, no, script))