-
Notifications
You must be signed in to change notification settings - Fork 2
/
preprocessing.py
37 lines (31 loc) · 1.11 KB
/
preprocessing.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
#!/usr/bin/env python
# -*- coding: utf-8 -*-
import sys
def gen_labeled_data(input_file, labeled_file, unknown_file):
    '''
    Split input file to labeled file and unknown file.

    Every line of ``input_file`` is copied, unchanged, to exactly one of the
    two output files:

    * a line containing the substring ``UNKNOWN`` goes to ``unknown_file``;
    * every other line goes to ``labeled_file``.

    Both output files are opened in ``'w'`` mode, so any existing content is
    replaced.

    Args:
        input_file: path of the text file to read.
        labeled_file: path that receives the lines without ``UNKNOWN``.
        unknown_file: path that receives the lines containing ``UNKNOWN``.

    Raises:
        IOError: if any of the three files cannot be opened.
    '''
    # NOTE(review): an earlier version also tested for the category keywords
    # (VIDEO, NOVEL, GAME, TRAVEL, LOTTERY, ZIPCODE, OTHER, TEST) and kept a
    # running "is labeled" flag, but neither ever changed which file a line
    # was written to.  If lines that follow an UNKNOWN line were meant to
    # follow it into unknown_file, that is a deliberate behavior change and
    # should be confirmed against the data format first.

    # The input is opened first so that an unreadable input path fails before
    # either output file is created or truncated.  The ``with`` blocks close
    # all three files even if reading or writing raises part-way through.
    with open(input_file) as f_input:
        with open(labeled_file, 'w') as f_labeled:
            with open(unknown_file, 'w') as f_unknown:
                for line in f_input:
                    if 'UNKNOWN' in line:
                        f_unknown.write(line)
                    else:
                        f_labeled.write(line)
def main():
    '''
    Command-line entry point.

    Expects exactly three command-line arguments after the program name
    (input path, labeled output path, unknown output path) and passes them
    to gen_labeled_data().  With any other argument count it prints a usage
    line to standard output and returns without touching any file.
    '''
    if len(sys.argv) != 4:
        # print(...) with a single parenthesised string behaves the same as
        # a Python 2 print statement and as the Python 3 print function.
        print('Usage: ./gen_labeled_data.py initial_file labeled_file unknown_file')
        return

    input_file, labeled_file, unknown_file = sys.argv[1:4]
    gen_labeled_data(input_file, labeled_file, unknown_file)
# Run the command-line entry point only when this file is executed as a
# script; importing it as a module has no side effects.
if __name__ == '__main__':
    main()