-
Notifications
You must be signed in to change notification settings - Fork 15
/
Copy pathutilities.py
73 lines (62 loc) · 1.57 KB
/
utilities.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
import numpy as np
import pickle
# Load the character inventory used by encode()/decode() from a pickle file
# resolved relative to the current working directory.
# NOTE(review): pickle.load can execute arbitrary code embedded in the file;
# only ever load a datafile.pkl that comes from a trusted source.
with open('datafile.pkl', 'rb') as fp:  # closed even if unpickling fails
    data = pickle.load(fp)
chars = data['chars']      # indexable collection of known characters
charlen = data['charlen']  # length of each one-hot vector built by encode()
maxlen = data['maxlen']    # presumably the word length used in training — TODO confirm

# Turkish alphabet in matching order: the character at position i in one
# table is the other-case form of the character at position i in the other.
lcase_table = u'abcçdefgğhıijklmnoöprsştuüvyz'
ucase_table = u'ABCÇDEFGĞHIİJKLMNOÖPRSŞTUÜVYZ'
def upper(data):
    """Return *data* upper-cased using the Turkish case tables.

    Every character found in ``lcase_table`` is replaced by the character at
    the same position in ``ucase_table``; in particular ``i`` becomes ``İ``
    and ``ı`` becomes ``I``.  Characters that are not in the table (digits,
    punctuation, letters such as ``q``/``w``/``x``) are returned unchanged.
    """
    # A dict lookup replaces the per-character ``index()`` call that used to
    # sit inside a bare ``except:``, which hid every error rather than only
    # the expected "character not in table" case.
    to_upper = dict(zip(lcase_table, ucase_table))
    return ''.join(to_upper.get(char, char) for char in data)
def lower(data):
    """Return *data* lower-cased using the Turkish case tables.

    Every character found in ``ucase_table`` is replaced by the character at
    the same position in ``lcase_table``; in particular ``I`` becomes ``ı``
    and ``İ`` becomes ``i``.  Characters that are not in the table (digits,
    punctuation, letters such as ``Q``/``W``/``X``) are returned unchanged.
    """
    # A dict lookup replaces the per-character ``index()`` call that used to
    # sit inside a bare ``except:``, which hid every error rather than only
    # the expected "character not in table" case.
    to_lower = dict(zip(ucase_table, lcase_table))
    return ''.join(to_lower.get(char, char) for char in data)
def capitalize(data):
    """Return *data* with its first character upper-cased and the rest lower-cased.

    The Turkish-aware ``upper``/``lower`` helpers of this module are applied
    first so that ``i``/``ı``/``I``/``İ`` are cased correctly; the built-in
    string methods are applied afterwards so that letters outside the
    Turkish tables are still converted as they were before.

    An empty string is returned as an empty string.
    """
    # Slicing with [:1] instead of indexing with [0] keeps empty input from
    # raising IndexError.
    head = upper(data[:1]).upper()
    tail = lower(data[1:]).lower()
    return head + tail
def title(data):
    """Return *data* with every whitespace-separated word capitalized.

    Words are obtained with ``data.split()`` and re-joined with single
    spaces, so runs of whitespace collapse to one space and leading or
    trailing whitespace is dropped.
    """
    # Call this module's capitalize() rather than str.capitalize() so every
    # word goes through the same casing routine as direct callers get.
    return " ".join(capitalize(word) for word in data.split())
#
def encode(word, maxlen=22, is_pad_pre=False):
    """Convert *word* into an array of one-hot character rows.

    The word is cut to at most *maxlen* characters, lower-cased with this
    module's ``lower`` and padded with spaces up to *maxlen* (on the left
    when *is_pad_pre* is true, otherwise on the right).  Each resulting
    character becomes a zero vector of length ``charlen`` with a 1 at the
    character's position in ``chars``; characters absent from ``chars``
    stay all-zero.  The rows are stacked with ``np.array`` and returned.
    """
    clipped = word[:maxlen] if len(word) > maxlen else word
    clipped = lower(clipped)

    filler = ' ' * (maxlen - len(clipped))
    padded = filler + clipped if is_pad_pre else clipped + filler

    def one_hot(symbol):
        # Unknown symbols keep the all-zero row.
        row = np.zeros(charlen)
        if symbol in chars:
            row[chars.index(symbol)] = 1
        return row

    return np.array([one_hot(symbol) for symbol in padded])
def decode(mat):
    """Turn a matrix of per-character rows back into a word.

    For each row ``mat[i, :]`` the position of the largest value is looked
    up in ``chars``; the characters are concatenated and the first
    whitespace-delimited token of the result is returned.

    Returns an empty string when the decoded text contains no
    non-whitespace character (for example an all-padding matrix, or a
    matrix with zero rows) instead of raising IndexError.
    """
    text = "".join(chars[np.argmax(mat[i, :])] for i in range(mat.shape[0]))
    # split() already discards leading/trailing whitespace, so no strip().
    tokens = text.split()
    return tokens[0] if tokens else ""