-
Notifications
You must be signed in to change notification settings - Fork 45
/
cwru.py
152 lines (131 loc) · 5.19 KB
/
cwru.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
import os,re
import errno
import random
import urllib.request as urllib
import numpy as np
from scipy.io import loadmat
from sklearn.utils import shuffle
def fliter_key(keys):
    """Pick the ``*DE_time`` / ``*FE_time`` variable names of one recording.

    Every key that starts with ``<prefix>FE_time`` (matched case-insensitively)
    contributes its prefix. The first prefix found is used to build the
    returned pair of names.

    Args:
        keys: iterable of variable names, e.g. the keys of the dict returned
            by ``scipy.io.loadmat``.

    Returns:
        Tuple ``(prefix + 'DE_time', prefix + 'FE_time')``.

    Raises:
        IndexError: if no key matches ``<prefix>FE_time``. The exception type
            is the one the bare ``fkeys[0]`` lookup used to raise; it now
            carries a message naming the keys that were inspected.
    """
    prefixes = []
    for key in keys:
        found = re.match(r'(.*)FE_time', key, re.M | re.I)
        if found:
            prefixes.append(found.group(1))

    if not prefixes:
        raise IndexError(
            "no key matching '<prefix>FE_time' found in {}".format(list(keys))
        )

    # More than one candidate prefix: dump the keys so the ambiguity is
    # visible, then fall back to the first prefix.
    if len(prefixes) > 1:
        print(keys)

    prefix = prefixes[0]
    return prefix + 'DE_time', prefix + 'FE_time'
# Label offset contributed by each experiment (see get_class). The fan-end
# experiment is shifted by 9 so that its fault labels (10-18) do not collide
# with the drive-end ones (1-9); both drive-end experiments use offset 0 and
# therefore share the same labels.
exps_idx = {
    '12DriveEndFault': 0,
    '12FanEndFault': 9,
    '48DriveEndFault': 0,
}

# Fault name -> base label, numbered in the order listed below ('Normal' is 0).
# The OuterRace3 / OuterRace12 variants (which would take labels 10-15) are
# not part of the table, so recordings with those names get no label.
faults_idx = {
    fault: label
    for label, fault in enumerate((
        'Normal',
        '0.007-Ball',
        '0.014-Ball',
        '0.021-Ball',
        '0.007-InnerRace',
        '0.014-InnerRace',
        '0.021-InnerRace',
        '0.007-OuterRace6',
        '0.014-OuterRace6',
        '0.021-OuterRace6',
    ))
}
def get_class(exp, fault):
    """Return the integer class label of an (experiment, fault) pair.

    'Normal' is always class 0, whatever the experiment name is (the
    experiment is not looked up in that case). Any other fault maps to the
    experiment offset from ``exps_idx`` plus the fault index from
    ``faults_idx``; an unknown name in either table raises ``KeyError``.
    """
    return 0 if fault == 'Normal' else exps_idx[exp] + faults_idx[fault]
class CWRU:
    """CWRU bearing recordings cut into fixed-length train/test clips.

    Construction reads ``metadata.txt``, keeps the recordings that match the
    requested experiments and rpms (``NormalBaseline`` recordings at those
    rpms are always kept), downloads every missing ``.mat`` file below
    ``Datasets/CWRU`` and slices each recording into clips of ``length``
    samples.

    Each non-blank metadata line is whitespace separated and read as
    ``<experiment> <rpm> <fault name> <download url>``.

    Attributes:
        length: number of samples per clip.
        X_train, X_test: arrays of shape ``(n_clips, length, 2)``; the last
            axis holds the ``*DE_time`` and ``*FE_time`` variables of the
            ``.mat`` file, in that order.
        y_train, y_test: 1-D arrays of class labels aligned with the rows of
            ``X_train`` / ``X_test``.
        all_labels: one ``(experiment + fault, label)`` tuple per selected
            recording.
        classes: distinct entries of ``all_labels`` ordered by label, then
            by name.
        nclasses: ``len(classes)``.
            NOTE(review): this counts distinct (name, label) pairs, so
            experiments sharing an offset (12DriveEndFault / 48DriveEndFault)
            are counted separately although they share labels - confirm
            that this is intended.
    """

    _EXPERIMENTS = ('12DriveEndFault', '12FanEndFault', '48DriveEndFault')
    _RPMS = ('1797', '1772', '1750', '1730')

    # Train clips start every _TRAIN_STRIDE samples inside [0, _SPLIT);
    # only the first _TRAIN_CLIPS start positions are used.
    _SPLIT = 60000
    _TRAIN_STRIDE = 80
    _TRAIN_CLIPS = 660
    # Test clips are back-to-back windows inside [_SPLIT, _TEST_STOP);
    # only the first _TEST_CLIPS start positions are used.
    _TEST_STOP = 120000
    _TEST_CLIPS = 25

    def __init__(self, exps, rpms, length):
        """Select, download, slice and shuffle the requested recordings.

        Args:
            exps: collection of experiment names, each one of
                '12DriveEndFault', '12FanEndFault', '48DriveEndFault'.
            rpms: collection of rpm values as strings, each one of
                '1797', '1772', '1750', '1730'.
            length: number of samples per clip.

        Raises:
            ValueError: if an experiment name or rpm value is not supported.
                Previously a message was printed and a half-initialised
                object was returned.
        """
        for exp in exps:
            if exp not in self._EXPERIMENTS:
                raise ValueError("wrong experiment name: {}".format(exp))
        for rpm in rpms:
            if rpm not in self._RPMS:
                raise ValueError("wrong rpm value: {}".format(rpm))

        # root directory of all data
        rdir = os.path.join('Datasets', 'CWRU')
        # Print the whole selection; reading the loop variables here would
        # fail with NameError when exps or rpms is empty.
        print(rdir, exps, rpms)

        # NOTE(review): '__file__' is a string literal, so dirname() is ''
        # and metadata.txt is resolved against the current working
        # directory - confirm that this is intended.
        fmeta = os.path.join(os.path.dirname('__file__'), 'metadata.txt')
        with open(fmeta) as meta:
            all_lines = meta.readlines()

        lines = []
        for line in all_lines:
            fields = line.split()
            if not fields:
                continue  # tolerate blank lines
            wanted_exp = fields[0] in exps or fields[0] == 'NormalBaseline'
            if wanted_exp and fields[1] in rpms and fields[2] in faults_idx:
                lines.append(fields)

        self.length = length  # sequence length
        lines.sort(key=lambda fields: get_class(fields[0], fields[2]))
        self._load_and_slice_data(rdir, lines)
        # shuffle training and test arrays
        self._shuffle()

        self.all_labels = tuple(
            (fields[0] + fields[2], get_class(fields[0], fields[2]))
            for fields in lines
        )
        # The name is the tie-breaker so that entries sharing a label come
        # out in the same order on every run.
        self.classes = sorted(
            set(self.all_labels), key=lambda label: (label[1], label[0])
        )
        self.nclasses = len(self.classes)  # number of classes

    def _mkdir(self, path):
        """Create ``path`` (and its parents) unless it already is a directory.

        Raises:
            SystemExit: with code 1, after printing a message, when the
                directory cannot be created.
        """
        try:
            os.makedirs(path, exist_ok=True)
        except OSError:
            print("can't create directory '{}''".format(path))
            raise SystemExit(1)

    def _download(self, fpath, link):
        """Fetch ``link`` into ``fpath``.

        The data goes to ``fpath + '.part'`` first and is renamed once the
        transfer has finished, so an interrupted download never leaves a
        truncated file that a later run would treat as complete.
        """
        print(link + " Downloading to: '{}'".format(fpath))
        partial = fpath + '.part'
        try:
            urllib.urlretrieve(link, partial)
            os.replace(partial, fpath)
        finally:
            # Only still present when the transfer or the rename failed.
            if os.path.exists(partial):
                os.remove(partial)

    def _cut_clips(self, time_series, cuts):
        """Return the windows of ``self.length`` rows starting at each cut.

        Windows that would run past the end of ``time_series`` are dropped,
        so every returned clip has the full length.

        Returns:
            Array of shape ``(n_clips, self.length, n_columns)``, or an
            empty ``(0, self.length, 2)`` array when no window fits.
        """
        n_samples = time_series.shape[0]
        windows = [
            time_series[cut:cut + self.length]
            for cut in cuts
            if cut + self.length <= n_samples
        ]
        if not windows:
            return np.zeros((0, self.length, 2))
        return np.stack(windows)

    def _load_and_slice_data(self, rdir, infos):
        """Load every recording in ``infos`` and fill X/y train and test.

        Args:
            rdir: root directory of the data; files are stored as
                ``rdir/<experiment>/<rpm>/<fault name>.mat``.
            infos: metadata rows ``[experiment, rpm, fault name, url]``.

        Sets ``X_train`` / ``X_test`` (arrays of shape
        ``(n_clips, length, 2)``) and ``y_train`` / ``y_test`` (lists with
        one label per clip).
        """
        train_cuts = list(
            range(0, self._SPLIT, self._TRAIN_STRIDE)
        )[:self._TRAIN_CLIPS]
        test_cuts = list(
            range(self._SPLIT, self._TEST_STOP, self.length)
        )[:self._TEST_CLIPS]

        # The empty float array fixes the shape and dtype of the result even
        # when there is nothing to load.
        empty = np.zeros((0, self.length, 2))
        train_parts = [empty]
        test_parts = [empty]
        self.y_train = []
        self.y_test = []

        for idx, info in enumerate(infos):
            # directory of this file
            fdir = os.path.join(rdir, info[0], info[1])
            self._mkdir(fdir)
            fpath = os.path.join(fdir, info[2] + '.mat')
            print(idx, fpath)
            if not os.path.exists(fpath):
                self._download(fpath, info[3].rstrip('\n'))

            mat_dict = loadmat(fpath)
            key1, key2 = fliter_key(mat_dict.keys())
            time_series = np.hstack((mat_dict[key1], mat_dict[key2]))
            print(time_series.shape)

            label = get_class(info[0], info[2])
            train_clips = self._cut_clips(time_series, shuffle(train_cuts))
            test_clips = self._cut_clips(time_series, shuffle(test_cuts))

            # One label per clip actually produced, so X and y stay aligned
            # even when fewer windows than planned are available.
            train_parts.append(train_clips)
            self.y_train += [label] * len(train_clips)
            test_parts.append(test_clips)
            self.y_test += [label] * len(test_clips)

        self.X_train = np.concatenate(train_parts)
        self.X_test = np.concatenate(test_parts)

    @staticmethod
    def _permute(samples, labels):
        """Reorder ``samples`` and ``labels`` with the same seeded permutation."""
        order = list(range(samples.shape[0]))
        random.Random(0).shuffle(order)
        return samples[order], np.array([labels[i] for i in order])

    def _shuffle(self):
        """Shuffle the train and the test set, each with a fixed seed of 0.

        ``y_train`` and ``y_test`` are numpy arrays afterwards.
        """
        # shuffle training samples
        self.X_train, self.y_train = self._permute(self.X_train, self.y_train)
        # shuffle test samples
        self.X_test, self.y_test = self._permute(self.X_test, self.y_test)