-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathcolumns.py
215 lines (207 loc) · 10.3 KB
/
columns.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
import pandas as pd
import analyzer
import devices
import services
# mask = [original, measurement, code_of_sensor, voltage_param,
# short_search_name, full_search_name, concat_of_short_search_name_and_voltage_param]
# TODO: move the per-device file defaults (e.g. "file=devices.nkvv.work_file") into a helper function.
def columns_list_maker(device_type: str = 'nkvv',
                       data: pd.DataFrame = None,
                       file: str = None,
                       sep: str = None,
                       encoding: str = None):
    """
    Build the list of column names for a device's raw data.

    Recommended to store the result in a variable 'cols_list' in a runner-file.

    Parameters:
        device_type: one of 'nkvv', 'gpp', 'kiv', 'mon'.
        data: already loaded raw data; only used for 'kiv' and 'mon'. When it is
            None for those devices, it is loaded with analyzer.get_data().
        file, sep, encoding: CSV source and its read options; only used for
            'nkvv' and 'gpp'. When 'file' is None all three are taken from
            devices.links(device_type).

    Returns:
        A list of column names, or None when the device type is not one of the
        four handled above or when no ' № ' header marker is found in 'kiv' data.
    """
    if file is None:
        # Elements 1..4 of devices.links() are unpacked as file, sep, encoding and
        # parse_dates; the last one is not needed here.
        file, sep, encoding, _ = devices.links(device_type)[1:5]

    if device_type == 'nkvv':
        # Iterating a DataFrame yields its column labels.
        return list(pd.read_csv(file, sep=sep, encoding=encoding))

    if device_type == 'gpp':
        # The first data row (read with header=[1]) is rendered as text and split on single spaces.
        first_row = pd.read_csv(file, sep=sep, encoding=encoding, header=[1]).iloc[0]
        return first_row.to_string().split(' ')

    if device_type == 'kiv':
        if data is None:
            data = analyzer.get_data(device_type=device_type)
        if data.columns[0] == ' № ':
            return list(data.columns)
        # Otherwise look for the ' № ' marker in the first column.
        # NOTE(review): the row AFTER the marker row is returned, as in the
        # original code - confirm this matches the layout of the kiv files.
        # A marker located in the very last row raises IndexError.
        for row_number, first_cell in enumerate(data.iloc[:, 0]):
            if first_cell == ' № ':
                return list(data.iloc[row_number + 1])
        return None

    if device_type == 'mon':
        if data is None:
            data = analyzer.get_data(device_type=device_type)
        return list(data.columns)

    return None
# Analyze all columns
def columns_analyzer(device_type: str = 'nkvv',
                     list_for_columns: list = None):
    """
    Turn a list of column names into a dictionary of column masks.

    Must have a device_type set. Takes a list of columns as a base
    (when it is None the list is built with 'columns_list_maker').
    The keys of the result are the enumerated positions of the columns and can
    be used as indexes; each value is a list (the mask):
        original name,
        measurement,
        code_of_sensor,
        voltage_param,
        short_search_name,
        full_search_name,
        concat_of_short_search_name_and_voltage_param
    Used in analytical functions for defining the processing columns.
    Recommended to store the result in a variable 'cols' in a runner-file.

    For 'gpp' and for any device type not handled here only the original name
    is stored, i.e. every value is a one-element list.
    """
    if list_for_columns is None:
        list_for_columns = columns_list_maker(device_type=device_type)
    result_dict = {k: [v] for k, v in enumerate(list_for_columns)}

    if device_type in ('nkvv', 'mon'):
        # Both devices share the same naming scheme; only the lookup tables differ.
        device = devices.nkvv if device_type == 'nkvv' else devices.mon
        for row in result_dict.values():
            _fill_underscore_mask(row, device)
    elif device_type == 'kiv':
        for row in result_dict.values():
            _fill_kiv_mask(row, devices.kiv)
    return result_dict


def _fill_underscore_mask(row: list, device) -> None:
    """Complete in place the mask of one 'nkvv'/'mon' column; 'row' starts as [original_name]."""
    name = row[0]
    # services.Trimmer.right/left presumably return the last/first N characters - confirm in services.py
    tail = services.Trimmer.right(name, 2)
    head = services.Trimmer.left(name, 4)

    # Measurement: the first data_types key equal to the tail or the head of the name.
    # Only one value is taken so the mask keeps its fixed positions.
    data_types = device.data_types
    measurement = next((data_types[key] for key in data_types if key == tail or key == head),
                       'other')

    # Sensor code: the two characters that follow the first underscore.
    underscore_at = name.find("_")
    if underscore_at == -1:
        sensor = 'overall'
    else:
        sensor = services.Trimmer.right(services.Trimmer.left(name, underscore_at + 3), 2)

    # Voltage: chosen by the last character of the sensor code.
    side = services.Trimmer.right(sensor, 1)
    if side == '1':
        voltage = 'HV'
    elif side == '2':
        voltage = 'MV'
    else:
        voltage = 'no_voltage'

    # Search names: the first data_search_name key that is a prefix of the name.
    short_name, full_name = '-', '-'
    search_names = device.data_search_name
    for a_key in search_names:
        if services.Trimmer.left(name, len(a_key)) == a_key:
            short_name, full_name = search_names[a_key][0], search_names[a_key][1]
            break

    row.extend([measurement, sensor, voltage, short_name, full_name, short_name + '_' + voltage])


def _fill_kiv_mask(row: list, device) -> None:
    """Complete in place the mask of one 'kiv' column; 'row' starts as [original_name]."""
    name = row[0]
    tail = services.Trimmer.right(name, 2)
    head = services.Trimmer.left(name, 4)
    is_phase = 'ф.' in str(name)  # Works with kiv.xlsx for phase-parameters

    # Measurement: the first data_types key equal to the tail or the head, or,
    # for phase-parameters, the first key the name starts with.
    measurement = 'other'
    data_types = device.data_types
    for key in data_types:
        if key == tail or key == head or (is_phase and str(name).startswith(key)):
            measurement = data_types[key]
            break

    # Sensor code: the character right after 'ф.' followed by '0'.
    phase_at = name.find("ф.")
    if phase_at == -1:
        sensor = 'overall'
    else:
        sensor = services.Trimmer.right(services.Trimmer.left(name, phase_at + 3), 1) + '0'

    if is_phase:
        voltage = "MV"
    elif 'unb' in str(name):
        voltage = "unb"
    else:
        voltage = 'no_voltage'

    # Search names: every matching prefix overwrites the previous one, so the
    # last matching key wins (same outcome as the original in-place assignment).
    short_name, full_name = '-', '-'
    search_names = device.data_search_name
    for a_key in search_names:
        if services.Trimmer.left(name, len(a_key)) == a_key:
            short_name, full_name = search_names[a_key][0], search_names[a_key][1]

    row.extend([measurement, sensor, voltage, short_name, full_name, short_name + '_' + voltage])
def time_column(device_type: str = 'nkvv',
                data: pd.DataFrame = None):
    """
    Returns full name of a timestamp-column in data.

    Uses element 4 of devices.links(device_type) as the list of time-column
    prefixes (the device property 'self.file_parse_dates'). The main time column
    (with the fixed time of the measurement) must be first in that list: the
    prefixes are tried in order and the first column of 'data' whose name starts
    with the earliest matching prefix is returned.

    Parameters:
        device_type: device name; it is lower-cased before use.
        data: raw data; loaded with analyzer.get_data() when None.

    Returns:
        The column name, or None (after printing a hint) when no column matches.
    """
    device_type = device_type.lower()
    if data is None:
        data = analyzer.get_data(device_type=device_type)
    parse_dates = devices.links(device_type)[4]
    column_names = list(data.columns)
    for prefix in parse_dates:
        for a_column in column_names:
            # Non-string labels cannot match a text prefix, so they are skipped.
            if isinstance(a_column, str) and a_column.startswith(prefix):
                # Return at once so that an earlier prefix is never overridden by a later one.
                return a_column
    print(f'Ошибка поиска колонки с временем замера, проверьте свойства устройства и атрибут'
          f' "self.file_parse_dates" в модуле устройств "devices.py"')
    return None
def columns_df(device_type='mon', cols: dict = None):
    """
    Convert a 'cols' mask dictionary into a pandas DataFrame.

    Each key of 'cols' becomes a row label and each mask list becomes a row
    under the seven fixed headers below. When 'cols' is None the dictionary is
    built with columns_analyzer(device_type=device_type).
    """
    headers = [
        'Наименование',
        'Тип по ед. измерения',
        'Датчик',
        'Напряжение',
        'Код краткий',
        'Код полный',
        'Код + напряжение',
    ]
    mask = columns_analyzer(device_type=device_type) if cols is None else cols
    return pd.DataFrame.from_dict(mask, orient='index', columns=headers)