-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathMAIN_PROSS.py
703 lines (587 loc) · 28.3 KB
/
MAIN_PROSS.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
# coding=utf-8
# from PIL import Image
from pprint import pprint
from glob import glob
from string import digits
import re
import numpy
import requests
import json
import cv2
import base64
import matplotlib.pyplot as plt
from colorama import init, Fore, Back
init(autoreset=True)
plt.switch_backend('agg')
import shutil
import os
from collections import defaultdict, OrderedDict
import PRE_pross
import configparser
from datetime import datetime
from concurrent.futures import ThreadPoolExecutor
class configparser_custom(configparser.ConfigParser):
    """ConfigParser variant that keeps option names exactly as written.

    The base class lower-cases every option name; overriding ``optionxform``
    switches that conversion off.
    """

    def __init__(self, defaults=None):
        super().__init__(defaults=defaults)

    def optionxform(self, optionstr):
        """Return the option name unchanged instead of lower-casing it."""
        return optionstr

    def as_dict(self):
        """Return the parsed sections as ``{section: {option: value}}``.

        The data comes from ``self._sections``; each section mapping is copied
        into a plain ``dict``.

        :return: dict
        """
        return {section: dict(options) for section, options in self._sections.items()}
class ClassSendtoOcr(object):
    """Pair an OpenCV image with the address of the OCR server that should read it."""

    def __init__(self, cvimg: numpy.ndarray, ipaddr: str):
        self.img_send = cvimg
        self.address = ipaddr

    def send(self):
        """POST the stored image to the OCR service and hand back the reply.

        The image is base64-encoded with ``cv2_to_base64`` and sent as JSON to
        the ``chinese_ocr_db_crnn_mobile`` endpoint at ``self.address``.

        :return: the ``requests`` response object, or the string
                 ``'OCROFFLINE'`` (also printed) when the connection fails.
        """
        payload = json.dumps({'images': [cv2_to_base64(self.img_send)]})
        endpoint = f"http://{self.address}/predict/chinese_ocr_db_crnn_mobile"
        try:
            reply = requests.post(url=endpoint,
                                  headers={"Content-type": "application/json"},
                                  data=payload)
        except (ConnectionRefusedError, requests.exceptions.ConnectionError):
            # Callers recognise this marker string instead of a response object.
            reply = 'OCROFFLINE'
            print(reply)
        return reply
class AnswerToReport(object):
    """Align per-column OCR answers into one record per report row.

    ``ocr_answer`` holds one entry per cropped column; the code reads each as
    ``answer[k][0]['data']``, a list of dicts with a ``'text'`` string and a
    ``'text_box_position'`` of four ``[x, y]`` points.  The columns are split
    into two equal halves (the report is laid out as two side-by-side tables);
    in each half the first column carries the row names and every other column
    is attached to the row whose name is vertically closest.

    ``list_title`` gives the key used for each column, index-aligned with
    ``ocr_answer``.
    """

    # Placeholder stored when a row has no text in a column.
    EMPTY = '空'
    # Two name boxes closer than this fraction of the mean row spacing are
    # treated as fragments of the same row.
    SAME_ROW_FRACTION = 0.3
    # A fragment is merged only if one of this many preceding boxes is a real row.
    MERGE_LOOKBACK = 4

    def __init__(self, ocr_answer: list, list_title: list):
        self.answer = ocr_answer
        self.list_title = list_title

    @staticmethod
    def _row_centres(items):
        """Return the vertical centre of every OCR box in *items*."""
        centres = []
        for item in items:
            box = item['text_box_position']
            upper = 0.5 * (float(box[0][1]) + float(box[1][1]))
            lower = 0.5 * (float(box[2][1]) + float(box[3][1]))
            centres.append(0.5 * (upper + lower))
        return centres

    @classmethod
    def _merge_split_names(cls, name_items):
        """Join name fragments that sit on the same row.

        A name containing a wide gap may be recognised as two boxes at the same
        height; the later box is appended to the preceding row name.

        :return: ``(names, centres)`` for the rows that remain.
        """
        centres = cls._row_centres(name_items)
        gaps = [lower - upper for upper, lower in zip(centres, centres[1:])]
        # With fewer than two names there is no spacing to measure; a zero
        # tolerance simply disables merging instead of dividing by zero.
        tolerance = cls.SAME_ROW_FRACTION * (sum(gaps) / len(gaps)) if gaps else 0.0

        fragments = set()
        for i, centre in enumerate(centres):
            if i in fragments:
                continue
            for j in range(i + 1, len(centres)):
                if abs(centre - centres[j]) < tolerance:
                    fragments.add(j)

        names, positions = [], []
        for i, item in enumerate(name_items):
            if i not in fragments:
                names.append(item['text'])
                positions.append(centres[i])
            elif any(i - 1 - back not in fragments for back in range(cls.MERGE_LOOKBACK)):
                # Index 0 is never a fragment, so names is non-empty here.
                names[-1] = names[-1] + ' ' + item['text']
        return names, positions

    @classmethod
    def _align_half(cls, columns, titles):
        """Build one ``OrderedDict`` per row for one half of the report.

        Every box of a value column goes to the row whose name centre is
        nearest (first row wins a tie); if several boxes land on the same row
        the last one is kept.  Rows without a box in a column get ``EMPTY``,
        so every record carries every title.
        """
        names, positions = cls._merge_split_names(columns[0][0]['data'])
        rows = [OrderedDict([(titles[0], name)]) for name in names]
        if not rows:
            return rows
        for k in range(1, len(columns)):
            items = columns[k][0]['data']
            matched = {}
            for item, centre in zip(items, cls._row_centres(items)):
                distances = [abs(position - centre) for position in positions]
                matched[distances.index(min(distances))] = item['text']
            for index, row in enumerate(rows):
                row[titles[k]] = matched.get(index, cls.EMPTY)
        return rows

    def to_list(self):
        """Return the aligned rows of both halves as a list of ``OrderedDict``.

        :return: list of records, first-half rows followed by second-half rows;
                 an empty list when there are no answers; or the error string
                 ``'错误:需要对齐的列表有问题'`` when the number of columns is odd
                 and therefore cannot be split into two halves.
        """
        # todo: restore box coordinates from the config before aligning
        # (the crops are padded with a white border before OCR).
        if len(self.answer) % 2 == 1:
            print('需要对齐的列表有问题')
            return '错误:需要对齐的列表有问题'
        total = len(self.answer)
        half = total // 2
        if half == 0:
            return []
        bloodtest_list = []
        for start, stop in ((0, half), (half, total)):
            bloodtest_list.extend(
                self._align_half(self.answer[start:stop], self.list_title[start:stop]))
        return bloodtest_list
class ClassReportJson:
    """Plain holder for the fields of one recognised report."""

    def __init__(self):
        # hos_name: hospital, repo_type: report type, name / age / sex: patient
        # details, repo_data: report time.  All start out as empty strings.
        self.hos_name = self.repo_type = self.name = self.age = self.sex = self.repo_data = ''
        # todo: list of detailed content rows
        self.list = []
def cv2_to_base64(image):
    """
    Encode an OpenCV image as JPEG with ``cv2.imencode`` and return the
    encoded bytes as a base64 text string.
    :return: str
    """
    _, jpeg_buffer = cv2.imencode('.jpg', image)
    raw_bytes = jpeg_buffer.tobytes()
    return base64.b64encode(raw_bytes).decode('utf8')
def data_align_old(input_body_list, list_title):
    """
    Legacy row alignment: attach the text of every value column to the row
    name at (roughly) the same height.

    ``input_body_list[k][0]['data']`` is the OCR output of column ``k`` (dicts
    with ``'text'`` and a four-point ``'text_box_position'``); column 0 holds
    the row names.  ``list_title[k]`` is the key used for column ``k``.

    A value belongs to a row when their vertical centres differ by less than
    the mean spacing between consecutive names; the first such value wins.
    When fewer than two names exist there is no spacing to measure, so a fixed
    tolerance of 13 pixels is used instead.

    :return: ``(list_out, dict_out)`` - the rows as lists and as dicts; a
             missing value is stored as ``'空'``.  Both are empty when there
             are no columns or no names.
    """
    # todo: find a more precise way to define the strictness (unit: pixels)
    fallback_tolerance = 13
    list_out = []
    dict_out = []
    if not input_body_list:
        return list_out, dict_out

    def get_h_location_func(child_list_input):
        # Vertical centre of each box: the mean of its four corner heights.
        centres = []
        for entry in child_list_input:
            box = entry['text_box_position']
            h_u = 0.5 * (float(box[0][1]) + float(box[1][1]))
            h_d = 0.5 * (float(box[2][1]) + float(box[3][1]))
            centres.append(0.5 * (h_u + h_d))
        return centres

    name_items = input_body_list[0][0]['data']
    name_heights = get_h_location_func(name_items)
    gaps = [lower - upper for upper, lower in zip(name_heights, name_heights[1:])]
    judge_new = sum(gaps) / len(gaps) if gaps else fallback_tolerance

    # A name with a wide gap may be recognised as two boxes at the same
    # height; mark the later box so it is merged into the earlier one.
    invalid = set()
    for i, height in enumerate(name_heights):
        if i in invalid:
            continue
        for j in range(i + 1, len(name_heights)):
            if abs(height - name_heights[j]) < judge_new:
                invalid.add(j)

    names = []
    positions = []
    for i, entry in enumerate(name_items):
        if i not in invalid:
            names.append(entry['text'])
            positions.append(name_heights[i])
        elif any(i - 1 - back not in invalid for back in range(4)):
            # Merge backwards into the most recent real row.
            names[-1] = names[-1] + ' ' + entry['text']
    if not names:
        return list_out, dict_out

    # Column heights do not depend on the row, so compute them once.
    columns = [(column[0]['data'], get_h_location_func(column[0]['data']))
               for column in input_body_list[1:]]

    for name, row_height in zip(names, positions):
        row_list = [name]
        row_dict = {list_title[0]: name}
        for k, (items, heights) in enumerate(columns, start=1):
            text = '空'
            for entry, height in zip(items, heights):
                if abs(row_height - height) < judge_new:  # todo: pick the nearest instead
                    text = entry['text']
                    break
            row_list.append(text)
            row_dict[list_title[k]] = text
        list_out.append(row_list)
        dict_out.append(row_dict)
    return list_out, dict_out
def read_keywords(path):
    """
    Read the conf file at *path* (UTF-8, option names kept case-sensitive)
    and return the entries of its ``keywords`` section as
    ``(option, value)`` pairs.
    :return: list
    """
    parser = configparser_custom()
    parser.read(path, 'UTF-8')
    return parser.items("keywords")
def type_judge(lstKwds_need_judge=list, conf_path=str):
    """
    Decide the report type by matching recognised text against keyword files.

    Every file in *conf_path* matching ``[*.conf`` (names starting with ``[``)
    is read with ``read_keywords``.  Each keyword value is tested with
    ``PRE_pross.charactor_match_any`` against *lstKwds_need_judge*; the hit
    ratio is printed, and the first file with a ratio above 0.4 decides the
    type.  The type is the part of the file name between the first ``]`` and
    ``.conf``.

    Files without any keyword are skipped (they used to raise
    ``ZeroDivisionError``).

    NOTE: the parameter defaults are the ``list`` / ``str`` type objects, kept
    as-is for backward compatibility; callers are expected to pass both
    arguments.

    :param lstKwds_need_judge: texts recognised on the report.
    :param conf_path: directory holding the keyword conf files.
    :return: str with the report type, or None when no file matches.
    """
    for strTxtFile in glob(f"{conf_path}/[*.conf"):
        keywords = [value for _, value in read_keywords(strTxtFile)]
        if not keywords:
            continue
        hits = sum(1 for strKwd in keywords
                   if PRE_pross.charactor_match_any(lstKwds_need_judge, strKwd))
        ratio = hits / len(keywords)
        print(ratio)
        if ratio > 0.4:
            file_name = os.path.basename(strTxtFile)
            report_type = file_name.split('.conf')[0].split(']')[1]
            print(Back.GREEN + report_type)
            return report_type
    return None
def _labelled_field(texts, matcher, keyword, label, default):
    """Run *matcher* over *texts* for *keyword* and normalise the hit's prefix.

    Everything up to the last ``:`` of the hit is replaced by *label*;
    *default* is returned when the matcher finds nothing.
    """
    found = matcher(texts, keyword)
    if not found:
        return default
    return re.sub(r'.*:', label, found)


def main_pross(cvimg, demo_or_not, hospital_lock, report_type_lock):
    """Recognise one report photo and return the result as a JSON string.

    Steps: gamma-correct and pad the image, OCR it once to find the hospital
    and report type, load the matching layout conf and feature image, rectify
    the photo against the feature image, OCR the rectified image for the
    patient details, then crop every configured box, OCR the crops in
    parallel and align them into rows with ``AnswerToReport``.

    Side effects: writes ``temp/region.jpg`` and ``test_data.json``.

    :param cvimg: input image in OpenCV format.
    :param demo_or_not: forwarded to ``PRE_pross.knn_match_new`` and
                        ``PRE_pross.mask_processing_new``.
    :param hospital_lock: when truthy the hospital is fixed to
                          '复旦大学附属华山医院' instead of being detected.
    :param report_type_lock: when truthy the report type is fixed to '肺功能'
                             instead of being detected.
    :return: str - the JSON document on success, otherwise a message starting
             with '错误:'.
    """
    img_gamma = PRE_pross.gamma(cvimg)
    class_report = ClassReportJson()

    conf_ocr_ip = configparser_custom()
    conf_ocr_ip.read('conf/OCR_IP.conf', 'UTF-8')
    ocr_ip = conf_ocr_ip.items("ip")[0][1]

    # --- first OCR pass: which hospital, which kind of report -------------
    img_gamma = PRE_pross.image_border(img_input=img_gamma,
                                       dst='0')
    pre_response = ClassSendtoOcr(cvimg=img_gamma, ipaddr=ocr_ip).send()
    # send() returns the marker string instead of a response when offline.
    if pre_response == 'OCROFFLINE':
        return '错误:OCR离线'
    pre_payload = pre_response.json()
    if len(pre_payload["results"]) == 0:
        return f'错误:OCR没有正常工作:\n{pre_payload["msg"]}'
    report_overview = [entry['text'] for entry in pre_payload["results"][0]["data"]]

    if hospital_lock:
        hospital = '复旦大学附属华山医院'
    else:
        hospital = PRE_pross.charactor_match_hospital_name(report_overview, '医院')
    class_report.hos_name = hospital
    path_prefix = hospital
    # OCR splits widely spaced text into separate items, most likely
    # recognising them as Chinese characters first.
    if hospital is None:
        return '错误:未能识别所属医院,请拍摄完整的报告单图片,并保证纸面平整'

    if report_type_lock:
        report_type = '肺功能'
    else:
        report_type = type_judge(lstKwds_need_judge=report_overview,
                                 conf_path='conf')
    class_report.repo_type = report_type
    path_suffix = f'-{report_type}'

    conf_path = f'conf/{path_prefix}{path_suffix}.conf'
    if not os.path.exists(conf_path):
        return '错误:目前暂不支持此医院的此种报告'
    img_feature_path = f'OCR_IMG/Feature_IMG/{path_prefix}{path_suffix}.jpg'
    if not os.path.exists(img_feature_path):
        return '错误:缺少特征图片,无法匹配,请等候开发者后续维护'

    # --- layout configuration ---------------------------------------------
    conf = configparser_custom()
    conf.read(conf_path, 'UTF-8')
    # Each box is [left, right, top, bottom].
    box_list = [[int(value) for value in coordinates.split(',')]
                for _, coordinates in conf.items("boxes")]
    # Column titles are the box option names with their digits removed.
    remove_digits = str.maketrans('', '', digits)
    list_title = [option.translate(remove_digits) for option in conf.as_dict()["boxes"]]

    # --- rectify the photo against the feature image ----------------------
    img_template = PRE_pross.cv_imread_chs(img_feature_path)
    img_template = cv2.cvtColor(img_template, cv2.COLOR_BGR2GRAY)
    img_need_pross = cv2.cvtColor(img_gamma, cv2.COLOR_BGR2GRAY)
    # Matching runs on a reduced copy for speed.
    img_small_1k, ratio = PRE_pross.zoom_to_1k(img_need_pross)
    correct_matrix, knn_result = PRE_pross.knn_match_new(template_img=img_template,
                                                         img_need_match=img_small_1k,
                                                         demo=demo_or_not)
    if knn_result == 2874734:
        return '错误:未能成功探测布局,建议重新拍摄'
    # Warp with the homography matrix and crop directly (this also handles
    # rotation, which the old method could not).  The matrix was found on the
    # reduced image, so its entries are rescaled by `ratio` first.
    correct_matrix[0][2] = correct_matrix[0][2] / ratio
    correct_matrix[1][2] = correct_matrix[1][2] / ratio
    correct_matrix[2][0] = correct_matrix[2][0] * ratio
    correct_matrix[2][1] = correct_matrix[2][1] * ratio
    target_size = (round(img_template.shape[1] / ratio),
                   round(img_template.shape[0] / ratio))
    img_screen_cut = cv2.warpPerspective(img_need_pross, correct_matrix, target_size)
    img_screen_cut_1k, _ = PRE_pross.zoom_to_1k(img_screen_cut)
    cv2.imwrite('temp/region.jpg', img_screen_cut_1k)

    # --- patient details, read from the rectified image -------------------
    usr_info_response = ClassSendtoOcr(img_screen_cut_1k, ocr_ip).send()
    if usr_info_response == 'OCROFFLINE':
        return '错误:OCR离线'
    usr_payload = usr_info_response.json()
    if len(usr_payload["results"]) == 0:
        return f'错误:OCR没有正常工作,\n{usr_info_response.text}'
    usr_info_overview = [entry['text'] for entry in usr_payload["results"][0]["data"]]

    match_name_age = PRE_pross.charactor_match_count_name_age
    class_report.name = _labelled_field(usr_info_overview, match_name_age,
                                        '名:', '姓名:', '姓名:')
    class_report.sex = _labelled_field(usr_info_overview, PRE_pross.charactor_match_count_sex,
                                       '别:', '性别:', '性别:')
    class_report.age = _labelled_field(usr_info_overview, match_name_age,
                                       '龄:', '年龄:', '年龄:')
    repo_date = _labelled_field(usr_info_overview, match_name_age,
                                '报告日期:', '报告日期:', None)
    if repo_date is None:
        # Some reports label the field as report time instead of report date.
        repo_date = _labelled_field(usr_info_overview, match_name_age,
                                    '报告时间:', '报告时间:', '报告日期:')
    class_report.repo_data = repo_date

    # --- crop the configured boxes and recognise them ---------------------
    img_screen_cut = PRE_pross.length_width_ratio_correct(img_template=img_template,
                                                          img_input=img_screen_cut)
    img_element = PRE_pross.mask_processing_new(img_input=img_screen_cut,
                                                boxes_coordinate_xy=box_list,
                                                demo_or_not=demo_or_not,
                                                type_char='repo',
                                                out_name='report')
    # Pad each crop; without the border the OCR sometimes recognises nothing.
    for index, element in enumerate(img_element):
        img_element[index] = PRE_pross.image_border(img_input=element,
                                                    dst='0')

    # Leaving the with-block waits for every submitted request to finish.
    with ThreadPoolExecutor(max_workers=10) as ocr_pool:
        jobs = [ocr_pool.submit(ClassSendtoOcr(element, ocr_ip).send)
                for element in img_element]

    answer = []
    for job in jobs:
        response = job.result()
        if response == 'OCROFFLINE':
            return '错误:OCR离线'
        results = response.json()["results"]
        if len(results) == 0:
            return f'错误:OCR未正常工作,{response.text}'
        answer.append(results)

    # Box positions list x before y, four points clockwise from the top-left.
    answer_need_to_alligin = AnswerToReport(ocr_answer=answer, list_title=list_title)
    class_report.list = answer_need_to_alligin.to_list()

    test_dict_class = {
        'hospital': f'{class_report.hos_name}',
        'repo_type': f'{class_report.repo_type}',
        'repo_date': f'{class_report.repo_data}',
        'name': f'{class_report.name}',
        'age': f'{class_report.age}',
        'sex': f'{class_report.sex}',
        'bloodtest': class_report.list,
        'write_time': f'{datetime.now()}',
        'explain': {
            'used': True,
            'details': "json生成测试",
        }
    }
    json_str = json.dumps(test_dict_class, ensure_ascii=False, indent=4)
    # The document holds non-ASCII text, so do not rely on the locale encoding.
    with open('test_data.json', 'w', encoding='utf-8') as json_file:
        json_file.write(json_str)
    return json_str
if __name__ == '__main__':
    # Start from an empty scratch tree.  The directory may not exist on a
    # first run, so only remove it when it is there.
    if os.path.isdir('temp'):
        shutil.rmtree('temp')
    for folder in ('temp', 'temp/DEMO', 'temp/ocr_result', 'temp/DEMO/mask'):
        os.mkdir(folder)
    img_orig_path = 'OCR_IMG/Input_IMG/zs-blood-normal.jpg'
    img_input = PRE_pross.cv_imread_chs(img_orig_path)
    main_pross(cvimg=img_input,
               demo_or_not=1,
               hospital_lock=False,
               report_type_lock=False)