-
Notifications
You must be signed in to change notification settings - Fork 0
/
text_detection.py
122 lines (101 loc) · 4.15 KB
/
text_detection.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
import os
import warnings
from glob import glob

import cv2
import cv2 as cv
import nibabel as nib
import numpy as np
import pydicom
import pytesseract
from PIL import Image
class TextRemoval:
"""
Class for performing text removal on images.
Attributes:
None
Methods:
predict: Apply text removal algorithm to an image.
__call__: Apply text removal to a directory of images.
"""
def __init__(self):
pass
@staticmethod
def predict(img: np.array) -> np.array:
"""
Apply text removal algorithm (tesseract) to an image.
Args:
img (np.array): Input image as a NumPy array.
Returns:
np.array: Image with text removed.
"""
threshold = 100
# Insert rectangle in middle of image to ignore this part in the first iteration
height, width = img.shape[:2]
left = int(width / 4)
top = int(height / 4)
right = int(width * 3 / 4)
bottom = int(height * 3 / 4)
img_covered = cv2.rectangle(img.copy(), (left, top), (right, bottom), (255, 255, 255), -1)
boxes = pytesseract.image_to_boxes(img_covered, output_type=pytesseract.Output.DICT, nice=1)
for left, bottom, right, top in zip(boxes["left"], boxes["bottom"], boxes["right"], boxes["top"]):
if right - left < threshold:
img = cv2.rectangle(img, (left, height - bottom), (right, height - top), (255, 255, 255), -1)
# Another iteration without the rectangle in the middle of the image
try:
boxes = pytesseract.image_to_boxes(img, output_type=pytesseract.Output.DICT, nice=1)
for left, bottom, right, top in zip(boxes["left"], boxes["bottom"], boxes["right"], boxes["top"]):
if right - left < threshold:
img = cv2.rectangle(img, (left, height - bottom), (right, height - top), (255, 255, 255), -1)
except:
pass
return img
def __call__(self, directory: str) -> None:
"""
Apply text removal to a directory of images.
Args:
directory (str): Path to the directory containing the images.
Returns:
None
"""
if os.path.isdir(directory):
files = glob(os.path.join(directory, '**', '*'), recursive=True)
else:
files = [directory]
for filepath in files:
file_ending = filepath.split('.')[-1].lower()
match file_ending:
# nifti
case 'png' | 'jpg':
img = cv.imread(filepath, 0)
base_fn = filepath[:-4]
case 'jpeg':
img = cv.imread(filepath, 0)
base_fn = filepath[:-5]
case 'dcm':
dcm = pydicom.dcmread(filepath, force=True)
img = dcm.pixel_array
base_fn = filepath[:-4]
case 'nii':
nifti = nib.load(filepath)
img = np.array(Image.fromarray(nifti.get_fdata().squeeze()).convert("RGB"))
base_fn = filepath[:-4]
case 'gz':
nifti = nib.load(filepath)
img = np.array(Image.fromarray(nifti.get_fdata().squeeze()).convert("RGB"))
base_fn = filepath[:-7]
case _:
raise NotImplementedError(
f'File ending {file_ending} not compatible, must be .dcm, .png, .jpg or .jpeg')
img = self.predict(img=img)
match file_ending:
# nifti
case 'png' | 'jpg' | 'jpeg':
cv.imwrite(f'{base_fn}_text_removed.png', img)
case 'dcm':
dcm.PixelData = img.tobytes()
dcm.save_as(f'{base_fn}_text_removed.png')
case 'nii':
nifti = nib.Nifti1Image(img, nifti.affine)
nib.save(nifti, f'{base_fn}_text_removed.nii')
case 'gz':
nifti = nib.Nifti1Image(img, nifti.affine)
nib.save(nifti, f'{base_fn}_text_removed.nii.gz')