-
Notifications
You must be signed in to change notification settings - Fork 8
/
segment_roi.py
executable file
·178 lines (158 loc) · 6.21 KB
/
segment_roi.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
import numpy as np
from skimage import morphology
from skimage import measure
from sklearn.cluster import KMeans
from skimage.transform import resize
from glob import glob
main_path = "/disk1/luna16/"
working_path = "/disk1/luna16/output2/"
file_list = glob(working_path + "images_*.npy")
for img_file in file_list:
# I ran into an error when using Kmean on np.float16, so I'm using np.float64 here
imgs_to_process = np.load(img_file).astype(np.float64)
print("on image", img_file)
for i in range(len(imgs_to_process)):
img = imgs_to_process[i]
# Standardize the pixel values
mean = np.mean(img)
std = np.std(img)
img = img - mean
img = img / std
# Find the average pixel value near the lungs
# to renormalize washed out images
middle = img[100:400, 100:400]
mean = np.mean(middle)
max = np.max(img)
min = np.min(img)
<<<<<<< HEAD
<<<<<<< HEAD
=======
>>>>>>> 96a5d5ce46bc223804d4a8d59249ebf2592973fd
=======
>>>>>>> 96a5d5ce46bc223804d4a8d59249ebf2592973fd
# To improve threshold finding, I'm moving the
# underflow and overflow on the pixel spectrum
img[img == max] = mean
img[img == min] = mean
kmeans = KMeans(n_clusters=2).fit(np.reshape(middle, [np.prod(middle.shape), 1]))
centers = sorted(kmeans.cluster_centers_.flatten())
threshold = np.mean(centers)
thresh_img = np.where(img < threshold, 1.0, 0.0) # threshold the image
eroded = morphology.erosion(thresh_img, np.ones([4, 4]))
dilation = morphology.dilation(eroded, np.ones([10, 10]))
# any regions that are too
# close to the top and bottom are removed
#
labels = measure.label(dilation)
label_vals = np.unique(labels)
regions = measure.regionprops(labels)
good_labels = []
for prop in regions:
B = prop.bbox
if B[2] - B[0] < 475 and B[3] - B[1] < 475 and B[0] > 40 and B[2] < 472:
good_labels.append(prop.label)
mask = np.ndarray([512, 512], dtype=np.int8)
mask[:] = 0
#
# The mask here is the mask for the lungs--not the nodes
# After just the lungs are left, we do another large dilation
# in order to fill in and out the lung mask
#
for N in good_labels:
mask = mask + np.where(labels == N, 1, 0)
mask = morphology.dilation(mask, np.ones([10, 10])) # one last dilation
imgs_to_process[i] = mask
np.save(img_file.replace("images", "lungmask"), imgs_to_process)
#
# Here we're applying the masks and cropping and resizing the image
#
file_list = glob(working_path + "lungmask_*.npy")
out_images = [] # final set of images
out_nodemasks = [] # final set of nodemasks
for fname in file_list:
print("working on file ", fname)
imgs_to_process = np.load(fname.replace("lungmask", "images"))
masks = np.load(fname)
node_masks = np.load(fname.replace("lungmask", "masks"))
for i in range(len(imgs_to_process)):
mask = masks[i]
node_mask = node_masks[i]
img = imgs_to_process[i]
new_size = [512, 512] # we're scaling back up to the original size of the image
img = mask * img # apply lung mask
#
# renormalizing the masked image (in the mask region)
#
new_mean = np.mean(img[mask > 0])
new_std = np.std(img[mask > 0])
#
# Pulling the background color up to the lower end
# of the pixel range for the lungs
#
old_min = np.min(img) # background color
img[img == old_min] = new_mean - 1.2 * new_std # resetting backgound color
img = img - new_mean
img = img / new_std
# make image bounding box (min row, min col, max row, max col)
labels = measure.label(mask)
regions = measure.regionprops(labels)
#
# Finding the global min and max row over all regions
#
min_row = 512
max_row = 0
min_col = 512
max_col = 0
for prop in regions:
B = prop.bbox
if min_row > B[0]:
min_row = B[0]
if min_col > B[1]:
min_col = B[1]
if max_row < B[2]:
max_row = B[2]
if max_col < B[3]:
max_col = B[3]
width = max_col - min_col
height = max_row - min_row
if width > height:
max_row = min_row + width
else:
max_col = min_col + height
#
# cropping the image down to the bounding box for all regions
#
img = img[min_row:max_row, min_col:max_col]
mask = mask[min_row:max_row, min_col:max_col]
if max_row - min_row < 5 or max_col - min_col < 5: # skipping all images with no god regions
pass
else:
# moving range to -1 to 1 to accomodate the resize function
mean = np.mean(img)
img = img - mean
min = np.min(img)
max = np.max(img)
img = img / (max - min)
new_img = resize(img, [512, 512])
new_node_mask = resize(node_mask[min_row:max_row, min_col:max_col], [512, 512])
out_images.append(new_img)
out_nodemasks.append(new_node_mask)
num_images = len(out_images)
#
# Writing out images and masks as 1 channel arrays for input into network
#
final_images = np.ndarray([num_images, 1, 512, 512], dtype=np.float32)
final_masks = np.ndarray([num_images, 1, 512, 512], dtype=np.float32)
for i in range(num_images):
final_images[i, 0] = out_images[i]
final_masks[i, 0] = out_nodemasks[i]
rand_i = np.random.choice(range(num_images), size=num_images, replace=False)
test_i = int(0.2*num_images)
val_i = int(0.1*num_images)
np.save(main_path+"trainImages.npy",final_images[rand_i[test_i:]])
np.save(main_path+"trainMasks.npy",final_masks[rand_i[test_i:]])
np.save(main_path+"valImages.npy",final_images[rand_i[val_i:test_i]])
np.save(main_path+"valMasks.npy",final_masks[rand_i[val_i:test_i]])
np.save(main_path+"testImages.npy",final_images[rand_i[:val_i]])
np.save(main_path+"testMasks.npy",final_masks[rand_i[:val_i]])
print('process done...')