-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathhand_training.py
233 lines (167 loc) · 6.73 KB
/
hand_training.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
import dlib
import glob
import cv2
import os
import sys
import time
import numpy as np
import matplotlib.pyplot as plt
import pyautogui as pyg
import shutil
from imutils import face_utils
import math
# If cleanup is True then the new images and annotations will be appended to previous ones
# If False then all previous images and annotations will be deleted.
cleanup = True
# Set the window to a normal one so we can adjust it
cv2.namedWindow('frame', cv2.WINDOW_NORMAL)
# Resize the window and adjust it to the center
# This is done so we're ready for capturing the images.
cv2.resizeWindow('frame', 1920,1080)
cv2.moveWindow("frame", 0,0)
# Initialize webcam
cap = cv2.VideoCapture(0)
window_name = "tracking"
# Initalize sliding window's x1,y1
x1 ,y1 = 0,0
# These will be the width and height of the sliding window.
window_width = 190#140
window_height = 190
# We will save images after every 4 frames
# This is done so we don't have lot's of duplicate images
skip_frames = 3
frame_gap = 0
# This is the directory where our images will be stored
# Make sure to change both names if you're saving a different Detector
directory = 'train_images_h'
box_file = 'boxes_h.txt'
# If cleanup is True then delete all imaages and bounding_box annotations.
if cleanup:
# Delete the images directory if it exists
if os.path.exists(directory):
shutil.rmtree(directory)
# Clear up all previous bounding boxes
open(box_file, 'w').close()
# Initialize the counter to 0
counter = 0
elif os.path.exists(box_file):
# If cleanup is false then we must append the new boxes with the old
with open(box_file,'r') as text_file:
box_content = text_file.read()
# Set the counter to the previous highest checkpoint
counter = int(box_content.split(':')[-2].split(',')[-1])
# Open up this text file or create it if it does not exists
fr = open(box_file, 'a')
# Create our image directory if it does not exists.
if not os.path.exists(directory):
os.mkdir(directory)
# Initial wait before you start recording each row
initial_wait = 0
# Start the loop for the sliding window
while(True):
# Start reading from camera
ret, frame = cap.read()
if not ret:
print("failed")
break
# Invert the image laterally to get the mirror reflection.
frame = cv2.flip( frame, 1 )
# Make a copy of the original frame
orig = frame.copy()
# Wait the first 50 frames so that you can place your hand correctly
if initial_wait > 60:
# Increment frame_gap by 1.
frame_gap +=1
# Move the window to the right by some amount in each iteration.
if x1 + window_width < frame.shape[1]:
x1 += 4
time.sleep(0.1)
elif y1 + window_height + 270 < frame.shape[0]:
# If the sliding_window has reached the end of the row then move down by some amount.
# Also start the window from start of the row
y1 += 200
x1 = 0
# Setting frame_gap and init_wait to 0.
# This is done so that the user has the time to place the hand correctly
# in the next row before image is saved.
frame_gap = 0
initial_wait = 0
# Break the loop if we have gone over the whole screen.
else:
break
else:
initial_wait += 1
# Save the image every nth frame.
if frame_gap == skip_frames:
# Set the image name equal to the counter value
img_name = str(counter) + '.png'
# Save the Image in the defined directory
img_full_name = directory + '/' + str(counter) + '.png'
cv2.imwrite(img_full_name, orig)
# Save the bounding box coordinates in the text file.
fr.write('{}:({},{},{},{}),'.format(counter, x1, y1, x1+window_width, y1+window_height))
# Increment the counter
counter += 1
# Set the frame_gap back to 0.
frame_gap = 0
# Draw the sliding window
cv2.rectangle(frame,(x1,y1),(x1+window_width,y1+window_height),(0,255,0),3)
# Display the frame
cv2.imshow('frame', frame)
if cv2.waitKey(1) == ord('q'):
break
# Release camera and close the file and window
cap.release()
cv2.destroyAllWindows()
fr.close()
# In this dictionary our images and annotations will be stored.
data = {}
# Get the indexes of all images.
image_indexes = [int(img_name.split('.')[0]) for img_name in os.listdir(directory)]
# Shuffle the indexes to have random train/test split later on.
np.random.shuffle(image_indexes)
# Open and read the content of the boxes.txt file
f = open(box_file, "r")
box_content = f.read()
# Convert the bounding boxes to dictionary in the format `index: (x1,y1,x2,y2)` ...
box_dict = eval( '{' +box_content + '}' )
# Close the file
f.close()
# Loop over all indexes
for index in image_indexes:
# Read the image in memmory and append it to the list
img = cv2.imread(os.path.join(directory, str(index) + '.png'))
# Read the associated bounding_box
bounding_box = box_dict[index]
# Convert the bounding box to dlib format
x1, y1, x2, y2 = bounding_box
dlib_box = [ dlib.rectangle(left=x1 , top=y1, right=x2, bottom=y2) ]
# Store the image and the box together
data[index] = (img, dlib_box)
# This is the percentage of data we will use to train
# The rest will be used for testing
percent = 0.8
# How many examples make 80%.
split = int(len(data) * percent)
# Seperate the images and bounding boxes in different lists.
images = [tuple_value[0] for tuple_value in data.values()]
bounding_boxes = [tuple_value[1] for tuple_value in data.values()]
# Initialize object detector Options
options = dlib.simple_object_detector_training_options()
# I'm disabling the horizontal flipping, becauase it confuses the detector if you're training on few examples
# By doing this the detector will only detect left or right hand (whichever you trained on).
options.add_left_right_image_flips = False
# Set the c parameter of SVM equal to 5
# A bigger C encourages the model to better fit the training data, it can lead to overfitting.
# So set an optimal C value via trail and error.
options.C = 5
# Note the start time before training.
st = time.time()
# You can start the training now
detector = dlib.train_simple_object_detector(images[:split], bounding_boxes[:split], options)
# Print the Total time taken to train the detector
print('Training Completed, Total Time taken: {:.2f} seconds'.format(time.time() - st))
file_name = 'Head_Detector.svm'
detector.save(file_name)
detector = dlib.train_simple_object_detector(images, bounding_boxes, options)
detector.save(file_name)