-
Notifications
You must be signed in to change notification settings - Fork 21
/
cam_demo.py
128 lines (94 loc) · 3.31 KB
/
cam_demo.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
from __future__ import division
import time
import torch
import torch.nn as nn
from torch.autograd import Variable
import numpy as np
import cv2
from util import *
from darknet import Darknet
from preprocess import prep_image, inp_to_image
import pandas as pd
import random
import pickle as pkl
def get_test_input(input_dim, CUDA):
    """Load the bundled sample image as a network-ready input batch.

    Reads ``imgs/messi.jpg``, resizes it to ``input_dim`` x ``input_dim``,
    reverses the channel axis, moves channels first, scales pixel values by
    1/255 and prepends a batch axis.

    Args:
        input_dim: Side length, in pixels, of the square network input.
        CUDA: When truthy, the returned tensor is moved to the GPU.

    Returns:
        A float tensor (wrapped in ``Variable``) holding the single image
        with a leading batch axis of size 1.

    Raises:
        FileNotFoundError: If the sample image cannot be read.
    """
    sample_path = "imgs/messi.jpg"
    img = cv2.imread(sample_path)
    if img is None:
        # cv2.imread reports failure by returning None instead of raising,
        # which would otherwise surface as an opaque error inside cv2.resize.
        raise FileNotFoundError(
            "Could not read sample image: {0}".format(sample_path)
        )

    img = cv2.resize(img, (input_dim, input_dim))
    # Reverse the channel axis (OpenCV loads BGR) and go from HWC to CHW.
    img_ = img[:, :, ::-1].transpose((2, 0, 1))
    # Add the batch axis and scale to [0, 1]; the division yields a fresh
    # float array, so from_numpy never sees the negatively-strided view.
    img_ = img_[np.newaxis, :, :, :] / 255.0
    img_ = Variable(torch.from_numpy(img_).float())

    if CUDA:
        img_ = img_.cuda()
    return img_
def prep_image(img, inp_dim):
    """
    Convert a frame into the tensor layout fed to the network.

    The frame is resized to ``inp_dim`` x ``inp_dim`` (a plain resize, so the
    aspect ratio is not preserved), its channel axis is reversed, channels
    are moved first, values are divided by 255 and a batch axis is prepended.

    Returns a tuple ``(tensor, original image, (width, height))`` where the
    width and height are those of the original, un-resized frame.
    """
    # Capture the source size before resizing; shape is (rows, cols, ...).
    height, width = img.shape[0], img.shape[1]

    resized = cv2.resize(img, (inp_dim, inp_dim))
    # .copy() makes the reversed/transposed view contiguous for from_numpy.
    channels_first = resized[:, :, ::-1].transpose((2, 0, 1)).copy()

    tensor = torch.from_numpy(channels_first).float()
    batch = tensor.div(255.0).unsqueeze(0)
    return batch, img, (width, height)
def write(x, img, classes, your_class):
    """Draw one detection on ``img`` if its class is one of interest.

    Args:
        x: One detection row. Elements 1-4 are read as the box corners
            (x1, y1, x2, y2) and the last element as the class index.
            Assumes the corners are already in ``img`` pixel coordinates
            (the caller rescales them before calling).
        img: Image the box and label are drawn onto, in place.
        classes: Sequence mapping a class index to its name.
        your_class: Collection of class names that should be drawn;
            detections of any other class are ignored.

    Returns:
        ``img``, annotated when the detection's class was selected and
        untouched otherwise.
    """
    label = "{0}".format(classes[int(x[-1])])
    if label not in your_class:
        return img

    # Hand OpenCV plain Python ints: tuples of 0-dim tensors are rejected as
    # point arguments by current OpenCV bindings. int() truncates toward
    # zero, matching the original .int() cast.
    top_left = tuple(int(v) for v in x[1:3])
    bottom_right = tuple(int(v) for v in x[3:5])

    color = (0, 255, 0)
    cv2.rectangle(img, top_left, bottom_right, color, 1)

    # Filled rectangle sized to the label text, anchored at the box corner,
    # so the caption stays readable on any background.
    t_size = cv2.getTextSize(label, cv2.FONT_HERSHEY_PLAIN, 1, 1)[0]
    label_corner = (top_left[0] + t_size[0] + 3, top_left[1] + t_size[1] + 4)
    cv2.rectangle(img, top_left, label_corner, color, -1)
    cv2.putText(
        img,
        label,
        (top_left[0], top_left[1] + t_size[1] + 4),
        cv2.FONT_HERSHEY_PLAIN,
        1,
        [225, 255, 255],
        1,
    )
    return img
# Class-name lists keyed by file path, so the names file is read from disk
# once instead of once per frame.
_CLASS_NAMES_CACHE = {}


def _class_names(names_file):
    """Return the class names listed in ``names_file``, loading them once."""
    if names_file not in _CLASS_NAMES_CACHE:
        _CLASS_NAMES_CACHE[names_file] = load_classes(names_file)
    return _CLASS_NAMES_CACHE[names_file]


def yolo_output(frame, model, your_class, confidence, nms_thesh, CUDA, inp_dim):
    """Run the detector on one frame and draw the selected classes on it.

    Args:
        frame: Input image; boxes are drawn onto it in place.
        model: Network called as ``model(input, CUDA)``.
        your_class: Class names to draw; other detections are ignored.
        confidence: Confidence threshold forwarded to ``write_results``.
        nms_thesh: NMS threshold forwarded to ``write_results`` as
            ``nms_conf``.
        CUDA: When truthy, the input tensor is moved to the GPU.
        inp_dim: Side length of the square network input.

    Returns:
        The input frame, annotated with any selected detections.
    """
    num_classes = 80

    img, orig_im, _ = prep_image(frame, inp_dim)
    if CUDA:
        img = img.cuda()

    # Inference only: skip autograd bookkeeping to save memory and time.
    with torch.no_grad():
        output = model(Variable(img), CUDA)
    output = write_results(
        output, confidence, num_classes, nms=True, nms_conf=nms_thesh
    )

    # NOTE(review): in the upstream YOLOv3 tutorial, write_results returns
    # the int 0 when nothing passes the thresholds -- confirm against this
    # repo's util module. Indexing that value below would raise, so hand the
    # frame back unannotated instead.
    if isinstance(output, int):
        return orig_im

    # Box corners are in network-input pixels. Normalise them to [0, 1] and
    # scale to the frame size; this matches the plain (non-letterboxed)
    # resize done by prep_image.
    output[:, 1:5] = torch.clamp(output[:, 1:5], 0.0, float(inp_dim)) / inp_dim
    output[:, [1, 3]] *= frame.shape[1]
    output[:, [2, 4]] *= frame.shape[0]

    classes = _class_names('data/coco.names')
    for detection in output:
        write(detection, orig_im, classes, your_class)
    return orig_im
def main():
    """Run YOLOv3 on a live capture source and display annotated frames.

    Loads the network configuration and weights, opens the capture source
    and shows each processed frame in a window until 'q' is pressed or the
    source stops delivering frames.

    Raises:
        ValueError: If the network input size is not a multiple of 32
            greater than 32.
        RuntimeError: If the capture source cannot be opened.
    """
    cfgfile = "cfg/yolov3.cfg"
    weightsfile = "yolov3.weights"
    confidence = 0.25
    nms_thesh = 0.4
    CUDA = torch.cuda.is_available()

    model = Darknet(cfgfile)
    model.load_weights(weightsfile)

    # Override the input resolution from the cfg file with a small one.
    model.net_info["height"] = 160
    inp_dim = int(model.net_info["height"])
    # Same constraints the original asserted, but raised explicitly so they
    # still apply when Python runs with -O.
    if inp_dim % 32 != 0 or inp_dim <= 32:
        raise ValueError(
            "Network input size must be a multiple of 32 greater than 32, "
            "got {0}".format(inp_dim)
        )

    if CUDA:
        model.cuda()
    model.eval()

    # 0 selects the default camera; use a file path here to run on a
    # recorded video instead.
    videofile = 0
    cap = cv2.VideoCapture(videofile)
    try:
        if not cap.isOpened():
            raise RuntimeError('Cannot capture source')

        while True:
            ret, frame = cap.read()
            if not ret:
                # Camera unplugged or end of stream: frame is not usable.
                break

            img = yolo_output(
                frame,
                model,
                ['cell phone', 'person'],
                confidence,
                nms_thesh,
                CUDA,
                inp_dim,
            )
            cv2.imshow("frame", img)

            key = cv2.waitKey(1)
            if key & 0xFF == ord('q'):
                break
    finally:
        # Always free the capture device and close the preview window.
        cap.release()
        cv2.destroyAllWindows()


if __name__ == '__main__':
    main()