# video_detection.py
# Parts of this source code draw on the following tutorials and contributions:
# https://realpython.com/blog/python/face-detection-in-python-using-a-webcam/
# http://hanzratech.in/2015/02/03/face-recognition-using-opencv.html
# http://stackoverflow.com/questions/14063070/overlay-a-smaller-image-on-a-larger-image-python-opencv
# http://docs.opencv.org/master/d0/d86/tutorial_py_image_arithmetics.html#gsc.tab=0
# http://docs.opencv.org/3.0-beta/doc/py_tutorials/py_core/py_basic_ops/py_basic_ops.html
# http://stackoverflow.com/questions/32290096/python-opencv-add-alpha-channel-to-rgb-image
# Import the libraries necessary for the system
# External libraries: NumPy and PIL (the Python Imaging Library)
import os
import sys

import cv2
import numpy as np
from PIL import Image
# Provide the HaarCascades, or face template. Usually found in the OpenCV directory
# The frontal face HaarCascades template captures the face 'skin' region.
cascPath = "opencv-3.1.0/data/haarcascades/haarcascade_frontalface_default.xml"
# Hair and other elements are excluded. However, you're welcome to add more templates,
# such as for detecting eyes, ears, nose, objects, etc. More detection templates
# can be found in the haarcascades directory.
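# For example, a minimal sketch of adding an eye cascade (the path below assumes
# the same OpenCV 3.1.0 checkout as above; adjust it to your installation):
# eyeCascade = cv2.CascadeClassifier("opencv-3.1.0/data/haarcascades/haarcascade_eye.xml")
# eyes = eyeCascade.detectMultiScale(some_gray_image)  # returns (x, y, w, h) boxes, like faces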
""" Request from the user two inputs """
# Tolerance is the padding for the box around the detected face. Without padding,
# the box will 'stick' too close to the skin of the face.
tolerance = int(raw_input("Please specify the padding, ideally between 20 - 50 (int): "))
# The confidence value output by OpenCV's LBPH recognizer is a distance, not a percentage:
# 0 means an exact match between the detected face and the training data, and larger
# values mean weaker matches (it is not strictly capped at 100).
confidence_level = int(raw_input("Please specify the required confidence, ideally <40 (int): "))
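# A minimal sketch of defensive input handling, if you'd rather not crash on a
# non-numeric entry (the helper name and the defaults of 30 and 40 are assumptions,
# chosen from the ranges suggested above):
# def ask_int(prompt, default):
#     try:
#         return int(raw_input(prompt))
#     except ValueError:
#         return default
# tolerance = ask_int("Please specify the padding (int): ", 30)
# confidence_level = ask_int("Please specify the required confidence (int): ", 40)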
# Build the face detector from the Haar cascade
faceCascade = cv2.CascadeClassifier(cascPath)
# The training function aggregator. Takes in one argument which is the location of your training set.
# The training set contains images of the face that needs to be recognized. Refer to the following
# tutorial for recommendations on naming:
# http://docs.opencv.org/2.4/modules/contrib/doc/facerec/facerec_tutorial.html
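# Example training-set layout assumed by this script (the filenames below are
# hypothetical; only the '1_' prefix and the '.jpg' extension matter to the
# filter inside the function):
#   ./faces/1_front.jpg
#   ./faces/1_left.jpg
#   ./faces/1_smile.jpg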
def get_images_and_labels(path = "./faces/"):
# Declare variables
images = []
labels = []
# Append all the absolute image paths in a list image_paths
image_paths = [os.path.join(path, f) for f in os.listdir(path) if f.endswith('.jpg') and f.startswith('1_')]
# Loop through the training images
for image_path in image_paths:
        # Read the image and convert it to grayscale. Most, if not all, of these
        # algorithms REQUIRE grayscale input in order to capture the contours of
        # the face more easily
image_pil = Image.open(image_path).convert('L')
# Convert the image format into numpy array - this is how OpenCV handles facial structure
image = np.array(image_pil,'uint8')
# Set the label of the image (must be an int)
name = 1
        # If you have multiple people in your images whom you'd like to have recognized,
        # you can assign a unique label to each person: for example, label 1 for yourself,
        # label 2 for someone else, and so on.
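        # A minimal sketch of deriving the label from the filename prefix instead of
        # hard-coding it (assumes the '<label>_' naming shown above; not in the original):
        # name = int(os.path.split(image_path)[1].split('_')[0])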
        # Detect the face in the image based on the HaarCascade object. For more control,
        # you can pass additional parameters into the call
faces = faceCascade.detectMultiScale(image)
"""detectMultiScale Expanded"""
        # faces = faceCascade.detectMultiScale(
        #     image,            # image source
        #     scaleFactor=1.1,  # how much the image is shrunk at each detection scale; must be > 1.0
        #     minNeighbors=5,   # how many overlapping candidates a detection needs to be kept; 3 - 6 works well
        #     minSize=(30, 30), # the minimum size of a detected face; depends on the video frame size
        #     flags=0           # kept for backwards compatibility with the old C API
        # )
        # If a face is detected, append the face region to images and
        # its label to labels
for (x,y,w,h) in faces:
# This shows the detected face in a new window for the person to review
cv2.imshow("Recognizing Face",image[y: y + h, x: x + w])
# Appends the image to the image array
images.append(image[y:y+h,x:x+w])
            # Appends the label to the label array
labels.append(name)
# Let us know what's up
print "Adding faces to training set..."
            # Pause for 100 ms so the preview window stays visible
cv2.waitKey(100)
# Return the images found and labels
return images, labels
# Create the face recognizer (the actual training happens below)
recognizer = cv2.face.createLBPHFaceRecognizer()
# This recognizer uses the LBPH algorithm. Alternative constructors:
# Eigenface Recognizer - createEigenFaceRecognizer()
# Fisherface Recognizer - createFisherFaceRecognizer()
# Local Binary Patterns Histograms Face Recognizer - createLBPHFaceRecognizer()
# An explanation of each algorithm can be found here: http://docs.opencv.org/2.4/modules/contrib/doc/facerec/facerec_tutorial.html
# Call the training set aggregator
images, labels = get_images_and_labels()
# Close any windows still open from the "imshow" call inside get_images_and_labels
cv2.destroyAllWindows()
# Conduct the training with the provided face images and their matching labels
recognizer.train(images, np.array(labels))
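# Optional: persist the trained model so later runs can skip retraining - a sketch
# using FaceRecognizer's save/load (the filename is an assumption, not in the original;
# depending on your OpenCV build the reader may be called load() or read()):
# recognizer.save("trained_faces.yml")
# ...and in a later session, instead of calling train():
# recognizer.load("trained_faces.yml")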
# Open and initiate the default (0) camera.
# You can access different cameras using different ints
video_capture = cv2.VideoCapture(0)
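# A small defensive check (not in the original): bail out early if the camera
# could not be opened, instead of failing later inside the read loop
if not video_capture.isOpened():
    sys.exit("Could not open the camera.")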
# Declare variables for box/padding smoothing.
# These two points are opposite corners of the detection box
pntA = [0,0]
pntB = [0,0]
# Loop through the video frames
while True:
    # Capture the video frame by frame. From the documentation: read() combines
    # VideoCapture::grab and VideoCapture::retrieve in one call, decoding and
    # returning the just-grabbed frame. It is the most convenient way to read
    # video files or capture camera data.
ret, frame = video_capture.read()
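    # If the grab failed (e.g. the camera was disconnected), stop the loop instead
    # of crashing in cvtColor below (a small defensive addition, not in the original)
    if not ret:
        break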
    # Convert the frame to grayscale. Since the frame comes from OpenCV's VideoCapture,
    # it is already an np.array (unlike PIL, where you have to convert the object
    # into an np.array yourself)
gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
# Detect the face
faces = faceCascade.detectMultiScale(
gray,
scaleFactor=1.2,
minNeighbors=6,
minSize=(50, 50),
flags = 0
)
# Draw a rectangle around the faces
# 'frame' is the video frame
for (x, y, w, h) in faces:
        # Based on the training, predict who the individual in the video frame is
        name_predicted, conf = recognizer.predict(gray[y: y + h, x: x + w])
        # This is where the distinction is made between individuals.
        # If you plan on recognizing multiple individuals, the labels can be used
        # to branch (like a switch statement in C)
if conf < confidence_level:
color = (0,255,255)
text = "ADMIN"
overlayImage = True
else:
color = (255,255,255)
text = "ASSET"
overlayImage = False
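        # A sketch of handling several known people via a label-to-name map instead of
        # the binary ADMIN/ASSET check above (labels, names, and colors here are hypothetical):
        # PEOPLE = {1: ("ADMIN", (0, 255, 255)), 2: ("GUEST", (0, 255, 0))}
        # if conf < confidence_level and name_predicted in PEOPLE:
        #     text, color = PEOPLE[name_predicted]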
        # Smooth the detection box so it doesn't jitter from frame to frame:
        # a corner is only moved when it drifts more than `tolerance` pixels from
        # its previous position. In my experience, ~30 is the ideal tolerance;
        # much smaller values make the box very erratic.
# Define point A of the rectangle
if x > pntA[0] + tolerance or x < pntA[0] - tolerance:
pntA[0] = x
if y > pntA[1] + tolerance or y < pntA[1] - tolerance:
pntA[1] = y
# Define point B of the rectangle
if (x+w) > pntB[0] + tolerance or (x+w) < pntB[0] - tolerance:
pntB[0] = x + w
if (y+h) > pntB[1] + tolerance or (y+h) < pntB[1] - tolerance:
pntB[1] = y + h
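        # Worked example (assuming tolerance = 30): if pntA[0] was 90 and the new x
        # is 100, the corner stays at 90 because |100 - 90| <= 30; only once the face
        # moves past 120 (or below 60) does the box snap to the new position.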
# Draw the rectangle or overlay image
if overlayImage:
# Overlay the Person of Interest image
            # Specify the top-left coordinates where the overlay will be placed
y_offset = pntA[1] - tolerance
x_offset = pntA[0] - tolerance
            # Load the overlay image. Note: cv2.imread cannot decode SVG, so use a
            # raster image with an alpha channel (e.g. a PNG exported from the SVG);
            # the -1 flag (IMREAD_UNCHANGED) keeps that alpha channel intact
            overlay = cv2.imread("Admin_Square.png", -1)
            # Define the ideal size of the box and stabilize it
            overlay_size = pntB[0] - pntA[0] + 2*tolerance
            # Resize the overlay to fit the face nicely
            overlay = cv2.resize(overlay, (overlay_size, overlay_size))
            # # The overlay's transparent region would turn out black if we naively copied an
            # # image that has four channels (RGBA) into the three-channel (RGB) video frame.
            # # Capture the shape information of the overlay for image blending purposes
            # rows,cols,channels = overlay.shape
            # # Specify the region of interest (ROI) where I would like to place the overlay in the frame
            # roi = frame[y_offset:y_offset+rows, x_offset:x_offset+cols]
            # # Like everything in OpenCV, convert the overlay to grayscale for ease of masking
            # gray_overlay = cv2.cvtColor(overlay,cv2.COLOR_BGR2GRAY)
            # """Let's create a mask of the image so we can transpose it onto the frame"""
            # # What is thresholding? From the documentation:
            # # "If pixel value is greater than a threshold value, it is assigned one value (may be white),
            # # else it is assigned another value (may be black).
            # # The function used is cv2.threshold.
            # # First argument is the source image, which should be a grayscale image.
            # # Second argument is the threshold value which is used to classify the pixel values."
            # # THRESH_BINARY means each pixel becomes either 0 or 255: anything above the
            # # threshold (0 here) turns white (255), and everything else turns black (0).
# ret, mask = cv2.threshold(gray_overlay, 0, 255, cv2.THRESH_BINARY)
            # # This should be straightforward. It's a bitwise masking condition, where we want
            # # to capture the 'black' (empty space) region - which is mask_inv
# mask_inv = cv2.bitwise_not(mask)
            # # Let's do some image arithmetic on the masked copies
# frame_img = cv2.bitwise_and(roi,roi,mask = mask_inv)
# overlay_img = cv2.bitwise_and(overlay,overlay,mask = mask)
            # # NOW, the overlay (read with IMREAD_UNCHANGED) has 4 channels (RGB + A), but video
            # # frames have only 3 (RGB). In order to add them, we need them to be of equal matrix
            # # size. Therefore, we should either add an extra channel to frame_img OR remove the
            # # alpha channel from the overlay. It's easier to remove the overlay's alpha
# b_channel, g_channel, r_channel, a_channel = cv2.split(overlay_img)
            # # If you want to create a dummy alpha channel, you can use the following command
# # alpha_channel = np.ones((185, 198)) * 50
# overlay_rgb = cv2.merge((b_channel, g_channel, r_channel))
# # Apply slight blur if needed
# overlay_rgb = cv2.GaussianBlur(overlay_rgb,(11,11),0)
# # overlay_rgb = cv2.bilateralFilter(overlay_rgb,9,75,75)
            # # Sum both images - both MUST have the same size AND the same number of channels
# dst = cv2.add(frame_img,overlay_rgb)
# transpose the image blend into the frame
# frame[y_offset:y_offset+rows, x_offset:x_offset+cols] = dst
            # # The following (live) code overlays the image within the frame's matrix, making
            # # sure the transparent regions are respected in every color channel.
            # # Concretely, the indexing reads frame[y, x, c], where a single pixel of the
            # # overlay splits into 4 channels:
            # #   c = 0 is Blue
            # #   c = 1 is Green
            # #   c = 2 is Red
            # #   c = 3 is Alpha (transparency)
            # Blend the overlay into the frame ONLY if the overlay fits entirely inside
            # the video frame. For more detail on what's going on here, please read the
            # commented section above
            if (y_offset > 0 and x_offset > 0 and
                    y_offset + overlay_size < frame.shape[0] and
                    x_offset + overlay_size < frame.shape[1]):
                alpha = overlay[:, :, 3] / 255.0
                roi = frame[y_offset:y_offset + overlay_size, x_offset:x_offset + overlay_size]
                for c in range(0, 3):
                    roi[:, :, c] = overlay[:, :, c] * alpha + roi[:, :, c] * (1.0 - alpha)
else:
# Create a simple square for others
cv2.rectangle(frame, (pntA[0]-tolerance, pntA[1]-tolerance), (pntB[0] + tolerance, pntB[1] + tolerance), color, thickness=4, lineType=8, shift=0)
# Draw the text (ADMIN or ASSET)
# Selected font
font = cv2.FONT_HERSHEY_DUPLEX
# Define the position of text
midpoint = (pntA[1]-tolerance) + ((pntB[1] + tolerance) - (pntA[1]-tolerance))/2 + 10
# Put the text
cv2.putText(frame,text,(pntB[0] + tolerance + 20, midpoint), font,1,color)
# Display the resulting frame
cv2.imshow('Video', frame)
if cv2.waitKey(1) & 0xFF == ord('q'):
break
# When everything is done, release the capture
video_capture.release()
cv2.destroyAllWindows()