I have a small Python script that uses cv2 to capture the first face detected and display only that region in a cv2 window. Everything works great.
Currently, the video feed freezes when the window is minimized. How can I make my script keep capturing video while the cv2 window is minimized to the tray?
EDIT
I would also like to know whether there is a better approach that would reduce the load on the CPU. Currently, running this script uses 14–20% of my CPU.
from __future__ import division
from imutils.video import VideoStream
import face_recognition
import imutils
import cv2
# Rolling history of recent face boxes (oldest first), shared across calls.
POINTS = []


def landmarkTrackSmoothing(box, factor, maxPoints=30):
    """Smooth a face box over recent frames and rescale its coordinates.

    The first box in ``box`` is appended to the module-level ``POINTS``
    history, the history is capped at ``maxPoints`` entries, and the
    per-coordinate mean of the history is divided by ``factor``.

    Args:
        box: Sequence whose first item holds four numeric coordinates.
            The coordinate order is preserved as-is, so the result can be
            unpacked in the same order the caller supplied.
        factor: Scale ratio the averaged coordinates are divided by.
        maxPoints: Maximum number of boxes kept in the history. Values
            below 1 are treated as 1 so the call never fails on an empty
            history.

    Returns:
        A list of four ints: the averaged, rescaled coordinates.
    """
    # Make room for the new sample; also trims extra entries if a smaller
    # window is requested than on previous calls.
    window = max(maxPoints, 1)
    excess = len(POINTS) - window + 1
    if excess > 0:
        del POINTS[:excess]

    POINTS.append(list(box[0][:4]))

    # zip(*POINTS) regroups the history column-wise, one tuple per coordinate.
    return [int((sum(col) / len(col)) / factor) for col in zip(*POINTS)]
def cartoonFilter(roi):
    """Return a cartoon-styled version of a colour image region.

    Args:
        roi: Colour image array in BGR channel order (the caller crops it
            from a frame it treats as BGR).

    Returns:
        The bilateral-filtered colour image with pixels masked out
        wherever the adaptive threshold marks an edge.
    """
    # 1) Edges: use the BGR conversion code so the red and blue channels
    #    get their correct luminance weights.
    gray = cv2.cvtColor(roi, cv2.COLOR_BGR2GRAY)
    gray = cv2.medianBlur(gray, 5)
    edges = cv2.adaptiveThreshold(
        gray, 255, cv2.ADAPTIVE_THRESH_MEAN_C, cv2.THRESH_BINARY, 9, 9)
    # 2) Color: flatten colour regions while keeping strong edges
    color = cv2.bilateralFilter(roi, 9, 300, 300)
    # 3) Cartoon: keep the colour only where the edge mask is non-zero
    return cv2.bitwise_and(color, color, mask=edges)
def OpenCamera():
    """Show the first detected face from the webcam in a window.

    Each frame is downscaled to 240 pixels wide and converted to
    grayscale for face detection. The first detected box is smoothed and
    scaled back up by ``landmarkTrackSmoothing`` and the matching region
    of the full-size frame is displayed. When no face is detected the
    last known region is shown, or the whole frame if there is none yet.

    Keys: ``q`` quits, ``c`` toggles the cartoon filter.
    """
    vs = VideoStream(0 + cv2.CAP_DSHOW, framerate=120).start()
    vs.stream.set(cv2.CAP_PROP_FRAME_WIDTH, 1280)
    vs.stream.set(cv2.CAP_PROP_FRAME_HEIGHT, 1024)

    # Last displayed face region as (top, bottom, left, right), if any
    prev = None
    # Add filter flags
    cartoonEffect = False

    # Create the window once; re-creating it on every frame is redundant.
    cv2.namedWindow("Frame", cv2.WINDOW_NORMAL)

    try:
        # loop over frames from the video file stream
        while True:
            # grab the frame from the threaded video stream
            frame = vs.read()
            if frame is None:
                # No frame available; keep handling key presses so that
                # quitting still works.
                if cv2.waitKey(1) & 0xFF == ord("q"):
                    break
                continue

            # Downscale first, then convert to grayscale, so that the
            # conversion runs on the small image and detection really
            # receives the grayscale result.
            small = imutils.resize(frame, width=240)
            gray = cv2.cvtColor(small, cv2.COLOR_BGR2GRAY)

            # calculate upscale factor for landmark locations
            factor = float(gray.shape[1]) / frame.shape[1]

            # detect the bounding boxes of the faces in the small image
            boxes = face_recognition.face_locations(gray)

            if boxes:
                # Smooth and upscale only the first face, exactly once per
                # frame, so each frame adds a single history sample.
                top, right, bottom, left = landmarkTrackSmoothing(
                    [list(boxes[0])], factor)
                roi = frame[top:bottom, left:right]
                prev = (top, bottom, left, right)
                # Skip the filter on an empty crop; OpenCV would raise.
                if cartoonEffect and roi.size:
                    roi = cartoonFilter(roi)
            elif prev is not None:
                # No detection this frame: reuse the last known region
                roi = frame[prev[0]:prev[1], prev[2]:prev[3]]
            else:
                roi = frame

            # Only display non-empty regions
            if roi.size:
                cv2.imshow("Frame", roi)
                cv2.resizeWindow("Frame", 512, 512)

            # continue looping until quit: expandable to add dynamic key
            # commands for filters
            key = cv2.waitKey(1) & 0xFF
            if key == ord("q"):
                break
            if key == ord('c'):
                cartoonEffect = not cartoonEffect
    finally:
        # Cleanup runs on quit and also if an error interrupts the loop
        cv2.destroyAllWindows()
        vs.stop()
# Begin capturing only when run as a script, so that importing this
# module does not open the camera.
if __name__ == '__main__':
    OpenCamera()
I managed to reduce my CPU usage to 1–10% by using a Haar cascade instead of my previous approach. I am still unable to keep the camera capturing while the window is minimized.
Final solution:
import cv2
import dlib
import time
# Rolling history of recent face regions (oldest first), shared across calls.
POINTS = []


def followFaceSmoothing(roi, maxPoints=30):
    """Smooth a face region by averaging it with recent regions.

    ``roi`` is appended to the module-level ``POINTS`` history, the
    history is capped at ``maxPoints`` entries, and the per-coordinate
    mean of the history is returned.

    Args:
        roi: Four numeric coordinates as (top, bottom, left, right).
        maxPoints: Maximum number of regions kept in the history. Values
            below 1 are treated as 1 so the call never fails on an empty
            history.

    Returns:
        A list of four ints: the averaged (top, bottom, left, right).
    """
    # Make room for the new sample; also trims extra entries if a smaller
    # window is requested than on previous calls.
    window = max(maxPoints, 1)
    excess = len(POINTS) - window + 1
    if excess > 0:
        del POINTS[:excess]

    POINTS.append(list(roi[:4]))

    # zip(*POINTS) regroups the history column-wise, one tuple per coordinate.
    return [int(sum(col) / len(col)) for col in zip(*POINTS)]
# Frontal-face Haar cascade (the XML is provided with the OpenCV library).
# The file name is relative, so it is looked up from the current working
# directory.
_CASCADE_FILE = 'haarcascade_frontalface_default.xml'
faceCascade = cv2.CascadeClassifier(_CASCADE_FILE)
def _largest_face(faces):
    """Return the largest (x, y, w, h) detection as plain ints, or None."""
    if len(faces) == 0:
        return None
    # max() keeps the first of equally sized detections.
    x, y, w, h = max(faces, key=lambda face: face[2] * face[3])
    # Plain ints are required by the dlib tracker.
    return int(x), int(y), int(w), int(h)


def detectAndTrackLargestFace():
    """Follow the largest detected face and show it in a window.

    The Haar cascade in ``faceCascade`` finds faces in the webcam image,
    and a dlib correlation tracker then follows the largest one. The
    tracked region is smoothed by ``followFaceSmoothing`` and the matching
    crop of the image is displayed. Detection is re-run whenever tracking
    quality drops below 8.75, and at least every 60 frames.

    Keys: ``Q`` quits, ``R`` forces a new detection (either letter case).

    The camera and the windows are released when the loop ends, whether
    by a key press, a failed camera read, Ctrl+C or an error. Errors other
    than Ctrl+C are no longer silently discarded.
    """
    # Capture video frames
    capture = cv2.VideoCapture(0)
    # Last smoothed region as [top, bottom, left, right]; empty until found
    lastroi = []

    # Create opencv named window
    cv2.namedWindow("Follow Face", cv2.WINDOW_NORMAL)
    # Start the window thread for the window we are using
    cv2.startWindowThread()

    # Create the tracker we will use
    tracker = dlib.correlation_tracker()
    # Whether the dlib tracker is currently following a face
    trackingFace = False
    # Frame counter so detection can be refreshed periodically
    frames = 0

    try:
        while True:
            # Retrieve the latest image from the webcam
            rc, img = capture.read()
            if not rc or img is None:
                # The camera delivered no frame; stop instead of failing
                # later inside OpenCV.
                break

            # Check if a key was pressed
            pressedKey = cv2.waitKey(2) & 0xFF
            if pressedKey in (ord('Q'), ord('q')):
                break
            if pressedKey in (ord('R'), ord('r')):
                # reset detection to refresh the image
                trackingFace = False

            # If we are not tracking a face, then try to detect one
            if not trackingFace:
                gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
                faces = faceCascade.detectMultiScale(
                    gray, minNeighbors=10, minSize=(50, 50),
                    maxSize=(300, 300))
                face = _largest_face(faces)
                if face is not None:
                    x, y, w, h = face
                    # Initialize the tracker on a slightly padded box
                    tracker.start_track(
                        img,
                        dlib.rectangle(x - 10, y - 20,
                                       x + w + 10, y + h + 20))
                    trackingFace = True
                    # NOTE(review): the original indentation was lost;
                    # this pause is assumed to belong to the tracker-start
                    # branch -- confirm.
                    time.sleep(0.06)

            # Check if the tracker is actively tracking a region
            if trackingFace:
                # Update the tracker and request the tracking quality
                trackingQuality = tracker.update(img)
                if trackingQuality >= 8.75:
                    tracked_position = tracker.get_position()
                    t_x = int(tracked_position.left())
                    t_y = int(tracked_position.top())
                    t_w = int(tracked_position.width())
                    t_h = int(tracked_position.height())
                    roi = (t_y, t_y + t_h, t_x, t_x + t_w)
                    if roi[0] > 0:
                        lastroi = followFaceSmoothing(roi)
                else:
                    trackingFace = False

            # reset every 60 frames to refresh face tracking
            frames += 1
            if frames > 59:
                frames = 0
                trackingFace = False

            # show the image on the screen
            view = img
            if lastroi:
                top, bottom, left, right = lastroi
                # Clamp negative starts: a negative slice index would wrap
                # around to the far side of the image.
                crop = img[max(top, 0):bottom, max(left, 0):right]
                if crop.size:
                    view = crop
            cv2.imshow("Follow Face", view)
            cv2.resizeWindow("Follow Face", 320, 320)
    except KeyboardInterrupt:
        # Ctrl+C is treated as a normal way to stop the loop
        pass
    finally:
        capture.release()
        cv2.destroyAllWindows()
# Script entry point: start tracking only when executed directly.
if "__main__" == __name__:
    detectAndTrackLargestFace()