Search code examples
python numpy opencv object-detection yolo

How can I run object detection on the GPU?


This program detects objects from a webcam feed, but it is slow. How can I make it faster to get a better FPS, and how can I use the GPU for faster detection and better performance? And how can I improve it overall? In this program I have used the YOLO configuration and weights with the COCO dataset.

import cv2
import numpy as np


# Load the YOLOv4 network from its Darknet configuration and weights files.
net = cv2.dnn.readNet('yolov4-custom.cfg', 'yolov4.weights')

# One class label per line; a detection's class id indexes into this list.
with open("coco.names", "r") as f:
    classes = f.read().splitlines()

cap = cv2.VideoCapture(0)
# To run on a video file instead of the webcam:
#cap = cv2.VideoCapture('videoplayback.mp4')
font = cv2.FONT_HERSHEY_PLAIN
# Palette of random colors used for the bounding boxes.
colors = np.random.uniform(0, 255, size=(100, 3))

# The output layer names are fixed once the network is loaded, so query them
# a single time instead of on every frame.
output_layers_names = net.getUnconnectedOutLayersNames()

try:
    while True:
        # read() reports failure (camera unavailable, end of video) through
        # its first return value; stop instead of crashing on img.shape.
        ok, img = cap.read()
        if not ok:
            break
        height, width, _ = img.shape

        # Scale pixel values by 1/255, resize to the 416x416 network input
        # and swap the red and blue channels.
        blob = cv2.dnn.blobFromImage(img, 1/255, (416, 416), (0,0,0), swapRB=True, crop=False)
        net.setInput(blob)
        layerOutputs = net.forward(output_layers_names)

        boxes = []
        confidences = []
        class_ids = []

        for output in layerOutputs:
            for detection in output:
                # Entries from index 5 onward are treated as per-class scores;
                # the best-scoring class is the candidate label.
                scores = detection[5:]
                class_id = np.argmax(scores)
                confidence = scores[class_id]
                if confidence > 0.2:
                    # The first four entries are used as a box centre and
                    # size relative to the frame; convert them to pixels.
                    center_x = int(detection[0]*width)
                    center_y = int(detection[1]*height)
                    w = int(detection[2]*width)
                    h = int(detection[3]*height)

                    # NMSBoxes and cv2.rectangle work from the top-left corner.
                    x = int(center_x - w/2)
                    y = int(center_y - h/2)

                    boxes.append([x, y, w, h])
                    confidences.append(float(confidence))
                    class_ids.append(class_id)

        # Non-maximum suppression: score threshold 0.2, overlap threshold 0.4.
        indexes = cv2.dnn.NMSBoxes(boxes, confidences, 0.2, 0.4)

        if len(indexes)>0:
            for i in indexes.flatten():
                x, y, w, h = boxes[i]
                label = str(classes[class_ids[i]])
                confidence = str(round(confidences[i],2))
                # Pick the color by class id, wrapped to the palette size, so
                # the lookup can never run past the end of the palette and a
                # class keeps the same color from frame to frame.
                color = colors[class_ids[i] % len(colors)]
                cv2.rectangle(img, (x,y), (x+w, y+h), color, 2)
                cv2.putText(img, label + " " + confidence, (x, y+20), font, 2, (255,255,255), 2)

        cv2.imshow('Image', img)
        key = cv2.waitKey(1)
        # 27 is the Esc key.
        if key==27:
            break
finally:
    # Always free the capture device and close the window, even on an error.
    cap.release()
    cv2.destroyAllWindows()

Solution

  • To use the GPU, OpenCV has to be compiled with CUDA support; this can be done by following this blog post: https://haroonshakeel.medium.com/build-opencv-4-4-0-with-cuda-gpu-support-on-windows-10-without-tears-aa85d470bcd0

    After doing that, add the two lines that select the CUDA backend and target, and the program will run on the GPU.

    import cv2
    import numpy as np
    
    
    # Load the YOLOv4 network from its Darknet configuration and weights files.
    net = cv2.dnn.readNet('yolov4-custom.cfg', 'yolov4.weights')

    # One class label per line; a detection's class id indexes into this list.
    with open("coco.names", "r") as f:
        classes = f.read().splitlines()

    # The two lines below ask OpenCV's DNN module to run inference through the
    # CUDA backend on the GPU (this needs an OpenCV build with CUDA support).
    net.setPreferableBackend(cv2.dnn.DNN_BACKEND_CUDA)

    net.setPreferableTarget(cv2.dnn.DNN_TARGET_CUDA)

    cap = cv2.VideoCapture(0)
    # To run on a video file instead of the webcam:
    #cap = cv2.VideoCapture('videoplayback.mp4')
    font = cv2.FONT_HERSHEY_PLAIN
    # Palette of random colors used for the bounding boxes.
    colors = np.random.uniform(0, 255, size=(100, 3))

    # The output layer names are fixed once the network is loaded, so query
    # them a single time instead of on every frame.
    output_layers_names = net.getUnconnectedOutLayersNames()

    try:
        while True:
            # read() reports failure (camera unavailable, end of video)
            # through its first return value; stop instead of crashing on
            # img.shape.
            ok, img = cap.read()
            if not ok:
                break
            height, width, _ = img.shape

            # Scale pixel values by 1/255, resize to the 416x416 network
            # input and swap the red and blue channels.
            blob = cv2.dnn.blobFromImage(img, 1/255, (416, 416), (0,0,0), swapRB=True, crop=False)
            net.setInput(blob)
            layerOutputs = net.forward(output_layers_names)

            boxes = []
            confidences = []
            class_ids = []

            for output in layerOutputs:
                for detection in output:
                    # Entries from index 5 onward are treated as per-class
                    # scores; the best-scoring class is the candidate label.
                    scores = detection[5:]
                    class_id = np.argmax(scores)
                    confidence = scores[class_id]
                    if confidence > 0.2:
                        # The first four entries are used as a box centre and
                        # size relative to the frame; convert them to pixels.
                        center_x = int(detection[0]*width)
                        center_y = int(detection[1]*height)
                        w = int(detection[2]*width)
                        h = int(detection[3]*height)

                        # NMSBoxes and cv2.rectangle work from the top-left
                        # corner.
                        x = int(center_x - w/2)
                        y = int(center_y - h/2)

                        boxes.append([x, y, w, h])
                        confidences.append(float(confidence))
                        class_ids.append(class_id)

            # Non-maximum suppression: score threshold 0.2, overlap
            # threshold 0.4.
            indexes = cv2.dnn.NMSBoxes(boxes, confidences, 0.2, 0.4)

            if len(indexes)>0:
                for i in indexes.flatten():
                    x, y, w, h = boxes[i]
                    label = str(classes[class_ids[i]])
                    confidence = str(round(confidences[i],2))
                    # Pick the color by class id, wrapped to the palette
                    # size, so the lookup can never run past the end of the
                    # palette and a class keeps the same color from frame to
                    # frame.
                    color = colors[class_ids[i] % len(colors)]
                    cv2.rectangle(img, (x,y), (x+w, y+h), color, 2)
                    cv2.putText(img, label + " " + confidence, (x, y+20), font, 2, (255,255,255), 2)

            cv2.imshow('Image', img)
            key = cv2.waitKey(1)
            # 27 is the Esc key.
            if key==27:
                break
    finally:
        # Always free the capture device and close the window, even on an
        # error.
        cap.release()
        cv2.destroyAllWindows()