Search code examples
python opencv flask yolo

How can I capture the image of an object detected by YOLOv3 and display it in Flask?


I am working on real-time object detection using YOLOv3 with OpenCV and Python. It works well. I am now trying to capture the image of each detected object and display it in Flask. Does anyone know how to implement this feature? I hope someone can help. I followed the tutorial at this link: https://medium.com/analytics-vidhya/real-time-object-detection-using-yolov3-with-opencv-and-python-64c985e14786

import cv2
import numpy as np
import os
import time
import detect as dt
from PIL import Image


# Locations of the YOLOv3 class-name list, trained weights and network config,
# all expected inside a local "yolo-coco" directory.
labelsPath = os.path.join("yolo-coco", "coco.names")
weightsPath = os.path.join("yolo-coco", "yolov3.weights")
configPath = os.path.join("yolo-coco", "yolov3.cfg")

# One class label per line; the context manager closes the file promptly.
with open(labelsPath) as labels_file:
    LABELS = labels_file.read().strip().split("\n")

net = cv2.dnn.readNet(configPath, weightsPath)

layer_names = net.getLayerNames()
# getUnconnectedOutLayers() yields 1-based layer indices. Depending on the
# OpenCV version they arrive as an Nx1 array or as a flat array, so flatten
# before indexing to support both layouts.
outputlayers = [
    layer_names[int(i) - 1]
    for i in np.array(net.getUnconnectedOutLayers()).flatten()
]

# One random colour per class, used when drawing bounding boxes.
colors = np.random.uniform(0, 255, size=(len(LABELS), 3))

# Open both capture devices (0 = first webcam, 1 = second webcam).
cap0 = cv2.VideoCapture(0)
cap1 = cv2.VideoCapture(1)
font = cv2.FONT_HERSHEY_PLAIN
starting_time = time.time()  # reference point for the FPS estimate
frame_id = 0  # number of frames processed from camera 0
count = 0  # index used to name the per-frame box dump files
use_cuda = 1  # forwarded to dt.do_detect in the main loop

# Main capture / detect / display loop.
#
# Camera 0 is run through the YOLOv3 network, annotated and shown in the
# "Image0" window; camera 1 is shown unprocessed in "Image1". The loop ends
# when ESC is pressed or when neither camera delivers a frame. The capture
# devices and windows are released even if an exception interrupts the loop.
try:
    while True:
        ret0, frame0 = cap0.read()
        ret1, frame1 = cap1.read()

        if not ret0 and not ret1:
            # Nothing to display and no window to receive the ESC key, so
            # stop instead of spinning forever.
            break

        if ret0:
            frame_id += 1

            # NOTE(review): the result of dt.do_detect was immediately
            # overwritten by the cv2.dnn pipeline below in the original code.
            # The call is kept (now only for valid frames) because its side
            # effects are not visible from this file -- confirm and remove
            # if it is not needed.
            image = cv2.cvtColor(frame0, cv2.COLOR_BGR2RGB)
            im_pil = Image.fromarray(image)
            im_pil = im_pil.resize((200, 200))
            dt_boxes = dt.do_detect(image, im_pil, 0.5, 0.4, use_cuda)

            height, width = frame0.shape[:2]

            # Scale pixels by 1/255, resize to 320x320 (reduced from 416 for
            # speed) and swap the R and B channels for the network.
            blob = cv2.dnn.blobFromImage(
                frame0, 0.00392, (320, 320), (0, 0, 0), True, crop=False
            )
            net.setInput(blob)
            outs = net.forward(outputlayers)

            class_ids = []
            confidences = []
            boxes = []

            for out in outs:
                for detection in out:
                    # detection[0:4] hold the normalised centre x, centre y,
                    # width and height; detection[5:] are the class scores.
                    scores = detection[5:]
                    class_id = np.argmax(scores)
                    confidence = scores[class_id]
                    if confidence > 0.3:
                        center_x = int(detection[0] * width)
                        center_y = int(detection[1] * height)
                        w = int(detection[2] * width)
                        h = int(detection[3] * height)

                        # Convert the centre-based box to a top-left corner.
                        x = int(center_x - w / 2)
                        y = int(center_y - h / 2)

                        boxes.append([x, y, w, h])
                        confidences.append(float(confidence))
                        class_ids.append(class_id)

            # Non-maximum suppression; the result may be an empty tuple, a
            # flat array or an Nx1 array, so normalise it to a set of ints.
            indexes = cv2.dnn.NMSBoxes(boxes, confidences, 0.4, 0.6)
            kept = set(np.array(indexes).flatten().tolist())

            # Dump every candidate box of this frame as "x y w h" lines.
            # One file per frame, so the counter advances once per frame.
            box_file = 'C:/Users/HP/Miniconda3/envs/count_vechicle/coding/images/frame%04d.txt' % (count)
            with open(box_file, 'w') as result:
                for x, y, w, h in boxes:
                    result.write("%d %d %d %d\n" % (x, y, w, h))
            count += 1

            for i, (x, y, w, h) in enumerate(boxes):
                if i not in kept:
                    continue
                label = str(LABELS[class_ids[i]])
                confidence = confidences[i]
                color = colors[class_ids[i]]
                cv2.rectangle(frame0, (x, y), (x + w, y + h), color, 2)
                cv2.putText(frame0, label + " " + str(round(confidence, 2)),
                            (x, y + 30), font, 1, (255, 255, 255), 2)

            elapsed_time = time.time() - starting_time
            fps = frame_id / elapsed_time
            cv2.putText(frame0, "FPS:" + str(round(fps, 2)), (10, 50), font, 2, (0, 0, 0), 1)

            cv2.imshow("Image0", frame0)

        if ret1:
            cv2.imshow("Image1", frame1)

        # Poll the keyboard once per iteration: a second waitKey call would
        # overwrite an ESC press captured by the first one.
        key = cv2.waitKey(1) & 0xFF
        if key == 27:  # ESC stops the process
            break
finally:
    cap0.release()
    cap1.release()
    cv2.destroyAllWindows()

Solution

  • Using the bounding box coordinates of the detected object, you can crop a new image out of it and then save it to display.

    Try this:

    # Crop one detected object out of the current frame and save it as an image.
    # extract the bounding box coordinates (boxes[i] is [x, y, w, h] with
    # (x, y) being the top-left corner of the box)
    (x, y) = (boxes[i][0], boxes[i][1])
    (w, h) = (boxes[i][2], boxes[i][3])
    crop_img = frame[y:y + h, x:x + w] # frame of the video you are looping through; rows are y, columns are x
    cv2.imwrite(<filename>, crop_img)  # <filename> is a placeholder for the output image path
    

    Update: adapted to your code:

    if i in indexes:
        x,y,w,h = boxes[i]
        crop_img = frame0[y:y + h, x:x + w]
        cv2.imwrite(<filename>, crop_img)
    

    Using the box coordinates you can crop that part

    Hope it helps.