python machine-learning flask computer-vision object-detection

Flask-based object detection Python script won't show output for IP cameras

I am trying to detect objects in multiple IP camera streams using YOLOv8 and display those streams, with bounding boxes, in the browser using Flask. However, nothing is shown. I tried printing text for debugging, but only the text from the `if __name__ == '__main__'` block was printed, and only after stopping the process (Ctrl+C).

predict_ip.py

from flask import Flask, render_template, Response
from ultralytics import YOLO
import cv2
import threading
import torch 
import argparse

# Camera URLs passed to the index template; starts empty and is reassigned
# inside video_feed().
CAMERAS = []

# Flask application object that the route decorators below register on.
app = Flask(__name__)

# Class names for the detector (contents elided in this post).
class_names = [....]

def generate_frames(models, caps):
    """Yield an endless multipart JPEG stream built from the given cameras.

    Args:
        models: sequence of callables; ``models[i]`` is invoked as
            ``model(frames[i], stream=True)``.
        caps: sequence of capture objects exposing ``read()``.

    Yields:
        bytes: one ``--frame`` part per frame, carrying a JPEG payload.
    """
    while True:
        # read frames from cameras
        frames = []
        for cap in caps:
            success, img = cap.read()
            # Failed reads are dropped, so ``frames`` can end up shorter
            # than ``caps``.
            if success:
                frames.append(img)
        
        # predict objects in frames from cameras
        # NOTE(review): frames[i] is paired with models[i] by position; if a
        # read above failed, the pairing shifts and frames[i] can raise
        # IndexError.
        results = [model(frames[i], stream=True) for i, model in enumerate(models)]
        for i, result in enumerate(results):
            for r in result:
                boxes = r.boxes
                for box in boxes:
                    .
                    .

        # encode processed frames as jpg images
        # Frames from every camera are emitted one after another on this
        # single stream.
        for frame in frames:
            _, buffer = cv2.imencode('.jpg', frame)
            frame = buffer.tobytes()
            yield (b'--frame\r\n'
                   b'Content-Type: image/jpeg\r\n\r\n' + frame + b'\r\n')


@app.route('/')
def index():
    """Render the landing page, handing the template the current camera list."""
    page = render_template('index_ip.html', cameras=CAMERAS)
    return page

@app.route('/video_feed')
def video_feed():
    """Open every camera from the command line and stream the processed frames.

    Returns:
        Response: a ``multipart/x-mixed-replace`` response fed by
        ``generate_frames``.
    """
    global CAMERAS
    print('IN video feed')
    urls = parse_args().urls

    # One capture object per camera URL.
    caps = []
    for url in urls:
        caps.append(cv2.VideoCapture(url))

    CAMERAS = list(urls)

    # One model instance per camera URL.
    models = [YOLO("./models/best_10Class_20Epochs.pt") for _ in urls]

    stream = generate_frames(models, caps)
    return Response(stream, mimetype='multipart/x-mixed-replace; boundary=frame')

def parse_args():
    """Parse the command line and return the namespace holding ``urls``.

    Returns:
        argparse.Namespace: ``urls`` is the list of values given after
        ``-urls`` (at least one is required).
    """
    print('parsing args')
    cli = argparse.ArgumentParser(description='Multi-Camera Object Detection')
    cli.add_argument(
        '-urls',
        required=True,
        nargs='+',
        help='List of URLs for the cameras',
    )
    parsed = cli.parse_args()
    print(parsed.urls)
    return parsed

if __name__ == '__main__':
    # app.run() blocks until the server is stopped, so everything that has to
    # happen at start-up must come before it.
    print('Main method')
    # Fill the camera list up front: the index template renders one <img> per
    # entry, so an empty list produces a page with no streams at all.
    CAMERAS = parse_args().urls
    app.run(debug=True, host='0.0.0.0', port=3000)

index_ip.html

<!DOCTYPE html>
<html>
<head>
    <title>Multi-Camera Object Detection</title>
    <style>
        .grid-container {
      ......}
    </style>
</head>
<body>
    {# Renders one grid item per entry of the "cameras" list passed by the view. #}
    <div class="grid-container">
        {% for camera in cameras %}
        <div class="grid-item">
            {# NOTE(review): every item requests the same video_feed URL; the camera is not part of the URL. #}
            <img src="{{ url_for('video_feed') }}" alt="{{camera}}">
            <div class="overlay">
                <p>Camera {{ camera }}</p>
            </div>
        </div>
        {% endfor %}
    </div>
</body>

</html>

The output in the browser is just a blank screen. For testing, I am using IP cameras available on a website called Opentopia. I am running the code as: python predict_ip.py -urls http://xxx.xx.xxx.xx/abc/video.mjpg http://yyy.yy.yy.yy/abc/video.mjpg

All suggestions are much appreciated. Thank you !!

Solution

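The issue was in both the HTML template and the Python code. In the version below, each camera gets its own stream URL (/video_feed/<list_id>/) and the template requests one such URL per camera. I used the following code as a reference: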

app.py

from flask import Flask, render_template, Response
import cv2

# Flask application object; the route decorators below register on it.
app = Flask(__name__)

# Camera access URLs, one string per camera. The position of a URL in this
# list is the id used in the /video_feed/<list_id>/ route.
cameras = ['xyx.haha', 'abc.hehe']


def find_camera(list_id):
    """Return the entry of ``cameras`` at position ``list_id``.

    Args:
        list_id: position in ``cameras``; converted with ``int()`` first.
    """
    position = int(list_id)
    return cameras[position]


def gen_frames(camera_id):
    """Yield a multipart JPEG stream for a single camera.

    Args:
        camera_id: position of the camera in ``cameras`` (anything ``int()``
            accepts); resolved through ``find_camera``.

    Yields:
        bytes: one ``--frame`` part per frame that was read and encoded
        successfully. The stream ends on the first failed read.
    """
    cam = find_camera(camera_id)  # camera access link for this id
    cap = cv2.VideoCapture(cam)  # capture the video from the live feed

    try:
        while True:
            # read() returns (success flag, frame)
            success, frame = cap.read()
            if not success:
                break

            encoded, buffer = cv2.imencode('.jpg', frame)
            if not encoded:
                # Skip a frame that could not be encoded rather than sending
                # a broken part.
                continue

            yield (b'--frame\r\n'
                   b'Content-Type: image/jpeg\r\n\r\n'
                   + buffer.tobytes() + b'\r\n')
    finally:
        # Runs when the feed ends and also when the generator is closed early
        # (e.g. the client disconnects), so the capture is always released.
        cap.release()


@app.route('/video_feed/<string:list_id>/', methods=["GET"])
def video_feed(list_id):
    """Stream the frames of the camera identified by ``list_id``.

    Returns:
        Response: a ``multipart/x-mixed-replace`` response fed by
        ``gen_frames``.
    """
    frame_stream = gen_frames(list_id)
    return Response(frame_stream, mimetype='multipart/x-mixed-replace; boundary=frame')


@app.route('/', methods=["GET"])
def index():
    """Render the page, passing the number of cameras and the camera list."""
    camera_count = len(cameras)
    return render_template('index_ip.html', camera_list=camera_count, camera=cameras)


# Start the Flask server with its default settings when run as a script.
if __name__ == "__main__":
    app.run()

index_ip.html

<!DOCTYPE html>
<html lang="en">
<head>
    <meta charset="utf-8">
    <title>Multi-Camera Stream</title>
</head>
<body>
    {# "camera_list" is the number of cameras; each index becomes the list_id of its own stream URL. #}
    {% for camera_number in range(0, camera_list) %}
    <div>
        <img src="{{ url_for('video_feed', list_id=camera_number) }}" alt="Camera {{ camera_number }}" width="100%"><br/>
    </div>
    {% endfor %}
</body>
</html>