I'm working on a vehicle tracking and counting problem using YOLOv3 and Deep SORT. After consulting source code found online and implementing it, I only get about 2-3 FPS with the yolov3 weights + cfg. With the yolov3-tiny weights + cfg I get about 10 FPS, but ID switches become a big problem. I have CUDA 11.6 and cuDNN installed for GPU usage (GTX 1650 GPU, 10300H CPU). I don't know whether my problem is that the model is running on the CPU instead of the GPU, or whether the yolov3 weights + cfg are simply slow.
Here is the code in main.py
import cv2
import numpy as np
import time
from deep_sort_yolov3.deep_sort import preprocessing
from deep_sort_yolov3.deep_sort import nn_matching
from deep_sort_yolov3.deep_sort.detection import Detection
from deep_sort_yolov3.deep_sort.tracker import Tracker
from deep_sort_yolov3.deep_sort.detection import Detection as ddet
from deep_sort_yolov3.tools import generate_detections as gdet
from collections import deque
import tensorflow as tf
YOLO_SIZE = 320
net = cv2.dnn.readNet("yolov3_320.weights", "yolov3.cfg")
model_filename = r"deep_sort_yolov3\model_data\mars-small128.pb"
layer_names = net.getLayerNames()
output_layers = [layer_names[i - 1] for i in net.getUnconnectedOutLayers()]
max_cosine_distance = 0.5
nms_max_overlap = 0.3
classes = []
nn_budget = None
counter = []
# fps = 0.0
pts = [deque(maxlen=30) for _ in range(9999)]
COLORS = np.random.randint(0, 255, size=(200, 3),
dtype="uint8")
encoder = gdet.create_box_encoder(model_filename, batch_size = 1)  # encoded appearance features for each box
metric = nn_matching.NearestNeighborDistanceMetric("cosine", max_cosine_distance, nn_budget)  # use cosine distance to compare boxes
tracker = Tracker(metric)  # track objects based on the obtained distances
# Loading camera
#cap = cv2.VideoCapture(0)
cap = cv2.VideoCapture('traffic.mp4')
font = cv2.FONT_HERSHEY_PLAIN
starting_time = time.time()
frame_id = 0
while True:
    _, frame = cap.read()
    #cv2.resize(frame,(640,480))
    frame_id += 1
    height, width, channels = frame.shape[:3]
    #print("Num GPUs Available: ", len(tf.config.list_physical_devices('GPU')))
    # Detecting objects
    blob = cv2.dnn.blobFromImage(frame, 1/255, (YOLO_SIZE, YOLO_SIZE), (0, 0, 0), True, crop=False)
    net.setInput(blob)
    outs = net.forward(output_layers)
    # Showing information on the screen
    i = 0
    indexIDs = []
    class_ids = []
    confidences = []
    bboxes = []
    for out in outs:
        for detection in out:
            scores = detection[5:]
            class_id = np.argmax(scores)
            confidence = scores[class_id]
            if confidence > 0.5:
                # Object detected
                center_x = int(detection[0] * width)
                center_y = int(detection[1] * height)
                w = int(detection[2] * width)
                h = int(detection[3] * height)
                # Rectangle coordinates
                x = int(center_x - w / 2)
                y = int(center_y - h / 2)
                bboxes.append([x, y, w, h])
                confidences.append(float(confidence))
                class_ids.append(class_id)
    # feature extraction
    features = encoder(frame, bboxes)
    # import pdb; pdb.set_trace()
    detections = [Detection(bbox, 1.0, feature) for bbox, feature in zip(bboxes, features)]
    # non-maxima suppression
    boxes = np.array([d.tlwh for d in detections])
    scores = np.array([d.confidence for d in detections])
    indices = preprocessing.non_max_suppression(boxes, nms_max_overlap, scores)
    detections = [detections[i] for i in indices]
    # Call the tracker
    tracker.predict()
    tracker.update(detections)
    for track in tracker.tracks:
        if not track.is_confirmed() or track.time_since_update > 1:
            continue
        # boxes.append([track[0], track[1], track[2], track[3]])
        indexIDs.append(int(track.track_id))
        counter.append(int(track.track_id))
        bbox = track.to_tlbr()
        color = [int(c) for c in COLORS[indexIDs[i] % len(COLORS)]]
        cv2.rectangle(frame, (int(bbox[0]), int(bbox[1])), (int(bbox[2]), int(bbox[3])), (color), 3)
        cv2.putText(frame, str(track.track_id), (int(bbox[0]), int(bbox[1] - 50)), 0, 5e-3 * 150, (color), 2)
        i += 1
        # bbox_center_point(x,y)
        center = (int(((bbox[0]) + (bbox[2])) / 2), int(((bbox[1]) + (bbox[3])) / 2))
        # track_id[center]
        pts[track.track_id].append(center)
        thickness = 5
        # center point
        cv2.circle(frame, (center), 1, color, thickness)
        # draw motion path
        for j in range(1, len(pts[track.track_id])):
            if pts[track.track_id][j - 1] is None or pts[track.track_id][j] is None:
                continue
            thickness = int(np.sqrt(64 / float(j + 1)) * 2)
            cv2.line(frame, (pts[track.track_id][j - 1]), (pts[track.track_id][j]), (color), thickness)
    count = len(set(counter))
    cv2.putText(frame, "Total Object Counter: " + str(count), (int(20), int(120)), 0, 5e-3 * 200, (0, 255, 0), 2)
    cv2.putText(frame, "Current Object Counter: " + str(i), (int(20), int(80)), 0, 5e-3 * 200, (0, 255, 0), 2)
    elapsed_time = time.time() - starting_time
    fps = frame_id / elapsed_time
    cv2.putText(frame, "FPS: %f" % (fps), (int(20), int(40)), 0, 5e-3 * 200, (0, 255, 0), 3)
    cv2.namedWindow("YOLO3_Deep_SORT", 0)
    cv2.imshow('YOLO3_Deep_SORT', frame)
    key = cv2.waitKey(1)
    if key == 27:
        break
cap.release()
cv2.destroyAllWindows()
Here is the terminal output after running:
C:\Users\Duong\AppData\Local\Programs\Python\Python37\lib\site-packages\sklearn\utils\linear_assignment_.py:22: FutureWarning: The linear_assignment_ module is deprecated in 0.21 and will be removed from 0.23. Use scipy.optimize.linear_sum_assignment instead.
FutureWarning)
2023-03-14 17:37:56.079094: I tensorflow/stream_executor/platform/default/dso_loader.cc:49] Successfully opened dynamic library cudart64_110.dll
2023-03-14 17:37:57.745508: I tensorflow/compiler/jit/xla_cpu_device.cc:41] Not creating XLA devices, tf_xla_enable_xla_devices not set
2023-03-14 17:37:57.746566: I tensorflow/stream_executor/platform/default/dso_loader.cc:49] Successfully opened dynamic library nvcuda.dll
2023-03-14 17:37:58.755945: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1720] Found device 0 with properties:
pciBusID: 0000:01:00.0 name: NVIDIA GeForce GTX 1650 computeCapability: 7.5
coreClock: 1.515GHz coreCount: 14 deviceMemorySize: 4.00GiB deviceMemoryBandwidth: 178.84GiB/s
2023-03-14 17:37:58.756274: I tensorflow/stream_executor/platform/default/dso_loader.cc:49] Successfully opened dynamic library cudart64_110.dll
2023-03-14 17:37:58.770488: I tensorflow/stream_executor/platform/default/dso_loader.cc:49] Successfully opened dynamic library cublas64_11.dll
2023-03-14 17:37:58.770666: I tensorflow/stream_executor/platform/default/dso_loader.cc:49] Successfully opened dynamic library cublasLt64_11.dll
2023-03-14 17:37:58.777627: I tensorflow/stream_executor/platform/default/dso_loader.cc:49] Successfully opened dynamic library cufft64_10.dll
2023-03-14 17:37:58.779710: I tensorflow/stream_executor/platform/default/dso_loader.cc:49] Successfully opened dynamic library curand64_10.dll
2023-03-14 17:37:58.796984: I tensorflow/stream_executor/platform/default/dso_loader.cc:49] Successfully opened dynamic library cusolver64_10.dll
2023-03-14 17:37:58.824496: I tensorflow/stream_executor/platform/default/dso_loader.cc:49] Successfully opened dynamic library cusparse64_11.dll
2023-03-14 17:37:58.825520: I tensorflow/stream_executor/platform/default/dso_loader.cc:49] Successfully opened dynamic library cudnn64_8.dll
2023-03-14 17:37:58.825819: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1862] Adding visible gpu devices: 0
2023-03-14 17:37:58.826281: I tensorflow/core/platform/cpu_feature_guard.cc:142] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.
2023-03-14 17:37:58.827124: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1720] Found device 0 with properties:
pciBusID: 0000:01:00.0 name: NVIDIA GeForce GTX 1650 computeCapability: 7.5
coreClock: 1.515GHz coreCount: 14 deviceMemorySize: 4.00GiB deviceMemoryBandwidth: 178.84GiB/s
2023-03-14 17:37:58.827465: I tensorflow/stream_executor/platform/default/dso_loader.cc:49] Successfully opened dynamic library cudart64_110.dll
2023-03-14 17:37:58.827582: I tensorflow/stream_executor/platform/default/dso_loader.cc:49] Successfully opened dynamic library cublas64_11.dll
2023-03-14 17:37:58.827701: I tensorflow/stream_executor/platform/default/dso_loader.cc:49] Successfully opened dynamic library cublasLt64_11.dll
2023-03-14 17:37:58.827810: I tensorflow/stream_executor/platform/default/dso_loader.cc:49] Successfully opened dynamic library cufft64_10.dll
2023-03-14 17:37:58.827929: I tensorflow/stream_executor/platform/default/dso_loader.cc:49] Successfully opened dynamic library curand64_10.dll
2023-03-14 17:37:58.828022: I tensorflow/stream_executor/platform/default/dso_loader.cc:49] Successfully opened dynamic library cusolver64_10.dll
2023-03-14 17:37:58.828115: I tensorflow/stream_executor/platform/default/dso_loader.cc:49] Successfully opened dynamic library cusparse64_11.dll
2023-03-14 17:37:58.828235: I tensorflow/stream_executor/platform/default/dso_loader.cc:49] Successfully opened dynamic library cudnn64_8.dll
2023-03-14 17:37:58.828391: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1862] Adding visible gpu devices: 0
2023-03-14 17:37:59.234835: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1261] Device interconnect StreamExecutor with strength 1 edge matrix:
2023-03-14 17:37:59.235043: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1267] 0
2023-03-14 17:37:59.235230: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1280] 0: N
2023-03-14 17:37:59.235544: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1406] Created TensorFlow device (/job:localhost/replica:0/task:0/device:GPU:0 with 2891 MB memory) -> physical GPU (device: 0, name: NVIDIA GeForce GTX 1650, pci bus id: 0000:01:00.0, compute capability: 7.5)
2023-03-14 17:37:59.236423: I tensorflow/compiler/jit/xla_gpu_device.cc:99] Not creating XLA devices, tf_xla_enable_xla_devices not set
[ WARN:0@3.712] global net_impl.cpp:174 cv::dnn::dnn4_v20221220::Net::Impl::setUpNet DNN module was not built with CUDA backend; switching to CPU
2023-03-14 17:37:59.922098: I tensorflow/compiler/mlir/mlir_graph_optimization_pass.cc:196] None of the MLIR optimization passes are enabled (registered 0 passes)
2023-03-14 17:38:00.470162: I tensorflow/stream_executor/platform/default/dso_loader.cc:49] Successfully opened dynamic library cublas64_11.dll
2023-03-14 17:38:01.190269: I tensorflow/stream_executor/platform/default/dso_loader.cc:49] Successfully opened dynamic library cublasLt64_11.dll
2023-03-14 17:38:01.191604: I tensorflow/stream_executor/platform/default/dso_loader.cc:49] Successfully opened dynamic library cudnn64_8.dll
2023-03-14 17:38:01.956778: I tensorflow/core/platform/windows/subprocess.cc:308] SubProcess ended with return code: 0
2023-03-14 17:38:02.009645: I tensorflow/core/platform/windows/subprocess.cc:308] SubProcess ended with return code: 0
C:\Users\Duong\AppData\Local\Programs\Python\Python37\lib\site-packages\sklearn\utils\linear_assignment_.py:128: FutureWarning: The linear_assignment function is deprecated in 0.21 and will be removed from 0.23. Use scipy.optimize.linear_sum_assignment instead.
FutureWarning)
I want to improve the accuracy of the tracking and counting and at the same time increase the FPS. Thanks!
You should set the network's preferred backend and target to CUDA if you want to run it on an NVIDIA GPU. Take a look at the setPreferableBackend and setPreferableTarget documentation. Also take a look at this official Python script example.
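For reference, here is a minimal sketch of how those calls are typically used, assuming an OpenCV build compiled with CUDA support. Note the warning in your log ("DNN module was not built with CUDA backend; switching to CPU"): with your current build these calls will be ignored and inference will stay on the CPU, so you would need a CUDA-enabled OpenCV build (e.g. compiled from source against CUDA/cuDNN) for them to take effect.

import cv2

# Load the Darknet weights/config exactly as in main.py
net = cv2.dnn.readNet("yolov3_320.weights", "yolov3.cfg")

# Ask the OpenCV DNN module to run inference on the NVIDIA GPU.
# These calls only take effect if OpenCV was built with the CUDA backend;
# otherwise OpenCV silently falls back to CPU, as the warning in the log shows.
net.setPreferableBackend(cv2.dnn.DNN_BACKEND_CUDA)
net.setPreferableTarget(cv2.dnn.DNN_TARGET_CUDA)

# Optionally, FP16 inference can be faster on cards that support it:
# net.setPreferableTarget(cv2.dnn.DNN_TARGET_CUDA_FP16)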