Tags: python, opencv, google-colaboratory, yolo, darknet

YOLOv4-tiny with Darknet on Google Colab results in an error


I am trying to process images on Google Colab with YOLOv4-tiny and the Darknet framework. The code works perfectly on my local Mac (2021, M1), but in Colab I am getting an error.

Thank you very much for your help!

!rm darknet
# clone darknet repo
!git clone https://github.com/AlexeyAB/darknet
# change makefile to have GPU and OPENCV enabled
%cd darknet
!wget https://github.com/AlexeyAB/darknet/releases/download/darknet_yolo_v4_pre/yolov4-tiny.weights
!sed -i 's/OPENCV=0/OPENCV=1/' Makefile
!sed -i 's/GPU=0/GPU=1/' Makefile
!sed -i 's/CUDNN=0/CUDNN=1/' Makefile
!sed -i 's/CUDNN_HALF=0/CUDNN_HALF=1/' Makefile

!make


model_config='/content/darknet/cfg/yolov4-tiny.cfg'
model_names='/content/darknet/cfg/coco.names'
model_weights='/content/darknet/yolov4-tiny.weights'

import cv2
import numpy as np
    
class YoloDetection():
    def __init__(self, model_path: str, config: str, classes: str, width: int, height: int,
                 scale=0.00392, thr=0.4, nms=0.4, backend=0, framework=3,
                 target=0, mean=[0, 0, 0]):
    
        super(YoloDetection,self).__init__()
        choices = ['caffe', 'tensorflow', 'torch', 'darknet']
        backends = (cv2.dnn.DNN_BACKEND_DEFAULT, cv2.dnn.DNN_BACKEND_HALIDE,
                    cv2.dnn.DNN_BACKEND_INFERENCE_ENGINE, cv2.dnn.DNN_BACKEND_OPENCV)
        targets = (cv2.dnn.DNN_TARGET_CPU, cv2.dnn.DNN_TARGET_OPENCL,
                   cv2.dnn.DNN_TARGET_OPENCL_FP16, cv2.dnn.DNN_TARGET_MYRIAD)

        self.__confThreshold = thr
        self.__nmsThreshold = nms
        self.__mean = mean
        self.__scale = scale
        self.__width = width
        self.__height = height

        # Load a network
        self.__net = cv2.dnn.readNet(model_path, config, choices[framework])
        self.__net.setPreferableBackend(backends[backend])
        self.__net.setPreferableTarget(targets[target])
        self.__classes = None

        if classes:
            with open(classes, 'rt') as f:
                self.__classes = f.read().rstrip('\n').split('\n')


    def get_output_layers_name(self, net):
        all_layers_names = net.getLayerNames()
        return [all_layers_names[i-1] for i in net.getUnconnectedOutLayers()]

    def post_process_output(self, frame, outs):
        frame_height = frame.shape[0]
        frame_width = frame.shape[1]

        class_ids = []
        confidences = []
        boxes = []

        for out in outs:
            for detection in out:
                scores = detection[5:]
                class_id = np.argmax(scores)
                confidence = scores[class_id]
                if confidence > self.__confThreshold:
                    center_x = int(detection[0] * frame_width)
                    center_y = int(detection[1] * frame_height)
                    width = int(detection[2] * frame_width)
                    height = int(detection[3] * frame_height)
                    left = center_x - width / 2
                    top = center_y - height / 2
                    class_ids.append(class_id)
                    confidences.append(float(confidence))
                    boxes.append([left, top, width, height])

    
        indices = cv2.dnn.NMSBoxes(boxes, confidences, self.__confThreshold, self.__nmsThreshold)
        return (indices, boxes, confidences, class_ids)

    def process_frame(self, frame: np.ndarray):
        frame_height = frame.shape[0]
        frame_width = frame.shape[1]

        blob = cv2.dnn.blobFromImage(frame, self.__scale, (self.__width, self.__height), self.__mean, True, crop=False)

        # Run a model
        self.__net.setInput(blob)
        outs = self.__net.forward(self.get_output_layers_name(self.__net))
        (indices, boxes, confidences, class_ids) = self.post_process_output(frame, outs)
        detected_objects = []

        for i in indices:
        
            box = boxes[i]
            left = box[0]
            top = box[1]
            width = box[2]
            height = box[3]
            x = int(left)
            y = int(top)
            nw = int(width)
            nh = int(height)
            if x < 0:
                x = 0
            if y < 0:
                y = 0
            if x + nw > frame_width:
                nw = frame_width - x
            if y + nh > frame_height:
                nh = frame_height - y
            detected_objects.append([self.__classes[class_ids[i]], x, y, nw, nh, confidences[i]])
        return detected_objects


model = YoloDetection(model_weights,
                  model_config,
                  model_names,
                  416,
                  416)

!wget https://image.stern.de/8424922/t/8I/v2/w1440/r0/-/30--artikel22517bild01jpg---b5e7066e38d38876.jpg
pic = cv2.imread('/content/darknet/30--artikel22517bild01jpg---b5e7066e38d38876.jpg', 0) 
model.process_frame(pic)

This code results in: 'TypeError: only integer scalar arrays can be converted to a scalar index'

Here is the full notebook: https://colab.research.google.com/drive/1ozyHkg3CVyzDqmO7gHYsNZ6TdWmlYMUQ?usp=sharing

---------------------------------------------------------------------------
TypeError                                 Traceback (most recent call last)
<ipython-input-1-d75e91783e3e> in <module>()
    130 get_ipython().system('wget https://image.stern.de/8424922/t/8I/v2/w1440/r0/-/30--artikel22517bild01jpg---b5e7066e38d38876.jpg')
    131 pic = cv2.imread('/content/darknet/30--artikel22517bild01jpg---b5e7066e38d38876.jpg', 0)
--> 132 model.process_frame(pic)

2 frames
<ipython-input-1-d75e91783e3e> in process_frame(self, frame)
 95         # Run a model
 96         self.__net.setInput(blob)
---> 97         outs = self.__net.forward(self.get_output_layers_name(self.__net))
 98         (indices, boxes, confidences, class_ids) = self.post_process_output(frame, outs)
 99         detected_objects = []

<ipython-input-1-d75e91783e3e> in get_output_layers_name(self, net)
 54     def get_output_layers_name(self, net):
 55         all_layers_names = net.getLayerNames()
---> 56         return [all_layers_names[i-1] for i in net.getUnconnectedOutLayers()]
 57 
 58     def post_process_output(self, frame, outs):

<ipython-input-1-d75e91783e3e> in <listcomp>(.0)
 54     def get_output_layers_name(self, net):
 55         all_layers_names = net.getLayerNames()
---> 56         return [all_layers_names[i-1] for i in net.getUnconnectedOutLayers()]
 57 
 58     def post_process_output(self, frame, outs):

TypeError: only integer scalar arrays can be converted to a scalar index

Solution

  • tl;dr: please replace your get_output_layers_name() function with a simple:

    def get_output_layers_name(self, net):
        return net.getUnconnectedOutLayersNames()
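
    As a quick sanity check (a minimal sketch on my part, reusing the weight and cfg paths from the question's notebook), the simplified call already returns the model's output layer names directly:

    import cv2

    net = cv2.dnn.readNet('/content/darknet/yolov4-tiny.weights',
                          '/content/darknet/cfg/yolov4-tiny.cfg',
                          'darknet')
    # Prints the names of the unconnected (output) layers, i.e. the [yolo] layers
    # of the tiny model; the exact names depend on the cfg.
    print(net.getUnconnectedOutLayersNames())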
    

    OpenCV's Python wrappers for std::vector have changed recently: an artificial extra dimension used to be added to the returned arrays, which is no longer the case. So

    ---> 56 return [all_layers_names[i-1] for i in net.getUnconnectedOutLayers()]

    is for versions later than 4.5, while it was

    ---> 56 return [all_layers_names[0][i-1] for i in net.getUnconnectedOutLayers()]

    before. (And you seem to run it on Colab, which ships an outdated 4.1.0 by default.)
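
    If you do want to keep the index-based lookup, a version-agnostic variant (my own sketch, not part of the original answer) flattens the result first, so both the old 2-D and the new 1-D return shapes work; cv2.__version__ tells you which case your runtime is in:

    import cv2
    import numpy as np

    print(cv2.__version__)  # the answer above reports an outdated 4.1.0 on Colab

    def get_output_layer_names(net):
        layer_names = net.getLayerNames()
        # flatten() tolerates both return shapes of getUnconnectedOutLayers():
        # [[i], [j]] on older wrappers and [i, j] on 4.5+.
        return [layer_names[i - 1] for i in np.asarray(net.getUnconnectedOutLayers()).flatten()]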

    Again, to avoid all that confusion, please use the simplified code above!
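
    For completeness, here is a minimal end-to-end sketch of the recommended call in use (the paths, the 416x416 input size and the ~1/255 scale are taken from the question, so treat them as assumptions about your setup):

    import cv2

    net = cv2.dnn.readNet('/content/darknet/yolov4-tiny.weights',
                          '/content/darknet/cfg/yolov4-tiny.cfg',
                          'darknet')

    img = cv2.imread('/content/darknet/30--artikel22517bild01jpg---b5e7066e38d38876.jpg')
    blob = cv2.dnn.blobFromImage(img, 1 / 255.0, (416, 416), [0, 0, 0], swapRB=True, crop=False)
    net.setInput(blob)

    # getUnconnectedOutLayersNames() behaves the same on old and new OpenCV builds,
    # so no index arithmetic is needed before forward().
    outs = net.forward(net.getUnconnectedOutLayersNames())
    print([o.shape for o in outs])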