Tags: python, numpy, tensorflow, object-detection, tensorflow-lite

TensorFlow Lite Output TypeError (in InferenceTensorFlow)


I have a custom-trained EfficientDet-Lite0 model that I would like to use on a Raspberry Pi 3 with a Pi camera. I'm using the picamera2 library, which ships with some TensorFlow examples. I previously tested one of the sample scripts with the pre-trained mobilenet_v2.tflite model, and the code was probably written for that model. Here is the code:

import argparse

import cv2
import numpy as np
import tflite_runtime.interpreter as tflite

from picamera2 import MappedArray, Picamera2, Preview

normalSize = (640, 480)
lowresSize = (320, 240)

rectangles = []


def ReadLabelFile(file_path):
    with open(file_path, 'r') as f:
        lines = f.readlines()
    ret = {}
    for line in lines:
        pair = line.strip().split(maxsplit=1)
        ret[int(pair[0])] = pair[1].strip()
    return ret


def DrawRectangles(request):
    with MappedArray(request, "main") as m:
        for rect in rectangles:
            print(rect)
            rect_start = (int(rect[0] * 2) - 5, int(rect[1] * 2) - 5)
            rect_end = (int(rect[2] * 2) + 5, int(rect[3] * 2) + 5)
            cv2.rectangle(m.array, rect_start, rect_end, (0, 255, 0, 0))
            if len(rect) == 5:
                text = rect[4]
                font = cv2.FONT_HERSHEY_SIMPLEX
                cv2.putText(m.array, text, (int(rect[0] * 2) + 10, int(rect[1] * 2) + 10),
                            font, 1, (255, 255, 255), 2, cv2.LINE_AA)


def InferenceTensorFlow(image, model, output, label=None):
    global rectangles

    if label:
        labels = ReadLabelFile(label)
    else:
        labels = None

    interpreter = tflite.Interpreter(model_path=model, num_threads=4)
    interpreter.allocate_tensors()

    input_details = interpreter.get_input_details()
    output_details = interpreter.get_output_details()
    height = input_details[0]['shape'][1]
    width = input_details[0]['shape'][2]
    floating_model = False
    if input_details[0]['dtype'] == np.float32:
        floating_model = True

    rgb = cv2.cvtColor(image, cv2.COLOR_GRAY2RGB)
    initial_h, initial_w, channels = rgb.shape

    picture = cv2.resize(rgb, (width, height))

    input_data = np.expand_dims(picture, axis=0)
    if floating_model:
        input_data = (np.float32(input_data) - 127.5) / 127.5

    interpreter.set_tensor(input_details[0]['index'], input_data)

    interpreter.invoke()

    detected_boxes = interpreter.get_tensor(output_details[0]['index'])
    detected_classes = interpreter.get_tensor(output_details[1]['index'])
    detected_scores = interpreter.get_tensor(output_details[2]['index'])
    num_boxes = interpreter.get_tensor(output_details[3]['index'])

    rectangles = []
    for i in range(int(num_boxes)):  # throws an error here
        top, left, bottom, right = detected_boxes[0][i]
        classId = int(detected_classes[0][i])
        score = detected_scores[0][i]
        if score > 0.5:
            xmin = left * initial_w
            ymin = bottom * initial_h
            xmax = right * initial_w
            ymax = top * initial_h
            box = [xmin, ymin, xmax, ymax]
            rectangles.append(box)
            if labels:
                print(labels[classId], 'score = ', score)
                rectangles[-1].append(labels[classId])
            else:
                print('score = ', score)


def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('--model', help='Path of the detection model.', required=True)
    parser.add_argument('--label', help='Path of the labels file.')
    parser.add_argument('--output', help='File path of the output image.')
    args = parser.parse_args()

    if (args.output):
        output_file = args.output
    else:
        output_file = 'out.jpg'

    if (args.label):
        label_file = args.label
    else:
        label_file = None

    picam2 = Picamera2()
    picam2.start_preview(Preview.QTGL)
    config = picam2.create_preview_configuration(main={"size": normalSize},
                                                 lores={"size": lowresSize, "format": "YUV420"})
    picam2.configure(config)

    stride = picam2.stream_configuration("lores")["stride"]
    picam2.post_callback = DrawRectangles

    picam2.start()

    while True:
        buffer = picam2.capture_buffer("lores")
        grey = buffer[:stride * lowresSize[1]].reshape((lowresSize[1], stride))
        _ = InferenceTensorFlow(grey, args.model, output_file, label_file)


if __name__ == '__main__':
    main()

When I run this script from its directory with this command:

python3 real_time_with_labels.py --model mymodel.tflite --label mylabels.txt

I get this error message:

[2:34:39.571116147] [3140]  INFO Camera camera_manager.cpp:299 libcamera v0.0.3+40-9b860a66
[2:34:39.736783793] [3141]  INFO RPI raspberrypi.cpp:1425 Registered camera /base/soc/i2c0mux/i2c@1/imx708@1a to Unicam device /dev/media3 and ISP device /dev/media0
[2:34:41.621879691] [3140]  INFO Camera camera.cpp:1028 configuring streams: (0) 320x320-XBGR8888 (1) 320x320-YUV420
[2:34:41.622729767] [3141]  INFO RPI raspberrypi.cpp:805 Sensor: /base/soc/i2c0mux/i2c@1/imx708@1a - Selected sensor format: 1536x864-SBGGR10_1X10 - Selected unicam format: 1536x864-pBAA
Traceback (most recent call last):
  File "/home/miscope/picamera2/examples/tensorflow/real_time_with_labels.py", line 154, in <module>
    main()
  File "/home/miscope/picamera2/examples/tensorflow/real_time_with_labels.py", line 150, in main
    _ = InferenceTensorFlow(grey, args.model, output_file, label_file)
  File "/home/miscope/picamera2/examples/tensorflow/real_time_with_labels.py", line 101, in InferenceTensorFlow
    for i in range(int(detected_boxes)):
TypeError: only size-1 arrays can be converted to Python scalars

What should I do to avoid this in the future? I hope someone can help.

Thanks


Solution

  • You are correct: the code was written for a different model, so how you pull the information out of the model will need to change.
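
    Concretely, with a TF2-style export the tensor at output index 3 is not the scalar box count your loop expects, so int() is being called on a multi-element array. The same TypeError can be reproduced in isolation (a minimal sketch, assuming only numpy):

        import numpy as np

        # Calling int() on an array with more than one element raises the
        # same error as in the traceback above.
        int(np.zeros((1, 25, 4)))  # TypeError: only size-1 arrays can be converted to Python scalars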

    Before you call interpreter.set_tensor, you'll need to grab some model info:

        # Check output layer name to determine if this model was created with TF2 or TF1,
        # because outputs are ordered differently for TF2 and TF1 models
        outname = output_details[0]['name']
    
        if ('StatefulPartitionedCall' in outname): # This is a TF2 model
            boxes_idx, classes_idx, scores_idx = 1, 3, 0
        else: # This is a TF1 model
            boxes_idx, classes_idx, scores_idx = 0, 1, 2
    
        # then invoke
        interpreter.set_tensor(input_details[0]['index'], input_data)
    
        interpreter.invoke()
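
    If you're not sure which case applies to your .tflite file, you can print the output details and inspect them yourself (a quick diagnostic sketch; names containing 'StatefulPartitionedCall' indicate a TF2 export):

        # Print each output tensor's name and shape so you can see the
        # ordering your particular model uses.
        for i, detail in enumerate(output_details):
            print(i, detail['name'], detail['shape'])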
    
    

    After this, how you grab the detected information will also need to change:

        detected_boxes = interpreter.get_tensor(output_details[boxes_idx]['index']) # Bounding box coordinates of detected objects
        detected_classes = interpreter.get_tensor(output_details[classes_idx]['index']) # Class index of detected objects
        detected_scores = interpreter.get_tensor(output_details[scores_idx]['index']) # Confidence of detected objects
        # you no longer need num_boxes
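
    As a sanity check, the shapes should now line up, something like this for a typical EfficientDet-Lite0 export (the exact detection count depends on how the model was exported):

        print(detected_boxes.shape)    # e.g. (1, 25, 4) -- normalized [ymin, xmin, ymax, xmax]
        print(detected_classes.shape)  # e.g. (1, 25)
        print(detected_scores.shape)   # e.g. (1, 25)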
    
    

    Lastly, how you loop over the detections can be simplified by changing your for loop to:

        for i in range(len(detected_scores[0])):
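
    For reference, the whole loop then becomes (a sketch; the 0.5 threshold, box scaling, and label handling are unchanged from your original code):

        rectangles = []
        for i in range(len(detected_scores[0])):
            score = detected_scores[0][i]
            if score > 0.5:
                # Boxes come back normalized to 0..1 in [ymin, xmin, ymax, xmax] order.
                top, left, bottom, right = detected_boxes[0][i]
                classId = int(detected_classes[0][i])
                box = [left * initial_w, bottom * initial_h,
                       right * initial_w, top * initial_h]
                rectangles.append(box)
                if labels:
                    print(labels[classId], 'score = ', score)
                    rectangles[-1].append(labels[classId])
                else:
                    print('score = ', score)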
    

    TL;DR: you can't grab data out of the two different models the same way.