I have a custom-trained EfficientDet-Lite0 model that I would like to run on a Raspberry Pi 3 with a Pi camera. I'm using the picamera2 library, which ships with some TensorFlow examples. Previously I tested one of the sample scripts with the pre-trained mobilenet_v2.tflite model, and the code was probably written for that model. Here is the code:
import argparse

import cv2
import numpy as np
import tflite_runtime.interpreter as tflite

from picamera2 import MappedArray, Picamera2, Preview

normalSize = (640, 480)
lowresSize = (320, 240)

rectangles = []


def ReadLabelFile(file_path):
    with open(file_path, 'r') as f:
        lines = f.readlines()
    ret = {}
    for line in lines:
        pair = line.strip().split(maxsplit=1)
        ret[int(pair[0])] = pair[1].strip()
    return ret


def DrawRectangles(request):
    with MappedArray(request, "main") as m:
        for rect in rectangles:
            print(rect)
            rect_start = (int(rect[0] * 2) - 5, int(rect[1] * 2) - 5)
            rect_end = (int(rect[2] * 2) + 5, int(rect[3] * 2) + 5)
            cv2.rectangle(m.array, rect_start, rect_end, (0, 255, 0, 0))
            if len(rect) == 5:
                text = rect[4]
                font = cv2.FONT_HERSHEY_SIMPLEX
                cv2.putText(m.array, text, (int(rect[0] * 2) + 10, int(rect[1] * 2) + 10),
                            font, 1, (255, 255, 255), 2, cv2.LINE_AA)


def InferenceTensorFlow(image, model, output, label=None):
    global rectangles

    if label:
        labels = ReadLabelFile(label)
    else:
        labels = None

    interpreter = tflite.Interpreter(model_path=model, num_threads=4)
    interpreter.allocate_tensors()

    input_details = interpreter.get_input_details()
    output_details = interpreter.get_output_details()
    height = input_details[0]['shape'][1]
    width = input_details[0]['shape'][2]
    floating_model = False
    if input_details[0]['dtype'] == np.float32:
        floating_model = True

    rgb = cv2.cvtColor(image, cv2.COLOR_GRAY2RGB)
    initial_h, initial_w, channels = rgb.shape

    picture = cv2.resize(rgb, (width, height))

    input_data = np.expand_dims(picture, axis=0)
    if floating_model:
        input_data = (np.float32(input_data) - 127.5) / 127.5

    interpreter.set_tensor(input_details[0]['index'], input_data)

    interpreter.invoke()

    detected_boxes = interpreter.get_tensor(output_details[0]['index'])
    detected_classes = interpreter.get_tensor(output_details[1]['index'])
    detected_scores = interpreter.get_tensor(output_details[2]['index'])
    num_boxes = interpreter.get_tensor(output_details[3]['index'])

    rectangles = []
    for i in range(int(num_boxes)):  # throws an error here
        top, left, bottom, right = detected_boxes[0][i]
        classId = int(detected_classes[0][i])
        score = detected_scores[0][i]
        if score > 0.5:
            xmin = left * initial_w
            ymin = bottom * initial_h
            xmax = right * initial_w
            ymax = top * initial_h
            box = [xmin, ymin, xmax, ymax]
            rectangles.append(box)
            if labels:
                print(labels[classId], 'score = ', score)
                rectangles[-1].append(labels[classId])
            else:
                print('score = ', score)


def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('--model', help='Path of the detection model.', required=True)
    parser.add_argument('--label', help='Path of the labels file.')
    parser.add_argument('--output', help='File path of the output image.')
    args = parser.parse_args()

    if args.output:
        output_file = args.output
    else:
        output_file = 'out.jpg'

    if args.label:
        label_file = args.label
    else:
        label_file = None

    picam2 = Picamera2()
    picam2.start_preview(Preview.QTGL)
    config = picam2.create_preview_configuration(main={"size": normalSize},
                                                 lores={"size": lowresSize, "format": "YUV420"})
    picam2.configure(config)

    stride = picam2.stream_configuration("lores")["stride"]
    picam2.post_callback = DrawRectangles

    picam2.start()

    while True:
        buffer = picam2.capture_buffer("lores")
        grey = buffer[:stride * lowresSize[1]].reshape((lowresSize[1], stride))
        _ = InferenceTensorFlow(grey, args.model, output_file, label_file)


if __name__ == '__main__':
    main()
When I run this code from its directory with this command:
python3 real_time_with_labels.py --model mymodel.tflite --label mylabels.txt
I get this error message:
[2:34:39.571116147] [3140] INFO Camera camera_manager.cpp:299 libcamera v0.0.3+40-9b860a66
[2:34:39.736783793] [3141] INFO RPI raspberrypi.cpp:1425 Registered camera /base/soc/i2c0mux/i2c@1/imx708@1a to Unicam device /dev/media3 and ISP device /dev/media0
[2:34:41.621879691] [3140] INFO Camera camera.cpp:1028 configuring streams: (0) 320x320-XBGR8888 (1) 320x320-YUV420
[2:34:41.622729767] [3141] INFO RPI raspberrypi.cpp:805 Sensor: /base/soc/i2c0mux/i2c@1/imx708@1a - Selected sensor format: 1536x864-SBGGR10_1X10 - Selected unicam format: 1536x864-pBAA
Traceback (most recent call last):
File "/home/miscope/picamera2/examples/tensorflow/real_time_with_labels.py", line 154, in <module>
main()
File "/home/miscope/picamera2/examples/tensorflow/real_time_with_labels.py", line 150, in main
_ = InferenceTensorFlow(grey, args.model, output_file, label_file)
File "/home/miscope/picamera2/examples/tensorflow/real_time_with_labels.py", line 101, in InferenceTensorFlow
for i in range(int(detected_boxes)):
TypeError: only size-1 arrays can be converted to Python scalars
What should I do to fix this error? Hope someone can help.
Thanks.
You are correct: the code was written for a different model, so how you pull the information out of the model will need to change.
Before you call interpreter.set_tensor, you'll need to grab some model info:
# Check the output layer name to determine whether this model was created
# with TF2 or TF1, because outputs are ordered differently for TF2 and TF1 models
outname = output_details[0]['name']
if 'StatefulPartitionedCall' in outname:  # This is a TF2 model
    boxes_idx, classes_idx, scores_idx = 1, 3, 0
else:  # This is a TF1 model
    boxes_idx, classes_idx, scores_idx = 0, 1, 2
# then invoke
interpreter.set_tensor(input_details[0]['index'], input_data)
interpreter.invoke()
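If you're not sure which ordering your particular .tflite file uses, printing the output details is a quick way to check. This is purely a diagnostic; for these SSD-style detector exports the shapes are enough to tell the tensors apart:

# Diagnostic only: print each output tensor's name and shape.
# Boxes come out as [1, N, 4], classes and scores as [1, N],
# and the detection count as [1].
for i, detail in enumerate(output_details):
    print(i, detail['name'], detail['shape'])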
After this, how you grab the detected information will also need to change:
detected_boxes = interpreter.get_tensor(output_details[boxes_idx]['index']) # Bounding box coordinates of detected objects
detected_classes = interpreter.get_tensor(output_details[classes_idx]['index']) # Class index of detected objects
detected_scores = interpreter.get_tensor(output_details[scores_idx]['index']) # Confidence of detected objects
# you no longer need num_boxes
Lastly, the loop over the detections can be simplified by changing your for loop to:
for i in range(len(detected_scores[0])):
TL;DR: you can't pull the data out of the two different models the same way.
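Putting the pieces together, here is a minimal sketch of how the output-handling part of InferenceTensorFlow ends up looking. It assumes your EfficientDet-Lite0 export is a TF2 model (the StatefulPartitionedCall branch) and keeps the 0.5 score threshold and coordinate mapping from the original example:

# Pick the output tensor order based on whether this is a TF2 or TF1 export.
outname = output_details[0]['name']
if 'StatefulPartitionedCall' in outname:  # TF2 model
    boxes_idx, classes_idx, scores_idx = 1, 3, 0
else:  # TF1 model
    boxes_idx, classes_idx, scores_idx = 0, 1, 2

interpreter.set_tensor(input_details[0]['index'], input_data)
interpreter.invoke()

detected_boxes = interpreter.get_tensor(output_details[boxes_idx]['index'])
detected_classes = interpreter.get_tensor(output_details[classes_idx]['index'])
detected_scores = interpreter.get_tensor(output_details[scores_idx]['index'])

rectangles = []
# Loop over the scores directly instead of reading a num_boxes tensor.
for i in range(len(detected_scores[0])):
    top, left, bottom, right = detected_boxes[0][i]
    classId = int(detected_classes[0][i])
    score = detected_scores[0][i]
    if score > 0.5:
        # Same coordinate mapping as the original example code.
        box = [left * initial_w, bottom * initial_h,
               right * initial_w, top * initial_h]
        rectangles.append(box)
        if labels:
            print(labels[classId], 'score = ', score)
            rectangles[-1].append(labels[classId])
        else:
            print('score = ', score)

One more thing to watch: EfficientDet-Lite0 typically expects a 320x320 input, and the quantized export takes uint8 input, which the floating_model check in your original code already handles, so the resize/normalisation part can stay as it is.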