How can I do a simple matmul on the Edge TPU?

I can't work out how to invoke my .tflite model, which does a matmul, on the Coral accelerator using the Python API.

The .tflite model is generated from some example code here. It works well with the tf.lite.Interpreter() class, but I don't know how to adapt it to work with the edgetpu classes. I have tried edgetpu.basic.basic_engine.BasicEngine() after changing the model's data type from numpy.float32 to numpy.uint8, but that did not help. I am a complete beginner with TensorFlow and just want to use my TPU for matmul.

import numpy
import tensorflow as tf
import edgetpu
from edgetpu.basic.basic_engine import BasicEngine

def export_tflite_from_session(session, input_nodes, output_nodes, tflite_filename):
    """Convert the graph held by *session* to TFLite and write it to disk.

    Args:
        session: TensorFlow session handed to ``TFLiteConverter.from_session``.
        input_nodes: Input tensors handed to the converter.
        output_nodes: Output tensors handed to the converter.
        tflite_filename: Path of the file the converted model is written to.
    """
    print("Converting to tflite...")
    serialized_model = tf.lite.TFLiteConverter.from_session(
        session, input_nodes, output_nodes
    ).convert()
    # The converter result is written out in binary mode.
    with open(tflite_filename, "wb") as model_file:
        model_file.write(serialized_model)
    print("Converted %s." % tflite_filename)

#This does matmul just fine but does not use the TPU
def test_tflite_model(tflite_filename, examples):
    """Run the model at *tflite_filename* once with ``tf.lite.Interpreter``.

    Args:
        tflite_filename: Path of the ``.tflite`` model to load.
        examples: Input arrays, indexed in the same order as the
            interpreter's input details.

    Returns:
        A list with one output tensor per entry in the interpreter's
        output details.
    """
    print("Loading TFLite interpreter for %s..." % tflite_filename)
    lite_runner = tf.lite.Interpreter(model_path=tflite_filename)
    lite_runner.allocate_tensors()
    inputs_info = lite_runner.get_input_details()
    outputs_info = lite_runner.get_output_details()
    print("input details: %s" % inputs_info)
    print("output details: %s" % outputs_info)

    # Feed every model input with the example at the same position.
    for position, detail in enumerate(inputs_info):
        lite_runner.set_tensor(detail['index'], examples[position])
    lite_runner.invoke()

    # Collect the outputs in the order the interpreter reports them.
    return [lite_runner.get_tensor(detail['index']) for detail in outputs_info]

# This runs the model through the Edge TPU BasicEngine instead of
# tf.lite.Interpreter. NOTE(review): the model file presumably has to be
# compiled for the Edge TPU beforehand -- confirm against the Coral docs.
# One matrix can be constant for my use case.
def test_tpu(tflite_filename, examples):
    """Run a single inference through the Edge TPU ``BasicEngine``.

    Args:
        tflite_filename: Path of the ``.tflite`` model handed to
            ``BasicEngine``.
        examples: Sequence holding exactly one input array. It is flattened
            to 1-D before being passed to the engine; no dtype conversion
            is done here.

    Returns:
        The output array returned by ``BasicEngine.run_inference``.

    Raises:
        ValueError: If ``examples`` does not contain exactly one array.
    """
    print("Loading TFLite interpreter for %s..." % tflite_filename)

    # NOTE(review): BasicEngine appears to accept one flat input buffer per
    # inference, so a second (constant) matrix would have to be baked into
    # the model -- confirm against the installed edgetpu release.
    if len(examples) != 1:
        raise ValueError(
            "test_tpu expects exactly one input array, got %d" % len(examples)
        )

    engine = BasicEngine(tflite_filename)

    # BasicEngine is not a tf.lite.Interpreter: it has no allocate_tensors(),
    # so the original call failed. Inference goes through run_inference().
    # NOTE(review): the engine presumably expects a quantized uint8 buffer,
    # and older edgetpu releases spelled this method RunInference -- confirm.
    flat_input = numpy.asarray(examples[0]).ravel()
    _latency_ms, output = engine.run_inference(flat_input)
    return output

def main():
    """Build a 2x2 matmul graph, export it to TFLite and compare the results.

    The graph is evaluated in a TensorFlow session, exported to
    ``model.tflite``, run again through ``test_tflite_model`` and then through
    ``test_tpu``. The inputs, the session output and the TFLite output are
    printed, followed by whether the two outputs agree and by the TPU result.
    """
    tflite_filename = "model.tflite"
    shape_a = (2, 2)
    shape_b = (2, 2)

    a = tf.placeholder(dtype=tf.float32, shape=shape_a, name="A")
    b = tf.placeholder(dtype=tf.float32, shape=shape_b, name="B")
    c = tf.matmul(a, b, name="output")

    # Fixed seed so every run uses the same example matrices.
    numpy.random.seed(1234)
    a_ = numpy.random.rand(*shape_a).astype(numpy.float32)
    b_ = numpy.random.rand(*shape_b).astype(numpy.float32)
    with tf.Session() as session:
        session_output = session.run(c, feed_dict={a: a_, b: b_})
        export_tflite_from_session(session, [a, b], [c], tflite_filename)

    # test_tflite_model returns a list of outputs; the model has a single one.
    tflite_output = test_tflite_model(tflite_filename, [a_, b_])[0]

    # Test the TPU. The result is kept in its own variable so that it does not
    # overwrite the TFLite output that is printed and compared below.
    tpu_output = test_tpu(tflite_filename, [a_, b_])

    print("Input example:")
    print(a_)
    print(a_.shape)
    print(b_)
    print(b_.shape)
    print("Session output:")
    print(session_output)
    print(session_output.shape)
    print("TFLite output:")
    print(tflite_output)
    print(tflite_output.shape)
    print(numpy.allclose(session_output, tflite_output))
    print("TPU output:")
    print(tpu_output)

# Run the demo only when this file is executed as a script.
if __name__ == "__main__":
    main()

Solution

You're only converting your model once (to a .tflite file), and the resulting model is not fully compiled for the Edge TPU. From the docs:

At the first point in the model graph where an unsupported operation occurs, the compiler partitions the graph into two parts. The first part of the graph that contains only supported operations is compiled into a custom operation that executes on the Edge TPU, and everything else executes on the CPU

There are several specific requirements that the model must meet:

- The model uses quantization-aware training.
- Tensor sizes and model parameters are constant at compile time.
- Tensors are 3-dimensional or smaller.
- The model only uses operations supported by the Edge TPU.

There is an online compiler as well as a CLI version that is useful for translating .tflite models into Edge TPU compatible .tflite models.

Your code is also incomplete. You've passed your model to the class here:

interpreter = BasicEngine(tflite_filename)

but you're missing the step of actually running the inference on the tensor:

output = RunInference(interpreter)