
Use .h5 model with NVIDIA TensorRT on CPU instead of GPU


I have a .h5 model (for GPU?) that I want to run on my CPU. I converted the model using Python and it looks like the conversion succeeds, but when I run it in the TensorRT Docker server I get this error:

     [[TRTEngineOp_8]]
E0106 21:02:54.141211 1 model_repository_manager.cc:810] failed to load 'retinanet_TRT' version 1: Internal: No OpKernel was registered to support Op 'TRTEngineOp' used by {{node TRTEngineOp_16}}with these attrs: [use_calibration=false, fixed_input_size=true, input_shapes=[[?,?,?,3]], OutT=[DT_FLOAT], precision_mode="FP16", static_engine=false, serialized_segment="\ne\n\021T...2\005VALID", cached_engine_batches=[], InT=[DT_FLOAT], calibration_data="", output_shapes=[[?,?,?,64]], workspace_size_bytes=2127659, max_cached_engines_count=1, segment_funcdef_name="TRTEngineOp_16_native_segment"]
Registered devices: [CPU, XLA_CPU]
Registered kernels:
  device='GPU'

What can I do to convert the model so that it can run on a CPU only?

It is converted like this:

# Assumed imports (omitted from the original snippet); K may come from
# standalone Keras or tf.keras, depending on how the model was built. In the
# 19.10 TF container (TF 1.14), create_inference_graph is provided by
# tensorflow.contrib.tensorrt.
import tensorflow as tf
import tensorflow.contrib.tensorrt as trt
from tensorflow.keras import backend as K  # or: from keras import backend as K

with tf.Graph().as_default():
    with tf.Session() as sess:
        graph = sess.graph
        K.set_session(sess)
        K.set_learning_phase(0)
        inference_model = create_model(num_classes=num_classes)
        load_model()

        # Find output nodes
        outputs, output_node_list = get_nodes_from_model(inference_model.outputs)
        # find input nodes
        inputs, input_node_list = get_nodes_from_model(inference_model.inputs)

        generate_config()

        with sess.as_default():
            freeze_var_names = list(set(v.op.name for v in tf.global_variables()).difference(None or []))
            output_names = output_node_list or []
            output_names += [v.op.name for v in tf.global_variables()]
            input_graph_def = graph.as_graph_def()
            for node in input_graph_def.node:
                # print(node.name)
                node.device = ""
            frozen_graph = tf.compat.v1.graph_util.convert_variables_to_constants(
                sess, input_graph_def, output_names, freeze_var_names)
            trt_graph = trt.create_inference_graph(
                # frozen model
                input_graph_def=frozen_graph,
                outputs=output_node_list,
                # specify the max workspace
                max_workspace_size_bytes=500000000,
                # precision, can be "FP32" (32 floating point precision) or "FP16"
                precision_mode=precision,
                is_dynamic_op=True)
            # Finally we serialize and dump the output graph to the filesystem
            with tf.gfile.GFile(model_save_path, 'wb') as f:
                f.write(trt_graph.SerializeToString())

            print("TensorRT model is successfully stored! \n")

Setting is_dynamic_op=True already helped the conversion finish (it now reports that the model is successfully stored), but I still can't load the model in the TensorRT server Docker container.

I am using the nvcr.io/nvidia/tensorflow:19.10-py3 container to convert the models and the nvcr.io/nvidia/tensorrtserver:19.10-py3 container for the TensorRT server.
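
One way to confirm what the server is complaining about is to count the TRTEngineOp nodes in the stored .pb; a minimal sketch, assuming model_save_path points at the serialized graph written above:

# Count the TensorRT engine ops embedded in the stored graph. A non-zero
# count means the graph needs the TRTEngineOp kernel, which is registered
# for GPU only.
import tensorflow as tf

graph_def = tf.GraphDef()
with tf.gfile.GFile(model_save_path, 'rb') as f:
    graph_def.ParseFromString(f.read())
print(sum(1 for node in graph_def.node if node.op == 'TRTEngineOp'))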


Solution

  • Just don't convert your model to TensorRT. As the error message shows, the TRTEngineOp kernel is registered only for GPU, so a TF-TRT graph cannot be loaded on a CPU-only server. Freeze the plain TensorFlow graph and serve that instead (a quick CPU check of the frozen graph is sketched after the code):

    with tf.Graph().as_default():
        with tf.Session() as sess:
            graph = sess.graph
            K.set_session(sess)
            K.set_learning_phase(0)
            inference_model = create_model(num_classes=num_classes)
            load_model()
    
            # Find output nodes
            outputs, output_node_list = get_nodes_from_model(inference_model.outputs)
            # find input nodes
            inputs, input_node_list = get_nodes_from_model(inference_model.inputs)
    
            generate_config()
    
            with sess.as_default():
                freeze_var_names = list(set(v.op.name for v in tf.global_variables()).difference(None or []))
                output_names = output_node_list or []
                output_names += [v.op.name for v in tf.global_variables()]
                input_graph_def = graph.as_graph_def()
                for node in input_graph_def.node:
                    # print(node.name)
                    node.device = ""
                frozen_graph = tf.compat.v1.graph_util.convert_variables_to_constants(
                    sess, input_graph_def, output_names, freeze_var_names)
    
                # Finally we serialize and dump the output graph to the filesystem
                with tf.gfile.GFile(model_save_path, 'wb') as f:
                    f.write(frozen_graph.SerializeToString())
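
    The frozen graph can then be checked on CPU with plain TensorFlow; a minimal sketch, where the dummy input shape and the ':0' tensor suffix are assumptions, and input_node_list / output_node_list are the node-name lists collected above:

    import numpy as np
    import tensorflow as tf

    # Load the frozen GraphDef written above and run it on CPU.
    graph_def = tf.GraphDef()
    with tf.gfile.GFile(model_save_path, 'rb') as f:
        graph_def.ParseFromString(f.read())

    with tf.Graph().as_default() as graph:
        tf.import_graph_def(graph_def, name='')
        with tf.Session(graph=graph) as sess:
            # Dummy input; replace the shape with whatever the model expects.
            image = np.zeros((1, 800, 800, 3), dtype=np.float32)
            results = sess.run(
                [name + ':0' for name in output_node_list],
                feed_dict={input_node_list[0] + ':0': image})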