I have a .h5 model (for GPU?) that I want to run on my CPU. I converted the model using Python and the conversion appears to succeed, but when I run it in the TensorRT Docker server I get this error:
[[TRTEngineOp_8]] E0106 21:02:54.141211 1 model_repository_manager.cc:810] failed to load 'retinanet_TRT' version 1: Internal: No OpKernel was registered to support Op 'TRTEngineOp' used by {{node TRTEngineOp_16}}with these attrs: [use_calibration=false, fixed_input_size=true, input_shapes=[[?,?,?,3]], OutT=[DT_FLOAT], precision_mode="FP16", static_engine=false, serialized_segment="\ne\n\021T...2\005VALID", cached_engine_batches=[], InT=[DT_FLOAT], calibration_data="", output_shapes=[[?,?,?,64]], workspace_size_bytes=2127659, max_cached_engines_count=1, segment_funcdef_name="TRTEngineOp_16_native_segment"] Registered devices: [CPU, XLA_CPU] Registered kernels: device='GPU'
What can I do to convert the model so that I can use it with a CPU only?
It is converted like this:
with tf.Graph().as_default():
    with tf.Session() as sess:
        graph = sess.graph
        K.set_session(sess)
        K.set_learning_phase(0)
        inference_model = create_model(num_classes=num_classes)
        load_model()
        # Find output nodes
        outputs, output_node_list = get_nodes_from_model(inference_model.outputs)
        # Find input nodes
        inputs, input_node_list = get_nodes_from_model(inference_model.inputs)
        generate_config()
        with sess.as_default():
            freeze_var_names = list(set(v.op.name for v in tf.global_variables()).difference(None or []))
            output_names = output_node_list or []
            output_names += [v.op.name for v in tf.global_variables()]
            input_graph_def = graph.as_graph_def()
            for node in input_graph_def.node:
                # print(node.name)
                node.device = ""
            frozen_graph = tf.compat.v1.graph_util.convert_variables_to_constants(
                sess, input_graph_def, output_names, freeze_var_names)
            trt_graph = trt.create_inference_graph(
                # frozen model
                input_graph_def=frozen_graph,
                outputs=output_node_list,
                # specify the max workspace
                max_workspace_size_bytes=500000000,
                # precision, can be "FP32" (32-bit floating point) or "FP16"
                precision_mode=precision,
                is_dynamic_op=True)
            # Finally we serialize and dump the output graph to the filesystem
            with tf.gfile.GFile(model_save_path, 'wb') as f:
                f.write(trt_graph.SerializeToString())
            print("TensorRT model is successfully stored! \n")
Setting is_dynamic_op=True already helped the conversion succeed (it now reports that the model is successfully stored), but I still can't load the model in the Docker TensorRT server.
I am using the nvcr.io/nvidia/tensorflow:19.10-py3 container to convert the models and the nvcr.io/nvidia/tensorrtserver:19.10-py3 container for the TensorRT server.
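For debugging, here is a minimal sketch to check whether a stored .pb graph still contains TRTEngineOp nodes (the path assigned to model_save_path below is a placeholder; any TRTEngineOp node will only load on a GPU instance, as the error message shows):

import tensorflow as tf

# Assumption: model_save_path points at the .pb file written by the conversion script above.
model_save_path = "model.pb"

graph_def = tf.GraphDef()
with tf.gfile.GFile(model_save_path, 'rb') as f:
    graph_def.ParseFromString(f.read())

# TRTEngineOp kernels are registered only for GPU, so any hit here
# means the graph cannot run on a CPU-only server instance.
trt_nodes = [n.name for n in graph_def.node if n.op == "TRTEngineOp"]
print("TRTEngineOp nodes found:", len(trt_nodes))
for name in trt_nodes:
    print(" ", name)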
Just don't convert your model to TensorRT. The TRTEngineOp nodes that trt.create_inference_graph inserts only have GPU kernels (that is exactly what the error says: Registered kernels: device='GPU'), so a TensorRT-optimized graph can never run on a CPU-only instance. Serve the plain frozen graph instead:
with tf.Graph().as_default():
    with tf.Session() as sess:
        graph = sess.graph
        K.set_session(sess)
        K.set_learning_phase(0)
        inference_model = create_model(num_classes=num_classes)
        load_model()
        # Find output nodes
        outputs, output_node_list = get_nodes_from_model(inference_model.outputs)
        # Find input nodes
        inputs, input_node_list = get_nodes_from_model(inference_model.inputs)
        generate_config()
        with sess.as_default():
            freeze_var_names = list(set(v.op.name for v in tf.global_variables()).difference(None or []))
            output_names = output_node_list or []
            output_names += [v.op.name for v in tf.global_variables()]
            input_graph_def = graph.as_graph_def()
            for node in input_graph_def.node:
                # print(node.name)
                node.device = ""
            frozen_graph = tf.compat.v1.graph_util.convert_variables_to_constants(
                sess, input_graph_def, output_names, freeze_var_names)
            # Finally we serialize and dump the output graph to the filesystem
            with tf.gfile.GFile(model_save_path, 'wb') as f:
                f.write(frozen_graph.SerializeToString())
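Before handing the frozen graph to the server, you can sanity-check that it loads and runs on CPU. This is a rough sketch under the assumption that model_save_path is the .pb written above; "input_1:0" and "output_0:0" are placeholders, so substitute the real tensor names reported by get_nodes_from_model:

import numpy as np
import tensorflow as tf

# Assumption: model_save_path is the frozen graph written by the script above.
model_save_path = "frozen_model.pb"

graph_def = tf.GraphDef()
with tf.gfile.GFile(model_save_path, 'rb') as f:
    graph_def.ParseFromString(f.read())

with tf.Graph().as_default() as graph:
    tf.import_graph_def(graph_def, name="")
    # Hide the GPU to mimic a CPU-only server instance.
    with tf.Session(graph=graph, config=tf.ConfigProto(device_count={'GPU': 0})) as sess:
        # Placeholder tensor names -- replace with your model's actual input/output names.
        input_tensor = graph.get_tensor_by_name("input_1:0")
        output_tensor = graph.get_tensor_by_name("output_0:0")
        dummy = np.random.rand(1, 800, 800, 3).astype(np.float32)
        result = sess.run(output_tensor, feed_dict={input_tensor: dummy})
        print("Output shape:", result.shape)

If this runs without the "No OpKernel was registered" error, the same .pb should load as a TensorFlow GraphDef model in the inference server on CPU.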