tensorflow, tensorflow-serving, textsum

TensorFlow Serving error when connecting with client: "input size does not match signature"


I exported the textsum model using the export_textsum.py file shown below, and when I connect using the textsumclient.py file below I receive the following error:

Traceback (most recent call last):
  File "textsum_client.py", line 90, in <module>
    tf.app.run()
  File "/usr/local/lib/python2.7/site-packages/tensorflow/python/platform/app.py", line 48, in run
    _sys.exit(main(_sys.argv[:1] + flags_passthrough))
  File "textsum_client.py", line 83, in main
    FLAGS.concurrency, FLAGS.num_tests)
  File "textsum_client.py", line 72, in do_singleDecode
    result = stub.Predict(request, 5.0)  # 5 seconds
  File "/usr/local/lib/python2.7/site-packages/grpc/beta/_client_adaptations.py", line 324, in __call__
    self._request_serializer, self._response_deserializer)
  File "/usr/local/lib/python2.7/site-packages/grpc/beta/_client_adaptations.py", line 210, in _blocking_unary_unary
    raise _abortion_error(rpc_error_call)
grpc.framework.interfaces.face.face.AbortionError: AbortionError(code=StatusCode.INVALID_ARGUMENT, details="input size does not match signature")

I believe it may have something to do with how tf_example is built in my export_textsum file, but I honestly have not had any luck figuring this out yet. Does anyone with a bit more experience know what I am doing wrong here? Any ideas that would help me narrow down exactly what is going on are welcome. Thanks.

textsumclient.py

from __future__ import print_function

import sys
import threading

# This is a placeholder for a Google-internal import.

from grpc.beta import implementations
import numpy
import tensorflow as tf
from datetime import datetime 

from tensorflow_serving.apis import predict_pb2
from tensorflow_serving.apis import prediction_service_pb2
#from tensorflow_serving.example import mnist_input_data


tf.app.flags.DEFINE_integer('concurrency', 1,
                            'maximum number of concurrent inference requests')
tf.app.flags.DEFINE_integer('num_tests', 10, 'Number of test images')
tf.app.flags.DEFINE_string('server', '172.17.0.2:9000', 'PredictionService host:port')
tf.app.flags.DEFINE_string('work_dir', '/tmp', 'Working directory. ')
FLAGS = tf.app.flags.FLAGS


def do_singleDecode(hostport, work_dir, concurrency, num_tests):
  #Connect to server
  host, port = hostport.split(':')
  channel = implementations.insecure_channel(host, int(port))
  stub = prediction_service_pb2.beta_create_PredictionService_stub(channel)

  #Prepare our request object
  request = predict_pb2.PredictRequest()
  request.model_spec.name = 'textsum_model'
  request.model_spec.signature_name = 'predict'  

  #Make some test data
  test_data_set = ['This is a test','This is a sample']

  #Lets test her out
  now = datetime.now()
  article, abstract = test_data_set

  #***** POPULATE REQUEST INPUTS *****

  request.inputs['article'].CopyFrom(
      tf.contrib.util.make_tensor_proto(test_data_set[0], shape=[len(test_data_set[0])]))
  request.inputs['abstract'].CopyFrom(
      tf.contrib.util.make_tensor_proto(test_data_set[1], shape=[len(test_data_set[1])]))


  result = stub.Predict(request, 5.0)  # 5 seconds
  waiting = datetime.now() - now
  return result, waiting.microseconds


def main(_):
  if not FLAGS.server:
      print('please specify server host:port')
      return

  result, waiting = do_singleDecode(FLAGS.server, FLAGS.work_dir,
                            FLAGS.concurrency, FLAGS.num_tests)
  print('\nTextsum result: %s%%' % result)
  print('Waiting time is: ', waiting, 'microseconds.')



if __name__ == '__main__':
    tf.app.run()

export_textsum.py

            decode_mdl_hps = hps
            # Only need to restore the 1st step and reuse it since
            # we keep and feed in state for each step's output.
            decode_mdl_hps = hps._replace(dec_timesteps=1)
            model = seq2seq_attention_model.Seq2SeqAttentionModel(
                decode_mdl_hps, vocab, num_gpus=FLAGS.num_gpus)
            decoder = seq2seq_attention_decode.BSDecoder(model, batcher, hps, vocab)
            serialized_output = tf.placeholder(tf.string, name='tf_output')


            serialized_tf_example = tf.placeholder(tf.string, name='tf_example')
            feature_configs = {
                'article': tf.FixedLenFeature(shape=[1], dtype=tf.string),
                'abstract': tf.FixedLenFeature(shape=[1], dtype=tf.string),
            }
            tf_example = tf.parse_example(serialized_tf_example, feature_configs)

            saver = tf.train.Saver()
            config = tf.ConfigProto(allow_soft_placement = True)

            with tf.Session(config = config) as sess:

                # Restore variables from training checkpoints.
                ckpt = tf.train.get_checkpoint_state(FLAGS.checkpoint_dir)
                if ckpt and ckpt.model_checkpoint_path:
                    saver.restore(sess, ckpt.model_checkpoint_path)
                    global_step = ckpt.model_checkpoint_path.split('/')[-1].split('-')[-1]
                    print('Successfully loaded model from %s at step=%s.' %
                        (ckpt.model_checkpoint_path, global_step))
                else:
                    print('No checkpoint file found at %s' % FLAGS.checkpoint_dir)
                    return

                # ************** EXPORT MODEL ***************
                export_path = os.path.join(FLAGS.export_dir,str(FLAGS.export_version))
                print('Exporting trained model to %s' % export_path)


                #-------------------------------------------

                tensor_info_inputs = tf.saved_model.utils.build_tensor_info(serialized_tf_example)
                tensor_info_outputs = tf.saved_model.utils.build_tensor_info(serialized_output)


                prediction_signature = (
                    tf.saved_model.signature_def_utils.build_signature_def(
                        inputs={ tf.saved_model.signature_constants.PREDICT_INPUTS: tensor_info_inputs},
                        outputs={tf.saved_model.signature_constants.PREDICT_OUTPUTS:tensor_info_outputs},
                        method_name=tf.saved_model.signature_constants.PREDICT_METHOD_NAME
                        ))

                #----------------------------------

                legacy_init_op = tf.group(tf.tables_initializer(), name='legacy_init_op')
                builder = saved_model_builder.SavedModelBuilder(export_path)

                builder.add_meta_graph_and_variables(
                    sess=sess, 
                    tags=[tf.saved_model.tag_constants.SERVING],
                    signature_def_map={
                        'predict':prediction_signature,
                    },
                    legacy_init_op=legacy_init_op)
                builder.save()

                print('Successfully exported model to %s' % export_path)
    except:
        traceback.print_exc()
        pass


def main(_):
    Export()

if __name__ == "__main__":
    tf.app.run()

Solution

  • QuantumLicht, again, thank you for your assistance here, as it pointed to one part of my issue: the keys used in the feature config. I am still using TF 1.2, and I remember reading some time back that newer versions include fixes that allow custom key names to be used. That said, as I debugged I noticed that the serving signature was expecting a single input named "inputs". So I removed "abstract" and renamed "article" to "inputs". I then had to modify the output of decode, and the final issue was that I was only loading the model but never actually running the decode function against it, so there was no real output tensor to pass into tensor_info_outputs.
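
For reference, a minimal sketch of what the export-side changes might look like, under the assumptions described above: a single input key 'inputs' (which is the literal value of tf.saved_model.signature_constants.PREDICT_INPUTS), and an output tensor that actually comes from running the model. The decode_output name below is a hypothetical stand-in; in the real export it must be the tensor produced by running the decode step, not an unused placeholder.

import tensorflow as tf

serialized_tf_example = tf.placeholder(tf.string, name='tf_example')
feature_configs = {
    # Single feature key matching what the serving signature expects.
    'inputs': tf.FixedLenFeature(shape=[1], dtype=tf.string),
}
tf_example = tf.parse_example(serialized_tf_example, feature_configs)

# Hypothetical stand-in: replace with the tensor returned by actually running
# the model's decode step against the loaded checkpoint.
decode_output = tf.identity(tf_example['inputs'], name='decode_output')

tensor_info_inputs = tf.saved_model.utils.build_tensor_info(serialized_tf_example)
tensor_info_outputs = tf.saved_model.utils.build_tensor_info(decode_output)

prediction_signature = tf.saved_model.signature_def_utils.build_signature_def(
    inputs={'inputs': tensor_info_inputs},
    outputs={'outputs': tensor_info_outputs},
    method_name=tf.saved_model.signature_constants.PREDICT_METHOD_NAME)

On the client side the request would then carry a single value under the same 'inputs' key, rather than separate 'article' and 'abstract' entries; shape=[1] here is an assumption for sending one string value:

# In do_singleDecode of the client, replacing the two CopyFrom calls:
request.inputs['inputs'].CopyFrom(
    tf.contrib.util.make_tensor_proto(test_data_set[0], shape=[1]))
result = stub.Predict(request, 5.0)  # 5 seconds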