Search code examples
tensorflow · embedding · tensorboard · text-classification · projector

Tensorboard Embedding Visualization


I am working on a text classification problem. I have 3M+ rows which need to be classified into 20 categories.

Following are two code snippets from my entire code:

This is the code where my tf variables are defined:

 class TextCNNRNN(object):
    """CNN+RNN text-classification model (TF1 graph-mode).

    Only the placeholder and embedding setup is visible here; the
    convolution/recurrent layers presumably follow below this excerpt.
    """

    def __init__(self, embedding_mat, non_static, hidden_unit, sequence_length, max_pool_size,
                 num_classes, embedding_size, filter_sizes, num_filters, l2_reg_lambda=0.0):

        # Input placeholders: token ids, one-hot labels, dropout keep prob,
        # runtime batch size, a zero-pad tensor, and the true (unpadded)
        # length of each sequence.
        self.input_x = tf.placeholder(tf.int32, [None, sequence_length], name='input_x')
        self.input_y = tf.placeholder(tf.float32, [None, num_classes], name='input_y')
        self.dropout_keep_prob = tf.placeholder(tf.float32, name='dropout_keep_prob')
        self.batch_size = tf.placeholder(tf.int32, [])
        self.pad = tf.placeholder(tf.float32, [None, 1, embedding_size, 1], name='pad')
        self.real_len = tf.placeholder(tf.int32, [None], name='real_len')

        # Accumulator for L2 regularization (weighted by l2_reg_lambda elsewhere).
        l2_loss = tf.constant(0.0)

        # Embedding table pinned to CPU, as is conventional for large lookups.
        # NOTE(review): `non_static` is currently ignored — the commented-out
        # branch below would make the table a non-trainable tf.constant when
        # non_static is False; as written, the table is always trainable.
        with tf.device('/cpu:0'), tf.name_scope('embedding'):
            self.emb_var = tf.Variable(embedding_mat, name='emb_var')
            # if not non_static:
            #   self.emb_var = tf.constant(embedding_mat, name='enb_var')
            # else:
            #   self.emb_var = tf.Variable(embedding_mat, name='emb_var')
            # embedded_chars is a lookup *op* (batch, seq_len, embedding_size),
            # not a Variable — relevant to the projector issue discussed below.
            self.embedded_chars = tf.nn.embedding_lookup(self.emb_var, self.input_x)
            # Trailing singleton dim added for the conv layers (NHWC-style).
            self.emb = tf.expand_dims(self.embedded_chars, -1)

The tensor I would like to visualize is embedded_chars.

And this is the code where I am given input to the projector api:

# NOTE(review): this is the non-working attempt. The projector can only
# visualize tensors that exist in a checkpoint, i.e. tf.Variable objects
# saved by a Saver. `embedded_chars` is an embedding_lookup *op* — it is
# never written to the checkpoint, so the projector finds nothing to load.
config = projector.ProjectorConfig()
config.model_checkpoint_path = checkpoint_prefix + str(best_at_step) +'.ckpt'
embedding = config.embeddings.add()
# Pointing at an op's name, not a checkpointed Variable — this is the bug.
embedding.tensor_name = cnn_rnn.embedded_chars.name

#embedding.metadata_path = 'metadata.tsv'
# NOTE(review): the first argument to FileWriter is the log *directory*;
# passing `metadata_path` here looks suspicious — confirm it is a dir.
emb_writer = tf.summary.FileWriter(metadata_path,sess.graph)
projector.visualize_embeddings(emb_writer, config)

My expectation: I want to see my trained input data and how it's being classified.

Actual result: When I use the embedded_chars tensor as input to the projector, nothing loads. However, when I use emb_var I see the embeddings loading. The problem is that emb_var is just my vocabulary, but I need to see my actual dataset.


Solution

  • Figured it out.

    # Working approach: materialize the trained embedding matrix, wrap it in a
    # fresh tf.Variable, and save *that* Variable with a dedicated Saver so the
    # projector has a real checkpointed tensor to load.
    # NOTE(review): `output_var` and several path/loop names (i, foldername,
    # emb_dir, checkpoint_viz_prefix) are defined outside this excerpt.
    final_embed_matrix = sess.run(cnn_rnn.emb_var)
                # New Variable holding a snapshot of the trained weights.
                embedding_var = tf.Variable(final_embed_matrix, name='embedding_viz' + str(i))
                # Saver restricted to exactly the two tensors the projector needs.
                saver_embed = tf.train.Saver([embedding_var, output_var])
                sess.run(embedding_var.initializer)
                config = projector.ProjectorConfig()
                config.model_checkpoint_path = emb_dir + '/' + foldername + str(best_at_step)+'viz' +'.ckpt'
                # Writer rooted at the same directory the checkpoint lives in.
                emb_writer = tf.summary.FileWriter(emb_dir, sess.graph)
    
                # First embedding entry: the dataset-level tensor (output_var),
                # labeled via a per-fold metadata file.
                embedding = config.embeddings.add()
                embedding.metadata_path = foldername + '_metadata.tsv'
                embedding.tensor_name = output_var.name
    
                # Second entry: the vocabulary embedding snapshot.
                embedding = config.embeddings.add()
                embedding.metadata_path = 'metadata.tsv'
                embedding.tensor_name = embedding_var.name
    
                projector.visualize_embeddings(emb_writer, config)
                # NOTE(review): this save path (checkpoint_viz_prefix...) should
                # match config.model_checkpoint_path above — confirm they agree,
                # otherwise the projector will look for the wrong checkpoint.
                saver_embed.save(sess, checkpoint_viz_prefix + str(best_at_step)+'viz' +'.ckpt')