
TensorBoard embeddings hang with "Computing PCA"


I'm trying to display my embeddings in TensorBoard. When I open the embeddings tab of TensorBoard I get "Computing PCA..." and TensorBoard hangs indefinitely.

Before that, it does load my tensor of shape 200x128, and it finds the metadata file too.

I tried that on TF versions 0.12 and 1.1 with the same result.

import os

import numpy as np
import tensorflow as tf
from tensorflow.contrib.tensorboard.plugins import projector

# placeholder for the embeddings; filled in batch by batch below
features = np.zeros(shape=(num_batches*batch_size, 128), dtype=float)
embedding_var = tf.Variable(features, name='feature_embedding')

# projector config pointing at the embedding tensor and its metadata
config = projector.ProjectorConfig()
embedding = config.embeddings.add()
embedding.tensor_name = 'feature_embedding'
metadata_path = os.path.join(self.log_dir, 'metadata.tsv')
embedding.metadata_path = metadata_path

with tf.Session(config=self.config) as sess:
  tf.global_variables_initializer().run()
  restorer = tf.train.Saver()
  restorer.restore(sess, self.pretrained_model_path)

  # write one metadata line per example while computing the features
  with open(metadata_path, 'w') as f:
    for step in range(num_batches):
      batch_images, batch_labels = data.next()

      for label in batch_labels:
        f.write('%s\n' % label)

      feed_dict = {model.images: batch_images}
      features[step*batch_size : (step+1)*batch_size, :] = \
                  sess.run(model.features, feed_dict)

  sess.run(embedding_var.initializer)
  projector.visualize_embeddings(tf.summary.FileWriter(self.log_dir), config)

Solution

  • I don't know exactly what was wrong in the code above, but I rewrote it in a different way (below), and it works. The difference is when and how the embedding_var is initialized; the rewrite also explicitly saves a checkpoint containing the embedding variable, which is where the projector loads the tensor from.

    I also made a gist that you can copy-paste the code from.

    import os

    import numpy as np
    import tensorflow as tf
    from tensorflow.contrib.tensorboard.plugins import projector

    # a numpy array for embeddings and a list for labels
    features = np.zeros(shape=(num_batches*self.batch_size, 128), dtype=float)
    labels = []

    # compute embeddings batch by batch
    with tf.Session(config=self.config) as sess:
      tf.global_variables_initializer().run()
      restorer = tf.train.Saver()
      restorer.restore(sess, self.pretrained_model)

      for step in range(num_batches):
        batch_images, batch_labels = data.next()
        labels += batch_labels

        feed_dict = {model.images: batch_images}
        features[step*self.batch_size : (step+1)*self.batch_size, :] = \
                    sess.run(model.features, feed_dict)

    # write labels
    metadata_path = os.path.join(self.log_dir, 'metadata.tsv')
    with open(metadata_path, 'w') as f:
      for label in labels:
        f.write('%s\n' % label)

    # write embeddings
    with tf.Session(config=self.config) as sess:

      config = projector.ProjectorConfig()
      embedding = config.embeddings.add()
      embedding.tensor_name = 'feature_embedding'
      embedding.metadata_path = metadata_path

      embedding_var = tf.Variable(features, name='feature_embedding')
      sess.run(embedding_var.initializer)
      projector.visualize_embeddings(tf.summary.FileWriter(self.log_dir), config)

      # save a checkpoint so the projector can load the embedding tensor
      saver = tf.train.Saver({"feature_embedding": embedding_var})
      saver.save(sess, os.path.join(self.log_dir, 'model_features'))
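
    To check the result, point TensorBoard at the same log directory and open the embeddings tab; it should now load the tensor from the saved checkpoint instead of hanging on "Computing PCA...". The path below is illustrative; use whatever self.log_dir was in your run:

        tensorboard --logdir=/path/to/log_dir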