I am trying to implement a triplet loss in TensorFlow where the triplets are mined online. In my particular problem, I already have the anchor (image) - positive (text)
pairs. What I want to achieve is to build, for every image-text pair in the batch, both kinds of triplets:
anchor (image) - positive (text) - negative (text), and
anchor (text) - positive (image) - negative (image).
Let me know if you need any further information. I am looking forward to your answers!
I found that this is the solution I need:
def compute_loss(images: tf.Tensor, texts: tf.Tensor, margin: float) -> tf.Tensor:
    """Compute a bidirectional sum-of-hinges triplet loss with in-batch negatives.

    Row ``i`` of ``images`` and row ``i`` of ``texts`` are treated as the
    positive pair; every other row of the batch is used as a negative. Two
    hinge terms are accumulated for each positive pair:

    * image anchor:  ``max(0, margin - s(i, i) + s(i, j))`` for texts ``j != i``
    * text anchor:   ``max(0, margin - s(j, j) + s(i, j))`` for images ``i != j``

    where ``s(i, j)`` is the dot product of ``images[i]`` and ``texts[j]``.

    Args:
        images: Image embeddings. Assumed to be a rank-2 ``[batch, dim]``
            tensor whose rows are aligned with ``texts`` (TODO confirm with
            the caller; the diagonal extraction requires a square score
            matrix).
        texts: Text embeddings, assumed ``[batch, dim]`` with the same dtype
            as ``images``.
        margin: Minimum gap required between a positive score and any
            negative score before the triplet stops contributing.

    Returns:
        Scalar tensor: the batch mean of the per-image summed hinge costs plus
        the batch mean of the per-text summed hinge costs.
    """
    with tf.variable_scope(name_or_scope="loss"):
        # scores[i, j] = <images[i], texts[j]>; the diagonal holds the
        # scores of the positive (image, text) pairs.
        scores = tf.matmul(images, texts, transpose_b=True)
        diagonal = tf.diag_part(scores)
        # Zeros with the dtype/shape of the diagonal, so masking also works
        # for non-float32 embeddings (tf.zeros would default to float32).
        zero_diagonal = tf.zeros_like(diagonal)

        # Image anchors: the positive score of image i (broadcast along
        # row i) is compared with every score in that row, i.e. with all
        # contrastive texts for that image.
        image_anchor_cost = tf.nn.relu(
            margin - tf.reshape(diagonal, [-1, 1]) + scores
        )
        # Text anchors: the positive score of text j (broadcast along
        # column j) is compared with every score in that column, i.e. with
        # all contrastive images for that text.
        text_anchor_cost = tf.nn.relu(margin - diagonal + scores)

        # The diagonal entries compare each positive with itself and would
        # contribute a constant `margin`; clear them.
        image_anchor_cost = tf.linalg.set_diag(image_anchor_cost, zero_diagonal)
        text_anchor_cost = tf.linalg.set_diag(text_anchor_cost, zero_diagonal)

        # For each image, sum over its negative texts (across the row).
        image_anchor_cost = tf.reduce_sum(image_anchor_cost, axis=1)
        # For each text, sum over its negative images (down the column).
        text_anchor_cost = tf.reduce_sum(text_anchor_cost, axis=0)

        triplet_loss = (
            tf.reduce_mean(image_anchor_cost) + tf.reduce_mean(text_anchor_cost)
        )
        return triplet_loss