Tags: pytorch, lstm, tf-idf

How to use a TF-IDF matrix in an LSTM in PyTorch


I am using InceptionV3 and an LSTM for an image captioning task, and I want to use a TF-IDF matrix instead of the embedding layer in the LSTM decoder for word representation. What is the correct way to do this in PyTorch?

import torch
import torch.nn as nn

class decoderRNN(nn.Module):
    def __init__(self, embed_size, vocab_size, hidden_size, num_layers, em_mat):
        super(decoderRNN, self).__init__()
        self.embedding = nn.Embedding(vocab_size, embed_size)
        self.lstm = nn.LSTM(embed_size, hidden_size, num_layers)
        self.linear = nn.Linear(hidden_size, vocab_size)
        self.dropout = nn.Dropout(0.5)

    def forward(self, features, caption):
        # caption: (seq_len, batch) -> (seq_len, batch, embed_size)
        embeddings = self.dropout(self.embedding(caption))
        # prepend the image features as the first time step
        embeddings = torch.cat((features.unsqueeze(0), embeddings), dim=0)
        hiddens, _ = self.lstm(embeddings)
        outputs = self.linear(hiddens)
        return outputs

The shape of the dense TF-IDF matrix is (2295, 20748).
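
(For reference, a dense TF-IDF matrix like this is usually produced with something along the lines of scikit-learn's TfidfVectorizer; the exact preprocessing is not shown in the question, so the snippet below is only a hypothetical sketch with made-up captions. Note that TfidfVectorizer yields a documents × terms layout, so if the rows are meant to serve as per-word vectors indexed by vocabulary id, the matrix may need to be transposed.)

from sklearn.feature_extraction.text import TfidfVectorizer

# hypothetical caption corpus, only to illustrate the shapes involved
corpus = ["a dog runs on the grass", "a man rides a horse"]
vectorizer = TfidfVectorizer()
tfidf_sparse = vectorizer.fit_transform(corpus)   # (num_captions, num_terms), sparse
tfidf_dense = tfidf_sparse.toarray()              # dense NumPy array
word_vectors = tfidf_dense.T                      # (num_terms, num_captions): one row per term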


Solution

  • You can follow something like this: first convert the TF-IDF matrix to a PyTorch tensor and move it to the GPU (or CPU), then replace the trainable embedding layer in the decoderRNN constructor with a frozen embedding built from the TF-IDF tensor (nn.Embedding.from_pretrained), and adjust the forward method to look up TF-IDF rows instead of learned embeddings. A short usage sketch follows the class below.

    import torch
    import torch.nn as nn

    class decoderRNN(nn.Module):
        def __init__(self, tfidf_tensor, hidden_size, num_layers, vocab_size):
            super(decoderRNN, self).__init__()
            # frozen lookup table: row i of tfidf_tensor is the TF-IDF vector for word i
            self.tfidf_embedding = nn.Embedding.from_pretrained(tfidf_tensor, freeze=True)
            self.lstm = nn.LSTM(input_size=tfidf_tensor.shape[1], hidden_size=hidden_size,
                                num_layers=num_layers, batch_first=True)
            self.linear = nn.Linear(hidden_size, vocab_size)

        def forward(self, features, caption):
            # caption: (batch, seq_len) -> (batch, seq_len, tfidf_dim)
            tfidf_embed = self.tfidf_embedding(caption)
            # prepend the image features as the first time step (batch_first=True)
            tfidf_embed = torch.cat((features.unsqueeze(1), tfidf_embed), dim=1)
            hiddens, _ = self.lstm(tfidf_embed)
            outputs = self.linear(hiddens)
            return outputs
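
  • For completeness, here is a minimal usage sketch of the steps described above: converting the dense TF-IDF matrix to a tensor, moving it to the device, and running one forward pass. The sizes and variable names are assumptions for illustration, and the matrix is assumed to have one row per vocabulary index (transpose it first if it is laid out as documents × terms).

    import numpy as np
    import torch

    # stand-in for the real dense TF-IDF matrix: one row per vocabulary word (assumed layout)
    vocab_size, tfidf_dim = 2295, 20748
    tfidf_dense = np.random.rand(vocab_size, tfidf_dim).astype("float32")

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    tfidf_tensor = torch.from_numpy(tfidf_dense).to(device)

    decoder = decoderRNN(tfidf_tensor, hidden_size=512, num_layers=1, vocab_size=vocab_size).to(device)

    batch_size, seq_len = 4, 12
    # the encoder features must match the LSTM input size (tfidf_dim) for the concatenation to work
    features = torch.randn(batch_size, tfidf_dim, device=device)
    captions = torch.randint(0, vocab_size, (batch_size, seq_len), device=device)

    outputs = decoder(features, captions)   # (batch, seq_len + 1, vocab_size)
    print(outputs.shape)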