Search code examples
pytorch

Simple RNN Error "Input and hidden tensors are not at the same device, found input tensor at cuda:0 and hidden tensor at cpu" How to?


I'm working on a basic RNN-NLP classifier using PyTorch, and trying to use CUDA for acceleration.(On Google_Colab) but, I can't solve this error. The code is written like this.

error message

Input and hidden tensors are not at the same device, found input tensor at cuda:0 and hidden tensor at cpu

RNN class

class RNN(nn.Module):
  def __init__(self, vocab_size, emb_size, hidden_size, output_size):
    super().__init__()
    self.hidden_size = hidden_size
    self.emb = nn.Embedding(vocab_size, emb_size)
    self.rnn = nn.RNN(emb_size, hidden_size, nonlinearity='tanh', batch_first=True)
    self.fc = nn.Linear(hidden_size, output_size)

  def forward(self, x):
    self.batch_size = x.size()[0]
    hidden = self.init_hidden()
    emb = self.emb(x)
    out, hidden = self.rnn(emb, hidden)
    out = self.fc(out[:, -1, :])
    return out

  def init_hidden(self):
    hidden = torch.zeros(1, self.batch_size, self.hidden_size)
    return hidden

device

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

Setting var

VOCAB_SIZE = len(word_id.keys()) +1
EMB_SIZE = 300
OUTPUT_SIZE = 4
HIDDEN_SIZE = 50

model = RNN(VOCAB_SIZE,EMB_SIZE, HIDDEN_SIZE, OUTPUT_SIZE)

model = model.to(device)

Predict

for i in range(10):
  # datasetet の、リスト0indexに入力要素
  X, y = dataset_train[i]
  X = X.to(device)
  print(torch.softmax(model(X.unsqueeze(0)), dim=1))

This code works on CPU. but, can't works on "GPU". Follow this error, I try to make some fix code. ex) hidden.to(device),,,,

but,I can't solve... Pleas someone tell me how to solve. Thank you very much for my question.


Solution

  • Doesn't doing something like the following work?

    device = torch.device("cuda" if torch.cuda.is_available() else "CPU")
    
    class RNN(nn.Module):
      def __init__(self, vocab_size, emb_size, hidden_size, output_size):
        super().__init__()
        self.hidden_size = hidden_size
        self.emb = nn.Embedding(vocab_size, emb_size)
        self.rnn = nn.RNN(emb_size, hidden_size, nonlinearity='tanh', batch_first=True)
        self.fc = nn.Linear(hidden_size, output_size)
        self.to(device)
    
      def forward(self, x):
        self.batch_size = x.size()[0]
        hidden = self.init_hidden()
        emb = self.emb(x)
        out, hidden = self.rnn(emb, hidden)
        out = self.fc(out[:, -1, :])
        return out
    
      def init_hidden(self):
        hidden = torch.zeros(1, self.batch_size, self.hidden_size).to(device)
        return hidden