Simple RNN error: "Input and hidden tensors are not at the same device, found input tensor at cuda:0 and hidden tensor at cpu" — how do I fix it?

I'm working on a basic RNN-based NLP classifier using PyTorch and trying to use CUDA for acceleration (on Google Colab), but I can't solve this error. The code is written like this.

error message

Input and hidden tensors are not at the same device, found input tensor at cuda:0 and hidden tensor at cpu

RNN class

class RNN(nn.Module):
  """Classifier: embedding -> single tanh nn.RNN (batch_first) -> linear layer.

  Only the RNN output at the last time step is passed to the linear layer.
  """

  def __init__(self, vocab_size, emb_size, hidden_size, output_size):
    super().__init__()
    self.hidden_size = hidden_size
    self.emb = nn.Embedding(vocab_size, emb_size)
    self.rnn = nn.RNN(emb_size, hidden_size, nonlinearity='tanh', batch_first=True)
    self.fc = nn.Linear(hidden_size, output_size)

  def forward(self, x):
    """Return the linear-layer output for the last time step of x.

    Assumes x is a (batch, seq_len) tensor of integer token ids -- confirm
    against the dataset that feeds this model.
    """
    # Remembered on the instance because init_hidden() reads it.
    self.batch_size = x.size()[0]
    # NOTE: this hidden state is created on the CPU (see init_hidden). Once the
    # module and x live on cuda, self.rnn receives tensors on two different
    # devices, which is the error reported in this question.
    hidden = self.init_hidden()
    emb = self.emb(x)
    out, hidden = self.rnn(emb, hidden)
    # out[:, -1, :] selects the last time step for every batch element.
    out = self.fc(out[:, -1, :])
    return out

  def init_hidden(self):
    """Return an all-zero initial hidden state of shape (1, batch_size, hidden_size)."""
    # No device argument is given, so torch.zeros allocates on the default
    # device (the CPU unless the default has been changed); model.to(device)
    # does not affect tensors created here.
    hidden = torch.zeros(1, self.batch_size, self.hidden_size)
    return hidden

device

# Use the GPU when PyTorch can see one, otherwise fall back to the CPU.
device_name = "cuda" if torch.cuda.is_available() else "cpu"
device = torch.device(device_name)

Setting var

# Model hyperparameters. The vocabulary size is one larger than the number of
# entries in word_id (presumably to reserve an extra id such as padding or
# unknown -- confirm against how word_id is built).
VOCAB_SIZE = len(word_id.keys()) + 1
EMB_SIZE = 300
HIDDEN_SIZE = 50
OUTPUT_SIZE = 4

# Build the classifier and move its parameters to the selected device.
model = RNN(VOCAB_SIZE, EMB_SIZE, HIDDEN_SIZE, OUTPUT_SIZE).to(device)

Predict

for i in range(10):
  # Element 0 of each dataset item is the input; y (presumably the label) is
  # not used in this loop.
  X, y = dataset_train[i]
  X = X.to(device)
  # unsqueeze(0) adds a leading batch dimension of size 1 before the forward pass.
  scores = model(X.unsqueeze(0))
  print(torch.softmax(scores, dim=1))

This code works on the CPU, but it does not work on the GPU. Following the error message, I tried a few fixes, e.g. hidden.to(device), ...

But I still can't solve it. Could someone please tell me how to fix this? Thank you very much for reading my question.

Solution

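Does something like the following work? The hidden state returned by init_hidden is created on the CPU by default, so it has to be created on (or moved to) the same device as the input. Note that `tensor.to(device)` returns a new tensor rather than modifying the tensor in place, so its result must be used.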

# Device type strings are lower-case: "cpu", not "CPU". The upper-case spelling
# is rejected by torch.device on machines without CUDA.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

class RNN(nn.Module):
  """Classifier: embedding -> single tanh nn.RNN (batch_first) -> linear layer.

  The module moves itself to the module-level `device` on construction. The
  initial hidden state is always created on the same device and with the same
  dtype as the embedded input, so the forward pass keeps working if the model
  and its inputs are later moved or cast (e.g. `.cpu()`, `.double()`).
  """

  def __init__(self, vocab_size, emb_size, hidden_size, output_size):
    super().__init__()
    self.hidden_size = hidden_size
    self.emb = nn.Embedding(vocab_size, emb_size)
    self.rnn = nn.RNN(emb_size, hidden_size, nonlinearity='tanh', batch_first=True)
    self.fc = nn.Linear(hidden_size, output_size)
    # Move every parameter registered above to the selected device.
    self.to(device)

  def forward(self, x):
    """Return the linear-layer output for the last time step of x.

    Assumes x is a (batch, seq_len) tensor of integer token ids that lives on
    the same device as the module's parameters.
    """
    # Kept as an attribute because init_hidden() reads it.
    self.batch_size = x.size(0)
    emb = self.emb(x)
    # Derive device/dtype from the tensor the RNN will actually consume rather
    # than from a global, so input and hidden state can never disagree.
    hidden = self.init_hidden(device=emb.device, dtype=emb.dtype)
    out, _ = self.rnn(emb, hidden)
    # out[:, -1, :] selects the last time step for every batch element.
    return self.fc(out[:, -1, :])

  def init_hidden(self, device=None, dtype=None):
    """Return an all-zero hidden state of shape (1, batch_size, hidden_size).

    Args:
      device: Device to allocate on. Defaults to the device of the module's
        own parameters.
      dtype: Floating-point dtype. Defaults to the dtype of the module's own
        parameters.

    Reads `self.batch_size`, which forward() sets on every call.
    """
    reference = self.fc.weight
    if device is None:
      device = reference.device
    if dtype is None:
      dtype = reference.dtype
    # Allocate directly on the target device instead of creating the tensor on
    # the CPU and copying it over afterwards.
    return torch.zeros(1, self.batch_size, self.hidden_size, device=device, dtype=dtype)