I have made a model in PyTorch for use in an OpenAI Gym environment. I defined it as follows:
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.distributions import Categorical

class Policy(nn.Module):
    def __init__(self, s_size=8, h_size=16, a_size=4):
        super(Policy, self).__init__()
        self.fc1 = nn.Linear(s_size, h_size)
        self.fc2 = nn.Linear(h_size, 32)
        self.fc3 = nn.Linear(32, 64)
        self.fc4 = nn.Linear(64, a_size)

    def forward(self, x):
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = F.relu(self.fc3(x))
        x = self.fc4(x)
        return F.softmax(x, dim=1)

    def act(self, state):
        # `device` is defined elsewhere in the notebook
        state = torch.from_numpy(state).float().unsqueeze(0).to(device)
        probs = self.forward(state).cpu()
        m = Categorical(probs)
        action = m.sample()
        return action.item(), m.log_prob(action)
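For reference, act expects a NumPy observation vector and handles the tensor conversion itself; a quick sanity check of what it returns might look like this (a sketch, assuming a CPU device and the imports above; dummy_state is just a placeholder observation):

import numpy as np

device = torch.device('cpu')
policy = Policy()

dummy_state = np.zeros(8, dtype=np.float32)  # LunarLander observations have 8 values
action, log_prob = policy.act(dummy_state)   # action is a plain int, log_prob a tensor
print(action, log_prob)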
I then save its state dictionary and use it as follows:
env = gym.make('LunarLander-v2')

policy = Policy().to(torch.device('cpu'))
policy.load_state_dict(torch.load('best_params_cloud.ckpt', map_location='cpu'))
policy.eval()

ims = []
rewards = []
state = env.reset()
for step in range(STEPS):
    img = env.render(mode='rgb_array')
    action, log_prob = policy(state)
    # print(action)
    state, reward, done, i_ = env.step(action)
    rewards.append(reward)
    # print(reward, done)
    cv2_im_rgb = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
    pil_im = Image.fromarray(cv2_im_rgb)
    draw = ImageDraw.Draw(pil_im)

    # Choose a font
    font = ImageFont.truetype("Roboto-Regular.ttf", 20)

    # Draw the text
    draw.text((0, 0), f"Step: {step} Action: {action} Reward: {int(reward)} Total Rewards: {int(np.sum(rewards))} done: {done}", font=font, fill="#FDFEFE")

    # Save the image
    img = cv2.cvtColor(np.array(pil_im), cv2.COLOR_RGB2BGR)
    im = plt.imshow(img, animated=True)
    ims.append([im])

    if done:
        env.close()
        break

Writer = animation.writers['pillow']
writer = Writer(fps=15, metadata=dict(artist='Me'), bitrate=1800)
im_ani = animation.ArtistAnimation(fig, ims, interval=50, repeat_delay=3000, blit=True)
im_ani.save('ll_train1.gif', writer=writer)
But this returns the error:
TypeError Traceback (most recent call last)
<ipython-input-3-da32222edde2> in <module>
9 for step in range(STEPS):
10 img = env.render(mode='rgb_array')
---> 11 action,log_prob = policy(state)
12 # print(action)
13 state,reward,done,i_ = env.step(action)
~\anaconda3\lib\site-packages\torch\nn\modules\module.py in _call_impl(self, *input, **kwargs)
887 result = self._slow_forward(*input, **kwargs)
888 else:
--> 889 result = self.forward(*input, **kwargs)
890 for hook in itertools.chain(
891 _global_forward_hooks.values(),
<ipython-input-2-66d42ebb791e> in forward(self, x)
33
34 def forward(self, x):
---> 35 x = F.relu(self.fc1(x))
36 x = F.relu(self.fc2(x))
37 x = F.relu(self.fc3(x))
~\anaconda3\lib\site-packages\torch\nn\modules\module.py in _call_impl(self, *input, **kwargs)
887 result = self._slow_forward(*input, **kwargs)
888 else:
--> 889 result = self.forward(*input, **kwargs)
890 for hook in itertools.chain(
891 _global_forward_hooks.values(),
~\anaconda3\lib\site-packages\torch\nn\modules\linear.py in forward(self, input)
92
93 def forward(self, input: Tensor) -> Tensor:
---> 94 return F.linear(input, self.weight, self.bias)
95
96 def extra_repr(self) -> str:
~\anaconda3\lib\site-packages\torch\nn\functional.py in linear(input, weight, bias)
1751 if has_torch_function_variadic(input, weight):
1752 return handle_torch_function(linear, (input, weight), input, weight, bias=bias)
-> 1753 return torch._C._nn.linear(input, weight, bias)
1754
1755
TypeError: linear(): argument 'input' (position 1) must be Tensor, not numpy.ndarray
I tried to change the forward function by adding the following line of code:
def forward(self, x):
    x = torch.tensor(x, dtype=torch.float32, device=DEVICE).unsqueeze(0)  # Added this line
    x = F.relu(self.fc1(x))
    x = F.relu(self.fc2(x))
    x = F.relu(self.fc3(x))
    x = self.fc4(x)
    return F.softmax(x, dim=1)
But this also returns an error: ValueError: not enough values to unpack (expected 2, got 1).
The policy took a long time to train, and I would like to avoid retraining it. Is there a workaround that lets it run as it is?
This error is not related to your model. forward only returns the probability distribution over actions, but what you need are the action and its log probability, which is exactly what Policy.act returns. That is also why your second attempt fails with "not enough values to unpack": forward still returns a single probabilities tensor, so there is nothing to unpack into action and log_prob.
Change your code from

for step in range(STEPS):
    img = env.render(mode='rgb_array')
    # This line causes the error.
    action, log_prob = policy(state)

to

for step in range(STEPS):
    img = env.render(mode='rgb_array')
    # Call act() instead of forward() to get the action and its log probability.
    action, log_prob = policy.act(state)
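For completeness, here is a minimal sketch of the evaluation loop with that fix applied (assuming the same STEPS constant, environment, and loaded policy as in your code). Since act already calls action.item(), the returned action is a plain int and can be passed straight to env.step, and wrapping the loop in torch.no_grad() avoids building a computation graph during evaluation:

state = env.reset()
rewards = []
with torch.no_grad():
    for step in range(STEPS):
        img = env.render(mode='rgb_array')        # annotate/collect img for the GIF as before
        action, log_prob = policy.act(state)      # act() converts the NumPy state itself
        state, reward, done, _ = env.step(action) # action is already an int
        rewards.append(reward)
        if done:
            env.close()
            break
print(f"Total reward: {np.sum(rewards)}")

No retraining is needed; the saved state dict is fine, only the call site changes.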