I have made a model in PyTorch for use in an OpenAI Gym environment. I defined it as follows:
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.distributions import Categorical

class Policy(nn.Module):
    def __init__(self, s_size=8, h_size=16, a_size=4):
        super(Policy, self).__init__()
        self.fc1 = nn.Linear(s_size, h_size)
        self.fc2 = nn.Linear(h_size, 32)
        self.fc3 = nn.Linear(32, 64)
        self.fc4 = nn.Linear(64, a_size)

    def forward(self, x):
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = F.relu(self.fc3(x))
        x = self.fc4(x)
        return F.softmax(x, dim=1)

    def act(self, state):
        # `device` is defined elsewhere in the notebook
        state = torch.from_numpy(state).float().unsqueeze(0).to(device)
        probs = self.forward(state).cpu()
        m = Categorical(probs)
        action = m.sample()
        return action.item(), m.log_prob(action)
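For reference, act expects a NumPy observation vector and handles the tensor conversion itself; a quick sanity check of what it returns might look like this (a sketch, assuming a CPU device and the imports above; dummy_state is just a placeholder observation):

import numpy as np

device = torch.device('cpu')
policy = Policy()

dummy_state = np.zeros(8, dtype=np.float32)  # LunarLander observations have 8 values
action, log_prob = policy.act(dummy_state)   # action is a plain int, log_prob a tensor
print(action, log_prob)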
I then save its state dictionary and use it as follows:
env = gym.make('LunarLander-v2')

policy = Policy().to(torch.device('cpu'))
policy.load_state_dict(torch.load('best_params_cloud.ckpt', map_location='cpu'))
policy.eval()

ims = []
rewards = []
state = env.reset()
for step in range(STEPS):
    img = env.render(mode='rgb_array')
    action, log_prob = policy(state)
    # print(action)
    state, reward, done, i_ = env.step(action)
    rewards.append(reward)
    # print(reward, done)
    cv2_im_rgb = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
    pil_im = Image.fromarray(cv2_im_rgb)
    draw = ImageDraw.Draw(pil_im)

    # Choose a font
    font = ImageFont.truetype("Roboto-Regular.ttf", 20)

    # Draw the text
    draw.text((0, 0), f"Step: {step} Action: {action} Reward: {int(reward)} Total Rewards: {int(np.sum(rewards))} done: {done}", font=font, fill="#FDFEFE")

    # Save the image
    img = cv2.cvtColor(np.array(pil_im), cv2.COLOR_RGB2BGR)
    im = plt.imshow(img, animated=True)
    ims.append([im])

    if done:
        env.close()
        break

Writer = animation.writers['pillow']
writer = Writer(fps=15, metadata=dict(artist='Me'), bitrate=1800)
im_ani = animation.ArtistAnimation(fig, ims, interval=50, repeat_delay=3000, blit=True)
im_ani.save('ll_train1.gif', writer=writer)
But this returns the error:
TypeError Traceback (most recent call last)
<ipython-input-3-da32222edde2> in <module>
9 for step in range(STEPS):
10 img = env.render(mode='rgb_array')
---> 11 action,log_prob = policy(state)
12 # print(action)
13 state,reward,done,i_ = env.step(action)
~\anaconda3\lib\site-packages\torch\nn\modules\module.py in _call_impl(self, *input, **kwargs)
887 result = self._slow_forward(*input, **kwargs)
888 else:
--> 889 result = self.forward(*input, **kwargs)
890 for hook in itertools.chain(
891 _global_forward_hooks.values(),
<ipython-input-2-66d42ebb791e> in forward(self, x)
33
34 def forward(self, x):
---> 35 x = F.relu(self.fc1(x))
36 x = F.relu(self.fc2(x))
37 x = F.relu(self.fc3(x))
~\anaconda3\lib\site-packages\torch\nn\modules\module.py in _call_impl(self, *input, **kwargs)
887 result = self._slow_forward(*input, **kwargs)
888 else:
--> 889 result = self.forward(*input, **kwargs)
890 for hook in itertools.chain(
891 _global_forward_hooks.values(),
~\anaconda3\lib\site-packages\torch\nn\modules\linear.py in forward(self, input)
92
93 def forward(self, input: Tensor) -> Tensor:
---> 94 return F.linear(input, self.weight, self.bias)
95
96 def extra_repr(self) -> str:
~\anaconda3\lib\site-packages\torch\nn\functional.py in linear(input, weight, bias)
1751 if has_torch_function_variadic(input, weight):
1752 return handle_torch_function(linear, (input, weight), input, weight, bias=bias)
-> 1753 return torch._C._nn.linear(input, weight, bias)
1754
1755
TypeError: linear(): argument 'input' (position 1) must be Tensor, not numpy.ndarray
I tried to change the forward function by adding the following line of code:
def forward(self, x):
    x = torch.tensor(x, dtype=torch.float32, device=DEVICE).unsqueeze(0)  # Added this line
    x = F.relu(self.fc1(x))
    x = F.relu(self.fc2(x))
    x = F.relu(self.fc3(x))
    x = self.fc4(x)
    return F.softmax(x, dim=1)
But this also returns an error: ValueError: not enough values to unpack (expected 2, got 1).
The policy took a long time to train, and I would like to avoid retraining it. Is there a workaround that lets it run as it is?
This error is not related to your model. forward only returns the probability distribution over actions, but what you need are the action and its log probability, which is exactly what Policy.act returns. That is also why your second attempt fails with "not enough values to unpack": forward still returns a single probabilities tensor, so there is nothing to unpack into action and log_prob.
Change your code from

for step in range(STEPS):
    img = env.render(mode='rgb_array')
    # This line causes the error.
    action, log_prob = policy(state)

to

for step in range(STEPS):
    img = env.render(mode='rgb_array')
    # Call act() instead of forward() to get the action and its log probability.
    action, log_prob = policy.act(state)
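For completeness, here is a minimal sketch of the evaluation loop with that fix applied (assuming the same STEPS constant, environment, and loaded policy as in your code). Since act already calls action.item(), the returned action is a plain int and can be passed straight to env.step, and wrapping the loop in torch.no_grad() avoids building a computation graph during evaluation:

state = env.reset()
rewards = []
with torch.no_grad():
    for step in range(STEPS):
        img = env.render(mode='rgb_array')        # annotate/collect img for the GIF as before
        action, log_prob = policy.act(state)      # act() converts the NumPy state itself
        state, reward, done, _ = env.step(action) # action is already an int
        rewards.append(reward)
        if done:
            env.close()
            break
print(f"Total reward: {np.sum(rewards)}")

No retraining is needed; the saved state dict is fine, only the call site changes.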