I want to render a gym env in test but not in learning. Here is my code:
import gymnasium as gym
import numpy as np
env = gym.make('FrozenLake-v1')
# initialize Q table
Q = np.zeros([env.observation_space.n, env.action_space.n])
print(Q)
# parameter
lr = 0.8
gamma = 0.95
num_episodes = 2000
# learning
for i in range(num_episodes):
state = env.reset()[0]
done = False
while not done:
# epsilon-greedy policy
if np.random.uniform(0, 1) < 0.5:
action = env.action_space.sample()
else:
action = np.argmax(Q[state,:])
next_state, reward, done, _, _ = env.step(action)
Q[state, action] = (1 - lr) * Q[state, action] + \
lr * (reward + gamma * np.max(Q[next_state, :]))
state = next_state
print(Q)
# test
state = env.reset()[0]
done = False
while not done:
action = np.argmax(Q[state, :])
next_state, reward, done, _, _ = env.step(action)
state = next_state
env.render()
It doesn't render and give warning: WARN: You are calling render method without specifying any render mode. You can specify the render_mode at initialization, e.g. gym.make("FrozenLake-v1", render_mode="rgb_array")
If I specify the render_mode
to 'human'
, it will render both in learning and test, which I don't want.
How should I do?
You can just recreate a new environment specifying the render mode.
import gymnasium as gym
import numpy as np
env_train = gym.make('FrozenLake-v1')
# initialize Q table
Q = np.zeros([env_train.observation_space.n, env_train.action_space.n])
print(Q)
# parameter
lr = 0.8
gamma = 0.95
num_episodes = 2000
# learning
for i in range(num_episodes):
state = env_train.reset()[0]
done = False
while not done:
# epsilon-greedy policy
if np.random.uniform(0, 1) < 0.5:
action = env_train.action_space.sample()
else:
action = np.argmax(Q[state,:])
next_state, reward, done, _, _ = env_train.step(action)
Q[state, action] = (1 - lr) * Q[state, action] + \
lr * (reward + gamma * np.max(Q[next_state, :]))
state = next_state
print(Q)
env_train.close()
# test
env_test = gym.make('FrozenLake-v1', render_mode="human")
state = env_test.reset()[0]
done = False
while not done:
action = np.argmax(Q[state, :])
next_state, reward, done, _, _ = env_test.step(action)
state = next_state
env_test.render()
env_test.close()