I am trying to train an A2C model on the Atari Breakout environment in gym for 100,000 timesteps, but model.learn() keeps raising the error below. Can someone explain why this happens and how to fix it? I am a beginner in machine learning. Here are my code and the error message:
import gym
from stable_baselines3 import A2C
from stable_baselines3.common.vec_env import VecFrameStack
from stable_baselines3.common.evaluation import evaluate_policy
from stable_baselines3.common.env_util import make_atari_env
import os
import numpy as np
env = make_atari_env("ALE/Breakout-v5", n_envs=4, seed=0)  # 4 parallel Atari environments with the standard wrappers
env = VecFrameStack(env, n_stack=4)                        # stack 4 consecutive frames per observation
log_path = os.path.join("Training", "Logs")
model = A2C("CnnPolicy", env, verbose=1, tensorboard_log=log_path)
model.learn(total_timesteps=100000)
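I am not sure whether my gym, stable-baselines3, and numpy versions are compatible with each other, so in case it helps, this is how I would print them (the snippet only reads the __version__ attributes and does not assume any particular version):

import gym
import numpy
import stable_baselines3

print("gym:", gym.__version__)
print("numpy:", numpy.__version__)
print("stable-baselines3:", stable_baselines3.__version__)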
ERROR MESSAGE:
---------------------------------------------------------------------------
AttributeError Traceback (most recent call last)
~\AppData\Local\Temp/ipykernel_7676/2886439321.py in <module>
----> 1 model.learn(total_timesteps=100000)
D:\Anaconda\lib\site-packages\stable_baselines3\a2c\a2c.py in learn(self, total_timesteps, callback, log_interval, eval_env, eval_freq, n_eval_episodes, tb_log_name, eval_log_path, reset_num_timesteps)
189 ) -> "A2C":
190
--> 191 return super(A2C, self).learn(
192 total_timesteps=total_timesteps,
193 callback=callback,
D:\Anaconda\lib\site-packages\stable_baselines3\common\on_policy_algorithm.py in learn(self, total_timesteps, callback, log_interval, eval_env, eval_freq, n_eval_episodes, tb_log_name, eval_log_path, reset_num_timesteps)
240 iteration = 0
241
--> 242 total_timesteps, callback = self._setup_learn(
243 total_timesteps, eval_env, callback, eval_freq, n_eval_episodes, eval_log_path, reset_num_timesteps, tb_log_name
244 )
D:\Anaconda\lib\site-packages\stable_baselines3\common\base_class.py in _setup_learn(self, total_timesteps, eval_env, callback, eval_freq, n_eval_episodes, log_path, reset_num_timesteps, tb_log_name)
427 # Avoid resetting the environment when calling ``.learn()`` consecutive times
428 if reset_num_timesteps or self._last_obs is None:
--> 429 self._last_obs = self.env.reset() # pytype: disable=annotation-type-mismatch
430 self._last_episode_starts = np.ones((self.env.num_envs,), dtype=bool)
431 # Retrieve unnormalized observation for saving into the buffer
D:\Anaconda\lib\site-packages\stable_baselines3\common\vec_env\vec_transpose.py in reset(self)
108 Reset all environments
109 """
--> 110 return self.transpose_observations(self.venv.reset())
111
112 def close(self) -> None:
D:\Anaconda\lib\site-packages\stable_baselines3\common\vec_env\vec_frame_stack.py in reset(self)
56 Reset all environments
57 """
---> 58 observation = self.venv.reset() # pytype:disable=annotation-type-mismatch
59
60 observation = self.stackedobs.reset(observation)
D:\Anaconda\lib\site-packages\stable_baselines3\common\vec_env\dummy_vec_env.py in reset(self)
59 def reset(self) -> VecEnvObs:
60 for env_idx in range(self.num_envs):
---> 61 obs = self.envs[env_idx].reset()
62 self._save_obs(env_idx, obs)
63 return self._obs_from_buf()
D:\Anaconda\lib\site-packages\gym\core.py in reset(self, **kwargs)
290
291 def reset(self, **kwargs):
--> 292 return self.env.reset(**kwargs)
293
294 def render(self, mode="human", **kwargs):
D:\Anaconda\lib\site-packages\gym\core.py in reset(self, **kwargs)
331 class RewardWrapper(Wrapper):
332 def reset(self, **kwargs):
--> 333 return self.env.reset(**kwargs)
334
335 def step(self, action):
D:\Anaconda\lib\site-packages\gym\core.py in reset(self, **kwargs)
317 class ObservationWrapper(Wrapper):
318 def reset(self, **kwargs):
--> 319 observation = self.env.reset(**kwargs)
320 return self.observation(observation)
321
D:\Anaconda\lib\site-packages\stable_baselines3\common\atari_wrappers.py in reset(self, **kwargs)
57
58 def reset(self, **kwargs) -> np.ndarray:
---> 59 self.env.reset(**kwargs)
60 obs, _, done, _ = self.env.step(1)
61 if done:
D:\Anaconda\lib\site-packages\stable_baselines3\common\atari_wrappers.py in reset(self, **kwargs)
104 """
105 if self.was_real_done:
--> 106 obs = self.env.reset(**kwargs)
107 else:
108 # no-op step to advance from terminal/lost life state
D:\Anaconda\lib\site-packages\stable_baselines3\common\atari_wrappers.py in reset(self, **kwargs)
152
153 def reset(self, **kwargs) -> GymObs:
--> 154 return self.env.reset(**kwargs)
155
156
D:\Anaconda\lib\site-packages\stable_baselines3\common\atari_wrappers.py in reset(self, **kwargs)
34 noops = self.override_num_noops
35 else:
---> 36 noops = self.unwrapped.np_random.randint(1, self.noop_max + 1)
37 assert noops > 0
38 obs = np.zeros(0)
AttributeError: 'numpy.random._generator.Generator' object has no attribute 'randint'
This looks like it might be a bug inside the stable_baselines3 package that I would need to report to the authors, but is it, or is something wrong with my code or my installed versions?
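From the last traceback frame, env.np_random seems to be a new-style numpy Generator rather than the legacy RandomState, and Generator exposes integers() instead of randint(). A minimal sketch that reproduces just that AttributeError outside of gym (assuming the cause really is this RandomState vs Generator difference) would be:

import numpy as np

rng = np.random.default_rng(0)      # new-style Generator; newer gym versions appear to store one of these in env.np_random
print(rng.integers(1, 31))          # works: Generator uses integers()

legacy = np.random.RandomState(0)   # old-style RandomState, which older gym versions used
print(legacy.randint(1, 31))        # works: RandomState still has randint()

rng.randint(1, 31)                  # AttributeError: 'Generator' object has no attribute 'randint'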