Tags: python, numpy, reinforcement-learning, openai-gym, stable-baselines

Why does model.learn() return a numpy error?


I am trying to train an A2C model on the Breakout Atari game in gym for 100000 timesteps, but model.learn() keeps raising the error below. Can someone explain why this happens and how to fix it? I am a beginner in machine learning. Here are the code and the error message:

import gym
from stable_baselines3 import A2C
from stable_baselines3.common.vec_env import VecFrameStack
from stable_baselines3.common.evaluation import evaluate_policy
from stable_baselines3.common.env_util import make_atari_env
import os
import numpy as np

# Create 4 parallel Breakout environments with the standard Atari preprocessing wrappers
env = make_atari_env("ALE/Breakout-v5", n_envs=4, seed=0)
# Stack the last 4 frames so the CNN policy can observe motion
env = VecFrameStack(env, n_stack=4)

log_path = os.path.join("Training", "Logs")
model = A2C("CnnPolicy", env, verbose=1, tensorboard_log=log_path)
model.learn(total_timesteps=100000)

ERROR MESSAGE:
---------------------------------------------------------------------------
AttributeError                            Traceback (most recent call last)
~\AppData\Local\Temp/ipykernel_7676/2886439321.py in <module>
----> 1 model.learn(total_timesteps=100000)

D:\Anaconda\lib\site-packages\stable_baselines3\a2c\a2c.py in learn(self, total_timesteps, callback, log_interval, eval_env, eval_freq, n_eval_episodes, tb_log_name, eval_log_path, reset_num_timesteps)
    189     ) -> "A2C":
    190 
--> 191         return super(A2C, self).learn(
    192             total_timesteps=total_timesteps,
    193             callback=callback,

D:\Anaconda\lib\site-packages\stable_baselines3\common\on_policy_algorithm.py in learn(self, total_timesteps, callback, log_interval, eval_env, eval_freq, n_eval_episodes, tb_log_name, eval_log_path, reset_num_timesteps)
    240         iteration = 0
    241 
--> 242         total_timesteps, callback = self._setup_learn(
    243             total_timesteps, eval_env, callback, eval_freq, n_eval_episodes, eval_log_path, reset_num_timesteps, tb_log_name
    244         )

D:\Anaconda\lib\site-packages\stable_baselines3\common\base_class.py in _setup_learn(self, total_timesteps, eval_env, callback, eval_freq, n_eval_episodes, log_path, reset_num_timesteps, tb_log_name)
    427         # Avoid resetting the environment when calling ``.learn()`` consecutive times
    428         if reset_num_timesteps or self._last_obs is None:
--> 429             self._last_obs = self.env.reset()  # pytype: disable=annotation-type-mismatch
    430             self._last_episode_starts = np.ones((self.env.num_envs,), dtype=bool)
    431             # Retrieve unnormalized observation for saving into the buffer

D:\Anaconda\lib\site-packages\stable_baselines3\common\vec_env\vec_transpose.py in reset(self)
    108         Reset all environments
    109         """
--> 110         return self.transpose_observations(self.venv.reset())
    111 
    112     def close(self) -> None:

D:\Anaconda\lib\site-packages\stable_baselines3\common\vec_env\vec_frame_stack.py in reset(self)
     56         Reset all environments
     57         """
---> 58         observation = self.venv.reset()  # pytype:disable=annotation-type-mismatch
     59 
     60         observation = self.stackedobs.reset(observation)

D:\Anaconda\lib\site-packages\stable_baselines3\common\vec_env\dummy_vec_env.py in reset(self)
     59     def reset(self) -> VecEnvObs:
     60         for env_idx in range(self.num_envs):
---> 61             obs = self.envs[env_idx].reset()
     62             self._save_obs(env_idx, obs)
     63         return self._obs_from_buf()

D:\Anaconda\lib\site-packages\gym\core.py in reset(self, **kwargs)
    290 
    291     def reset(self, **kwargs):
--> 292         return self.env.reset(**kwargs)
    293 
    294     def render(self, mode="human", **kwargs):

D:\Anaconda\lib\site-packages\gym\core.py in reset(self, **kwargs)
    331 class RewardWrapper(Wrapper):
    332     def reset(self, **kwargs):
--> 333         return self.env.reset(**kwargs)
    334 
    335     def step(self, action):

D:\Anaconda\lib\site-packages\gym\core.py in reset(self, **kwargs)
    317 class ObservationWrapper(Wrapper):
    318     def reset(self, **kwargs):
--> 319         observation = self.env.reset(**kwargs)
    320         return self.observation(observation)
    321 

D:\Anaconda\lib\site-packages\stable_baselines3\common\atari_wrappers.py in reset(self, **kwargs)
     57 
     58     def reset(self, **kwargs) -> np.ndarray:
---> 59         self.env.reset(**kwargs)
     60         obs, _, done, _ = self.env.step(1)
     61         if done:

D:\Anaconda\lib\site-packages\stable_baselines3\common\atari_wrappers.py in reset(self, **kwargs)
    104         """
    105         if self.was_real_done:
--> 106             obs = self.env.reset(**kwargs)
    107         else:
    108             # no-op step to advance from terminal/lost life state

D:\Anaconda\lib\site-packages\stable_baselines3\common\atari_wrappers.py in reset(self, **kwargs)
    152 
    153     def reset(self, **kwargs) -> GymObs:
--> 154         return self.env.reset(**kwargs)
    155 
    156 

D:\Anaconda\lib\site-packages\stable_baselines3\common\atari_wrappers.py in reset(self, **kwargs)
     34             noops = self.override_num_noops
     35         else:
---> 36             noops = self.unwrapped.np_random.randint(1, self.noop_max + 1)
     37         assert noops > 0
     38         obs = np.zeros(0)

AttributeError: 'numpy.random._generator.Generator' object has no attribute 'randint'

Solution

  • The last frame of the traceback points at the cause: the NoopResetEnv Atari wrapper inside stable_baselines3 calls self.unwrapped.np_random.randint(...), but in the gym version you have installed, np_random is a numpy.random.Generator, which has no randint method (the newer Generator API provides integers instead). So this is an incompatibility between your installed gym and stable_baselines3 releases rather than a mistake in your own code. It looks like a bug in the stable_baselines3 package, and you should report it to the authors.
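
    You can reproduce the mismatch with plain NumPy, independent of gym and stable_baselines3. The following is a minimal sketch (the seed and bounds are arbitrary): the legacy np.random.RandomState object exposes randint, while the numpy.random.Generator returned by np.random.default_rng only exposes integers, which is exactly what the last traceback frame trips over.

import numpy as np

# Legacy RNG: the kind of object older gym versions stored in env.np_random
legacy_rng = np.random.RandomState(0)
print(legacy_rng.randint(1, 31))        # works: RandomState has randint()

# New-style RNG: the kind of object newer gym versions store in env.np_random
new_rng = np.random.default_rng(0)
print(new_rng.integers(1, 31))          # works: Generator uses integers()
print(hasattr(new_rng, "randint"))      # False -> the wrapper's randint() call fails
# new_rng.randint(1, 31)                # would raise the same AttributeError

    In practice, besides reporting the bug, this kind of mismatch is usually resolved by installing gym and stable_baselines3 versions that agree on the np_random API; which exact versions those are depends on the releases available to you, so check the stable_baselines3 release notes.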