Search code examples
pythonartificial-intelligencereinforcement-learningopenai-gymstable-baselines

AssertionError: The algorithm only supports <class 'gym.spaces.box.Box'> as action spaces but Box(-1.0, 1.0, (3,), float32) was provided


So basically I tried converting this custom gym environment from https://github.com/Gor-Ren/gym-jsbsim to use farama foundation's gymnasium api. This is my repo whih I am working on: https://github.com/sryu1/jsbgym When I try training the environment with gym-jsbsim, it works, but with the gymnasium environment, I get the error that is in the title... I feel like Lines 234 to 242 in tasks.py are somewhere that causes the problem. the same error appears for all algorithms... If anyone could tell me what I did wrong, it'd be very appreciated! I tried training the custom environment with stable baselines 3 all algos that support Box and they all get the same error.

This is my ipynb in raw format, if you just open a new ipynb file with notepad then past this in then open again, it should work.

{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "id": "7e5a3876",
   "metadata": {},
   "outputs": [],
   "source": [
    "import jsbsim\n",
    "import gymnasium as gym\n",
    "import jsbgym\n",
    "import os\n",
    "from stable_baselines3 import DDPG\n",
    "from stable_baselines3.common.callbacks import BaseCallback\n",
    "from stable_baselines3.common.monitor import Monitor\n",
    "from stable_baselines3.common.vec_env import DummyVecEnv, VecFrameStack"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "id": "c78d0a36",
   "metadata": {},
   "outputs": [],
   "source": [
    "env = gym.make('JSBSim-TurnHeadingControlTask-Cessna172P-Shaping.STANDARD-NoFG-v0')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "id": "17ea6f3e",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "(array([ 5.00000000e+03, -1.52383093e-16,  1.16583160e-16,  2.02536000e+02,\n",
       "        -4.26325641e-14, -7.10542736e-15,  0.00000000e+00,  0.00000000e+00,\n",
       "         0.00000000e+00,  0.00000000e+00,  0.00000000e+00,  0.00000000e+00,\n",
       "         0.00000000e+00, -3.72529030e-09, -9.68166768e-15, -1.60633375e+02,\n",
       "         2.99000000e+02]),\n",
       " {})"
      ]
     },
     "execution_count": 3,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "env.reset()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "id": "60a7ecab",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Total Reward for episode 1 is 24.43895374007404\n",
      "Total Reward for episode 2 is 17.88229242588352\n",
      "Total Reward for episode 3 is 20.844080298653026\n",
      "Total Reward for episode 4 is 23.09412403738447\n",
      "Total Reward for episode 5 is 22.540357474496297\n"
     ]
    }
   ],
   "source": [
    "for episode in range(1, 6):\n",
    "    obs = env.reset()\n",
    "    done = False\n",
    "    total_reward = 0\n",
    "    while not done:\n",
    "        obs, reward, done, _, info = env.step(env.action_space.sample())\n",
    "        total_reward += reward\n",
    "    print(\"Total Reward for episode {} is {}\".format(episode, total_reward))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "id": "d819a741",
   "metadata": {},
   "outputs": [],
   "source": [
    "class TrainAndLoggingCallback(BaseCallback):\n",
    "    def __init__(self, check_freq, save_path, verbose=1):\n",
    "        super(TrainAndLoggingCallback, self).__init__(verbose)\n",
    "        self.check_freq = check_freq\n",
    "        self.save_path = save_path\n",
    "\n",
    "    def _init_callback(self):\n",
    "        if self.save_path is not None:\n",
    "            os.makedirs(self.save_path, exist_ok=True)\n",
    "\n",
    "    def _on_step(self):\n",
    "        if self.n_calls % self.check_freq == 0:\n",
    "            model_path = os.path.join(\n",
    "                self.save_path, \"best_model_{}\".format(self.n_calls)\n",
    "            )\n",
    "            self.model.save(model_path)\n",
    "\n",
    "        return True"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "id": "a0f962d6",
   "metadata": {},
   "outputs": [],
   "source": [
    "CHECKPOINT_DIR = \"./train/\"\n",
    "LOG_DIR = \"./logs/\""
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "id": "d2ab0033",
   "metadata": {},
   "outputs": [],
   "source": [
    "callback = TrainAndLoggingCallback(check_freq=1000000, save_path=CHECKPOINT_DIR)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "id": "ea1fd388",
   "metadata": {},
   "outputs": [
    {
     "ename": "AssertionError",
     "evalue": "The algorithm only supports <class 'gym.spaces.box.Box'> as action spaces but Box(-1.0, 1.0, (3,), float64) was provided",
     "output_type": "error",
     "traceback": [
      "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m",
      "\u001b[1;31mAssertionError\u001b[0m                            Traceback (most recent call last)",
      "\u001b[1;32m~\\AppData\\Local\\Temp\\ipykernel_17260\\3532872291.py\u001b[0m in \u001b[0;36m<module>\u001b[1;34m\u001b[0m\n\u001b[1;32m----> 1\u001b[1;33m \u001b[0mmodel\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mDDPG\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;34m\"MlpPolicy\"\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0menv\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mtensorboard_log\u001b[0m\u001b[1;33m=\u001b[0m\u001b[0mLOG_DIR\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m",
      "\u001b[1;32mc:\\Users\\Noah Ryu\\AppData\\Local\\Programs\\Python\\Python37\\lib\\site-packages\\stable_baselines3\\ddpg\\ddpg.py\u001b[0m in \u001b[0;36m__init__\u001b[1;34m(self, policy, env, learning_rate, buffer_size, learning_starts, batch_size, tau, gamma, train_freq, gradient_steps, action_noise, replay_buffer_class, replay_buffer_kwargs, optimize_memory_usage, tensorboard_log, policy_kwargs, verbose, seed, device, _init_setup_model)\u001b[0m\n\u001b[0;32m    103\u001b[0m             \u001b[0mtarget_noise_clip\u001b[0m\u001b[1;33m=\u001b[0m\u001b[1;36m0.0\u001b[0m\u001b[1;33m,\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m    104\u001b[0m             \u001b[0mtarget_policy_noise\u001b[0m\u001b[1;33m=\u001b[0m\u001b[1;36m0.1\u001b[0m\u001b[1;33m,\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m--> 105\u001b[1;33m             \u001b[0m_init_setup_model\u001b[0m\u001b[1;33m=\u001b[0m\u001b[1;32mFalse\u001b[0m\u001b[1;33m,\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m    106\u001b[0m         )\n\u001b[0;32m    107\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n",
      "\u001b[1;32mc:\\Users\\Noah Ryu\\AppData\\Local\\Programs\\Python\\Python37\\lib\\site-packages\\stable_baselines3\\td3\\td3.py\u001b[0m in \u001b[0;36m__init__\u001b[1;34m(self, policy, env, learning_rate, buffer_size, learning_starts, batch_size, tau, gamma, train_freq, gradient_steps, action_noise, replay_buffer_class, replay_buffer_kwargs, optimize_memory_usage, policy_delay, target_policy_noise, target_noise_clip, tensorboard_log, policy_kwargs, verbose, seed, device, _init_setup_model)\u001b[0m\n\u001b[0;32m    118\u001b[0m             \u001b[0moptimize_memory_usage\u001b[0m\u001b[1;33m=\u001b[0m\u001b[0moptimize_memory_usage\u001b[0m\u001b[1;33m,\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m    119\u001b[0m             \u001b[0msupported_action_spaces\u001b[0m\u001b[1;33m=\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mspaces\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mBox\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m,\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m--> 120\u001b[1;33m             \u001b[0msupport_multi_env\u001b[0m\u001b[1;33m=\u001b[0m\u001b[1;32mTrue\u001b[0m\u001b[1;33m,\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m    121\u001b[0m         )\n\u001b[0;32m    122\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n",
      "\u001b[1;32mc:\\Users\\Noah Ryu\\AppData\\Local\\Programs\\Python\\Python37\\lib\\site-packages\\stable_baselines3\\common\\off_policy_algorithm.py\u001b[0m in \u001b[0;36m__init__\u001b[1;34m(self, policy, env, learning_rate, buffer_size, learning_starts, batch_size, tau, gamma, train_freq, gradient_steps, action_noise, replay_buffer_class, replay_buffer_kwargs, optimize_memory_usage, policy_kwargs, tensorboard_log, verbose, device, support_multi_env, monitor_wrapper, seed, use_sde, sde_sample_freq, use_sde_at_warmup, sde_support, supported_action_spaces)\u001b[0m\n\u001b[0;32m    117\u001b[0m             \u001b[0muse_sde\u001b[0m\u001b[1;33m=\u001b[0m\u001b[0muse_sde\u001b[0m\u001b[1;33m,\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m    118\u001b[0m             \u001b[0msde_sample_freq\u001b[0m\u001b[1;33m=\u001b[0m\u001b[0msde_sample_freq\u001b[0m\u001b[1;33m,\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m--> 119\u001b[1;33m             \u001b[0msupported_action_spaces\u001b[0m\u001b[1;33m=\u001b[0m\u001b[0msupported_action_spaces\u001b[0m\u001b[1;33m,\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m    120\u001b[0m         )\n\u001b[0;32m    121\u001b[0m         \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mbuffer_size\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mbuffer_size\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
      "\u001b[1;32mc:\\Users\\Noah Ryu\\AppData\\Local\\Programs\\Python\\Python37\\lib\\site-packages\\stable_baselines3\\common\\base_class.py\u001b[0m in \u001b[0;36m__init__\u001b[1;34m(self, policy, env, learning_rate, policy_kwargs, tensorboard_log, verbose, device, support_multi_env, monitor_wrapper, seed, use_sde, sde_sample_freq, supported_action_spaces)\u001b[0m\n\u001b[0;32m    171\u001b[0m             \u001b[1;32mif\u001b[0m \u001b[0msupported_action_spaces\u001b[0m \u001b[1;32mis\u001b[0m \u001b[1;32mnot\u001b[0m \u001b[1;32mNone\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m    172\u001b[0m                 assert isinstance(self.action_space, supported_action_spaces), (\n\u001b[1;32m--> 173\u001b[1;33m                     \u001b[1;34mf\"The algorithm only supports {supported_action_spaces} as action spaces \"\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m    174\u001b[0m                     \u001b[1;34mf\"but {self.action_space} was provided\"\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m    175\u001b[0m                 )\n",
      "\u001b[1;31mAssertionError\u001b[0m: The algorithm only supports <class 'gym.spaces.box.Box'> as action spaces but Box(-1.0, 1.0, (3,), float64) was provided"
     ]
    }
   ],
   "source": [
    "model = DDPG(\"MlpPolicy\", env, tensorboard_log=LOG_DIR)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "id": "b80a3ed4",
   "metadata": {},
   "outputs": [
    {
     "ename": "NameError",
     "evalue": "name 'model' is not defined",
     "output_type": "error",
     "traceback": [
      "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m",
      "\u001b[1;31mNameError\u001b[0m                                 Traceback (most recent call last)",
      "\u001b[1;32m~\\AppData\\Local\\Temp\\ipykernel_17260\\1190813584.py\u001b[0m in \u001b[0;36m<module>\u001b[1;34m\u001b[0m\n\u001b[1;32m----> 1\u001b[1;33m \u001b[0mmodel\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mlearn\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mtotal_timesteps\u001b[0m\u001b[1;33m=\u001b[0m\u001b[1;36m10000000\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mcallback\u001b[0m\u001b[1;33m=\u001b[0m\u001b[0mcallback\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m      2\u001b[0m \u001b[0mmodel\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0msave\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;34m\"JSBSim_10000000_steps\"\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
      "\u001b[1;31mNameError\u001b[0m: name 'model' is not defined"
     ]
    }
   ],
   "source": [
    "model.learn(total_timesteps=10000000, callback=callback)\n",
    "model.save(\"JSBSim_10000000_steps\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "id": "72842db2",
   "metadata": {},
   "outputs": [
    {
     "ename": "NameError",
     "evalue": "name 'model' is not defined",
     "output_type": "error",
     "traceback": [
      "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m",
      "\u001b[1;31mNameError\u001b[0m                                 Traceback (most recent call last)",
      "\u001b[1;32m~\\AppData\\Local\\Temp\\ipykernel_17260\\4222837208.py\u001b[0m in \u001b[0;36m<module>\u001b[1;34m\u001b[0m\n\u001b[0;32m      4\u001b[0m     \u001b[0mtotal_reward\u001b[0m \u001b[1;33m=\u001b[0m \u001b[1;36m0\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m      5\u001b[0m     \u001b[1;32mwhile\u001b[0m \u001b[1;32mnot\u001b[0m \u001b[0mdone\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m----> 6\u001b[1;33m         \u001b[0maction\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0m_\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mmodel\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mpredict\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mobs\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m      7\u001b[0m         \u001b[0mobs\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mreward\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mdone\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0minfo\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0menv\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mstep\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mint\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0maction\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m      8\u001b[0m         \u001b[0mtotal_reward\u001b[0m \u001b[1;33m+=\u001b[0m \u001b[0mreward\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
      "\u001b[1;31mNameError\u001b[0m: name 'model' is not defined"
     ]
    }
   ],
   "source": [
    "for episode in range(5):\n",
    "    obs = env.reset()\n",
    "    done = False\n",
    "    total_reward = 0\n",
    "    while not done:\n",
    "        action, _ = model.predict(obs)\n",
    "        obs, reward, done, info = env.step(int(action))\n",
    "        total_reward += reward\n",
    "    print(\"Total Reward for episode {} is {}\".format(episode, total_reward))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "f0daa1ab",
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.7.9"
  },
  "vscode": {
   "interpreter": {
    "hash": "fc676d0716d313b34d9b58671be5ff89ed5ca710c84a0894db60f3144580aba8"
   }
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}

Solution

  • Stable Baselines 3, at least up to 1.7.0, depends on gym, not on its newer gymnasium equivalent.

    Although import gymnasium as gym should do the trick within your own code, some of the Stable Baselines3 code still performs imports such as (see td3.py for instance):

    from gym import spaces
    

    and uses the gym spaces to validate your gymnasium environment's action space.

    You can check yourself by running type(env.action_space), you'll see that it returns gymnasium.spaces.box.Box instead of gym.spaces.box.Box.

    You can read the comments on this PR to have more details on the potential future solutions.