I tried converting the custom Gym environment from https://github.com/Gor-Ren/gym-jsbsim to use the Farama Foundation's Gymnasium API. This is the repo I am working on: https://github.com/sryu1/jsbgym. When I train with the original gym-jsbsim environment, it works, but with the Gymnasium environment I get the error that is in the title. I suspect that lines 234 to 242 in tasks.py are what causes the problem. The same error appears for all algorithms. If anyone could tell me what I did wrong, it would be very much appreciated! I tried training the custom environment with every Stable Baselines3 algorithm that supports Box action spaces, and they all raise the same error.
This is my notebook (.ipynb) in raw format. If you create a new .ipynb file, open it in a text editor such as Notepad, paste this in, save it, and then reopen it as a notebook, it should work.
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"id": "7e5a3876",
"metadata": {},
"outputs": [],
"source": [
"import jsbsim\n",
"import gymnasium as gym\n",
"import jsbgym\n",
"import os\n",
"from stable_baselines3 import DDPG\n",
"from stable_baselines3.common.callbacks import BaseCallback\n",
"from stable_baselines3.common.monitor import Monitor\n",
"from stable_baselines3.common.vec_env import DummyVecEnv, VecFrameStack"
]
},
{
"cell_type": "code",
"execution_count": 2,
"id": "c78d0a36",
"metadata": {},
"outputs": [],
"source": [
"env = gym.make('JSBSim-TurnHeadingControlTask-Cessna172P-Shaping.STANDARD-NoFG-v0')"
]
},
{
"cell_type": "code",
"execution_count": 3,
"id": "17ea6f3e",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"(array([ 5.00000000e+03, -1.52383093e-16, 1.16583160e-16, 2.02536000e+02,\n",
" -4.26325641e-14, -7.10542736e-15, 0.00000000e+00, 0.00000000e+00,\n",
" 0.00000000e+00, 0.00000000e+00, 0.00000000e+00, 0.00000000e+00,\n",
" 0.00000000e+00, -3.72529030e-09, -9.68166768e-15, -1.60633375e+02,\n",
" 2.99000000e+02]),\n",
" {})"
]
},
"execution_count": 3,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"env.reset()"
]
},
{
"cell_type": "code",
"execution_count": 4,
"id": "60a7ecab",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Total Reward for episode 1 is 24.43895374007404\n",
"Total Reward for episode 2 is 17.88229242588352\n",
"Total Reward for episode 3 is 20.844080298653026\n",
"Total Reward for episode 4 is 23.09412403738447\n",
"Total Reward for episode 5 is 22.540357474496297\n"
]
}
],
"source": [
"for episode in range(1, 6):\n",
" obs = env.reset()\n",
" done = False\n",
" total_reward = 0\n",
" while not done:\n",
" obs, reward, done, _, info = env.step(env.action_space.sample())\n",
" total_reward += reward\n",
" print(\"Total Reward for episode {} is {}\".format(episode, total_reward))"
]
},
{
"cell_type": "code",
"execution_count": 5,
"id": "d819a741",
"metadata": {},
"outputs": [],
"source": [
"class TrainAndLoggingCallback(BaseCallback):\n",
" def __init__(self, check_freq, save_path, verbose=1):\n",
" super(TrainAndLoggingCallback, self).__init__(verbose)\n",
" self.check_freq = check_freq\n",
" self.save_path = save_path\n",
"\n",
" def _init_callback(self):\n",
" if self.save_path is not None:\n",
" os.makedirs(self.save_path, exist_ok=True)\n",
"\n",
" def _on_step(self):\n",
" if self.n_calls % self.check_freq == 0:\n",
" model_path = os.path.join(\n",
" self.save_path, \"best_model_{}\".format(self.n_calls)\n",
" )\n",
" self.model.save(model_path)\n",
"\n",
" return True"
]
},
{
"cell_type": "code",
"execution_count": 6,
"id": "a0f962d6",
"metadata": {},
"outputs": [],
"source": [
"CHECKPOINT_DIR = \"./train/\"\n",
"LOG_DIR = \"./logs/\""
]
},
{
"cell_type": "code",
"execution_count": 7,
"id": "d2ab0033",
"metadata": {},
"outputs": [],
"source": [
"callback = TrainAndLoggingCallback(check_freq=1000000, save_path=CHECKPOINT_DIR)"
]
},
{
"cell_type": "code",
"execution_count": 8,
"id": "ea1fd388",
"metadata": {},
"outputs": [
{
"ename": "AssertionError",
"evalue": "The algorithm only supports <class 'gym.spaces.box.Box'> as action spaces but Box(-1.0, 1.0, (3,), float64) was provided",
"output_type": "error",
"traceback": [
"\u001b[1;31m---------------------------------------------------------------------------\u001b[0m",
"\u001b[1;31mAssertionError\u001b[0m Traceback (most recent call last)",
"\u001b[1;32m~\\AppData\\Local\\Temp\\ipykernel_17260\\3532872291.py\u001b[0m in \u001b[0;36m<module>\u001b[1;34m\u001b[0m\n\u001b[1;32m----> 1\u001b[1;33m \u001b[0mmodel\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mDDPG\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;34m\"MlpPolicy\"\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0menv\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mtensorboard_log\u001b[0m\u001b[1;33m=\u001b[0m\u001b[0mLOG_DIR\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m",
"\u001b[1;32mc:\\Users\\Noah Ryu\\AppData\\Local\\Programs\\Python\\Python37\\lib\\site-packages\\stable_baselines3\\ddpg\\ddpg.py\u001b[0m in \u001b[0;36m__init__\u001b[1;34m(self, policy, env, learning_rate, buffer_size, learning_starts, batch_size, tau, gamma, train_freq, gradient_steps, action_noise, replay_buffer_class, replay_buffer_kwargs, optimize_memory_usage, tensorboard_log, policy_kwargs, verbose, seed, device, _init_setup_model)\u001b[0m\n\u001b[0;32m 103\u001b[0m \u001b[0mtarget_noise_clip\u001b[0m\u001b[1;33m=\u001b[0m\u001b[1;36m0.0\u001b[0m\u001b[1;33m,\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 104\u001b[0m \u001b[0mtarget_policy_noise\u001b[0m\u001b[1;33m=\u001b[0m\u001b[1;36m0.1\u001b[0m\u001b[1;33m,\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m--> 105\u001b[1;33m \u001b[0m_init_setup_model\u001b[0m\u001b[1;33m=\u001b[0m\u001b[1;32mFalse\u001b[0m\u001b[1;33m,\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 106\u001b[0m )\n\u001b[0;32m 107\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n",
"\u001b[1;32mc:\\Users\\Noah Ryu\\AppData\\Local\\Programs\\Python\\Python37\\lib\\site-packages\\stable_baselines3\\td3\\td3.py\u001b[0m in \u001b[0;36m__init__\u001b[1;34m(self, policy, env, learning_rate, buffer_size, learning_starts, batch_size, tau, gamma, train_freq, gradient_steps, action_noise, replay_buffer_class, replay_buffer_kwargs, optimize_memory_usage, policy_delay, target_policy_noise, target_noise_clip, tensorboard_log, policy_kwargs, verbose, seed, device, _init_setup_model)\u001b[0m\n\u001b[0;32m 118\u001b[0m \u001b[0moptimize_memory_usage\u001b[0m\u001b[1;33m=\u001b[0m\u001b[0moptimize_memory_usage\u001b[0m\u001b[1;33m,\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 119\u001b[0m \u001b[0msupported_action_spaces\u001b[0m\u001b[1;33m=\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mspaces\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mBox\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m,\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m--> 120\u001b[1;33m \u001b[0msupport_multi_env\u001b[0m\u001b[1;33m=\u001b[0m\u001b[1;32mTrue\u001b[0m\u001b[1;33m,\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 121\u001b[0m )\n\u001b[0;32m 122\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n",
"\u001b[1;32mc:\\Users\\Noah Ryu\\AppData\\Local\\Programs\\Python\\Python37\\lib\\site-packages\\stable_baselines3\\common\\off_policy_algorithm.py\u001b[0m in \u001b[0;36m__init__\u001b[1;34m(self, policy, env, learning_rate, buffer_size, learning_starts, batch_size, tau, gamma, train_freq, gradient_steps, action_noise, replay_buffer_class, replay_buffer_kwargs, optimize_memory_usage, policy_kwargs, tensorboard_log, verbose, device, support_multi_env, monitor_wrapper, seed, use_sde, sde_sample_freq, use_sde_at_warmup, sde_support, supported_action_spaces)\u001b[0m\n\u001b[0;32m 117\u001b[0m \u001b[0muse_sde\u001b[0m\u001b[1;33m=\u001b[0m\u001b[0muse_sde\u001b[0m\u001b[1;33m,\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 118\u001b[0m \u001b[0msde_sample_freq\u001b[0m\u001b[1;33m=\u001b[0m\u001b[0msde_sample_freq\u001b[0m\u001b[1;33m,\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m--> 119\u001b[1;33m \u001b[0msupported_action_spaces\u001b[0m\u001b[1;33m=\u001b[0m\u001b[0msupported_action_spaces\u001b[0m\u001b[1;33m,\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 120\u001b[0m )\n\u001b[0;32m 121\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mbuffer_size\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mbuffer_size\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
"\u001b[1;32mc:\\Users\\Noah Ryu\\AppData\\Local\\Programs\\Python\\Python37\\lib\\site-packages\\stable_baselines3\\common\\base_class.py\u001b[0m in \u001b[0;36m__init__\u001b[1;34m(self, policy, env, learning_rate, policy_kwargs, tensorboard_log, verbose, device, support_multi_env, monitor_wrapper, seed, use_sde, sde_sample_freq, supported_action_spaces)\u001b[0m\n\u001b[0;32m 171\u001b[0m \u001b[1;32mif\u001b[0m \u001b[0msupported_action_spaces\u001b[0m \u001b[1;32mis\u001b[0m \u001b[1;32mnot\u001b[0m \u001b[1;32mNone\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 172\u001b[0m assert isinstance(self.action_space, supported_action_spaces), (\n\u001b[1;32m--> 173\u001b[1;33m \u001b[1;34mf\"The algorithm only supports {supported_action_spaces} as action spaces \"\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 174\u001b[0m \u001b[1;34mf\"but {self.action_space} was provided\"\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 175\u001b[0m )\n",
"\u001b[1;31mAssertionError\u001b[0m: The algorithm only supports <class 'gym.spaces.box.Box'> as action spaces but Box(-1.0, 1.0, (3,), float64) was provided"
]
}
],
"source": [
"model = DDPG(\"MlpPolicy\", env, tensorboard_log=LOG_DIR)"
]
},
{
"cell_type": "code",
"execution_count": 9,
"id": "b80a3ed4",
"metadata": {},
"outputs": [
{
"ename": "NameError",
"evalue": "name 'model' is not defined",
"output_type": "error",
"traceback": [
"\u001b[1;31m---------------------------------------------------------------------------\u001b[0m",
"\u001b[1;31mNameError\u001b[0m Traceback (most recent call last)",
"\u001b[1;32m~\\AppData\\Local\\Temp\\ipykernel_17260\\1190813584.py\u001b[0m in \u001b[0;36m<module>\u001b[1;34m\u001b[0m\n\u001b[1;32m----> 1\u001b[1;33m \u001b[0mmodel\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mlearn\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mtotal_timesteps\u001b[0m\u001b[1;33m=\u001b[0m\u001b[1;36m10000000\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mcallback\u001b[0m\u001b[1;33m=\u001b[0m\u001b[0mcallback\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 2\u001b[0m \u001b[0mmodel\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0msave\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;34m\"JSBSim_10000000_steps\"\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
"\u001b[1;31mNameError\u001b[0m: name 'model' is not defined"
]
}
],
"source": [
"model.learn(total_timesteps=10000000, callback=callback)\n",
"model.save(\"JSBSim_10000000_steps\")"
]
},
{
"cell_type": "code",
"execution_count": 10,
"id": "72842db2",
"metadata": {},
"outputs": [
{
"ename": "NameError",
"evalue": "name 'model' is not defined",
"output_type": "error",
"traceback": [
"\u001b[1;31m---------------------------------------------------------------------------\u001b[0m",
"\u001b[1;31mNameError\u001b[0m Traceback (most recent call last)",
"\u001b[1;32m~\\AppData\\Local\\Temp\\ipykernel_17260\\4222837208.py\u001b[0m in \u001b[0;36m<module>\u001b[1;34m\u001b[0m\n\u001b[0;32m 4\u001b[0m \u001b[0mtotal_reward\u001b[0m \u001b[1;33m=\u001b[0m \u001b[1;36m0\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 5\u001b[0m \u001b[1;32mwhile\u001b[0m \u001b[1;32mnot\u001b[0m \u001b[0mdone\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m----> 6\u001b[1;33m \u001b[0maction\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0m_\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mmodel\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mpredict\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mobs\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 7\u001b[0m \u001b[0mobs\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mreward\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mdone\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0minfo\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0menv\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mstep\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mint\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0maction\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 8\u001b[0m \u001b[0mtotal_reward\u001b[0m \u001b[1;33m+=\u001b[0m \u001b[0mreward\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
"\u001b[1;31mNameError\u001b[0m: name 'model' is not defined"
]
}
],
"source": [
"for episode in range(5):\n",
" obs = env.reset()\n",
" done = False\n",
" total_reward = 0\n",
" while not done:\n",
" action, _ = model.predict(obs)\n",
" obs, reward, done, info = env.step(int(action))\n",
" total_reward += reward\n",
" print(\"Total Reward for episode {} is {}\".format(episode, total_reward))"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "f0daa1ab",
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.7.9"
},
"vscode": {
"interpreter": {
"hash": "fc676d0716d313b34d9b58671be5ff89ed5ca710c84a0894db60f3144580aba8"
}
}
},
"nbformat": 4,
"nbformat_minor": 5
}
Stable Baselines3, at least up to version 1.7.0, depends on `gym`, not on its newer `gymnasium` equivalent.
Although `import gymnasium as gym` should do the trick within your own code, some of the Stable Baselines3 code still performs imports such as the following (see td3.py, for instance):
`from gym import spaces`
It then uses the `gym` spaces to validate your `gymnasium` environment's action space, so the `isinstance` check fails even though both classes are named `Box`.
You can check this yourself by running `type(env.action_space)`: you'll see that it returns `gymnasium.spaces.box.Box` instead of `gym.spaces.box.Box`.
You can read the comments on this PR for more details on the potential future solutions.