I try to train a cartpole, an example of reinforcement learning, using DQN.
But I got a running error.
At this point
target = self.model.predict(states)
I can't figure out why this is happening, even looking at the documentation that describes what the function does.
link: https://www.tensorflow.org/api_docs/python/tf/keras/Model#predict
Is the model declaration invalid?
Does anyone know of a solution?
I need help
version: keras 2.10.0, python 3.10.13, gym 0.26.2
below is all the code
import sys
import gym
import random
import numpy as np
from collections import deque
from keras.layers import Dense
from keras.optimizers import Adam
from keras.models import Sequential
class DQNAgent:
def __init__(self, state_size, action_size):
self.render = False
self.load_model = False
self.state_size = state_size
self.action_size = action_size
self.discount_factor = 0.99
self.learning_rate = 0.001
self.epsilon = 1.0
self.epsilon_decay = 0.999
self.epsilon_min = 0.01
self.batch_size = 64
self.train_start = 1000
self.memory = deque(maxlen=2000)
self.model = self.build_model()
self.target_model = self.build_model()
self.update_target_model()
def build_model(self):
model = Sequential()
model.add(Dense(24, input_dim=self.state_size, activation='relu', kernel_initializer='he_uniform'))
model.add(Dense(24, activation='relu', kernel_initializer='he_uniform'))
model.add(Dense(self.action_size, activation='linear', kernel_initializer='he_uniform'))
model.compile(loss='mse', optimizer=Adam(lr=self.learning_rate))
return model
def update_target_model(self):
self.target_model.set_weights(self.model.get_weights())
def get_action(self, state):
if np.random.rand() <= self.epsilon:
return random.randrange(self.action_size)
else:
q_value = self.model.predict(state)
return np.argmax(q_value[0])
def append_sample(self, state, action, reward, next_state, done):
self.memory.append((state, action, reward, next_state, done))
def train_model(self):
if self.epsilon > self.epsilon_min:
self.epsilon *= self.epsilon_decay
mini_batch = random.sample(self.memory, self.batch_size)
states = np.zeros((self.batch_size, self.state_size))
next_states = np.zeros((self.batch_size, self.state_size))
actions, rewards, dones = [], [], []
for i in range(self.batch_size):
states[i] = mini_batch[i][0]
actions.append(mini_batch[i][1])
rewards.append(mini_batch[i][2])
next_states[i] = mini_batch[i][3]
dones.append(mini_batch[i][4])
#error
target = self.model.predict(states)
target_val = self.target_model.predict(next_states)
for i in range(self.batch_size):
if dones[i]:
target[i][actions[i]] = rewards[i]
else:
target[i][actions[i]] = rewards[i] + self.discount_factor * (np.amax(target_val[i]))
self.model.fit(states, target, batch_size=self.batch_size, epochs=1, verbose=0)
if __name__ == "__main__":
env = gym.make("CartPole-v1", render_mode="human")
random_seed = 82
env.action_space.seed(random_seed)
state_size = env.observation_space.shape[0]
observation, info = env.reset(seed=random_seed)
EPISODES = 1000
agent = DQNAgent(state_size, env.action_space.n)
scores, episodes = [], []
for e in range(EPISODES):
done = False
score = 0
state = env.reset()[0]
while not done:
action = agent.get_action(state)
next_state, reward, done, _, _ = env.step(action)
reward = reward if not done or score == 499 else -100
agent.append_sample(state, action, reward, next_state, done)
if len(agent.memory) >= agent.train_start:
agent.train_model()
score += reward
state = next_state
if done:
agent.update_target_model()
score = score if score == 500 else score + 100
scores.append(score)
episodes.append(e)
print("episode:", e, " score:", score, " memory length:",
len(agent.memory), " epsilon:", agent.epsilon)
if np.mean(scores[-min(10, len(scores)):]) > 490:
sys.exit()
This is error:
Traceback (most recent call last):
File "C:\Users\cglab\Desktop\Match3_DQN\main.py", line 101, in <module>
action = agent.get_action(state)
File "C:\Users\cglab\Desktop\Match3_DQN\main.py", line 48, in get_action
q_value = self.model.predict(state)
File "c:\Users\cglab\anaconda3\envs\test2\lib\site-packages\keras\utils\traceback_utils.py", line 70, in error_handler
raise e.with_traceback(filtered_tb) from None
File "c:\Users\cglab\anaconda3\envs\test2\lib\site-packages\tensorflow\python\eager\execute.py", line 54, in quick_execute
tensors = pywrap_tfe.TFE_Py_Execute(ctx._handle, device_name, op_name,
tensorflow.python.framework.errors_impl.InvalidArgumentError: Graph execution error:
Detected at node 'sequential/dense/MatMul' defined at (most recent call last):
File "C:\Users\cglab\Desktop\Match3_DQN\main.py", line 101, in <module>
action = agent.get_action(state)
File "C:\Users\cglab\Desktop\Match3_DQN\main.py", line 48, in get_action
q_value = self.model.predict(state)
File "c:\Users\cglab\anaconda3\envs\test2\lib\site-packages\keras\utils\traceback_utils.py", line 65, in error_handler
return fn(*args, **kwargs)
File "c:\Users\cglab\anaconda3\envs\test2\lib\site-packages\keras\engine\training.py", line 2253, in predict
tmp_batch_outputs = self.predict_function(iterator)
File "c:\Users\cglab\anaconda3\envs\test2\lib\site-packages\keras\engine\training.py", line 2041, in predict_function
return step_function(self, iterator)
File "c:\Users\cglab\anaconda3\envs\test2\lib\site-packages\keras\engine\training.py", line 2027, in step_function
outputs = model.distribute_strategy.run(run_step, args=(data,))
File "c:\Users\cglab\anaconda3\envs\test2\lib\site-packages\keras\engine\training.py", line 2015, in run_step
outputs = model.predict_step(data)
File "c:\Users\cglab\anaconda3\envs\test2\lib\site-packages\keras\engine\training.py", line 1983, in predict_step
return self(x, training=False)
File "c:\Users\cglab\anaconda3\envs\test2\lib\site-packages\keras\utils\traceback_utils.py", line 65, in error_handler
return fn(*args, **kwargs)
File "c:\Users\cglab\anaconda3\envs\test2\lib\site-packages\keras\engine\training.py", line 557, in __call__
return super().__call__(*args, **kwargs)
File "c:\Users\cglab\anaconda3\envs\test2\lib\site-packages\keras\utils\traceback_utils.py", line 65, in error_handler
return fn(*args, **kwargs)
File "c:\Users\cglab\anaconda3\envs\test2\lib\site-packages\keras\engine\base_layer.py", line 1097, in __call__
outputs = call_fn(inputs, *args, **kwargs)
File "c:\Users\cglab\anaconda3\envs\test2\lib\site-packages\keras\utils\traceback_utils.py", line 96, in error_handler
return fn(*args, **kwargs)
File "c:\Users\cglab\anaconda3\envs\test2\lib\site-packages\keras\engine\sequential.py", line 410, in call
return super().call(inputs, training=training, mask=mask)
File "c:\Users\cglab\anaconda3\envs\test2\lib\site-packages\keras\engine\functional.py", line 510, in call
return self._run_internal_graph(inputs, training=training, mask=mask)
File "c:\Users\cglab\anaconda3\envs\test2\lib\site-packages\keras\engine\functional.py", line 667, in _run_internal_graph
outputs = node.layer(*args, **kwargs)
File "c:\Users\cglab\anaconda3\envs\test2\lib\site-packages\keras\utils\traceback_utils.py", line 65, in error_handler
return fn(*args, **kwargs)
File "c:\Users\cglab\anaconda3\envs\test2\lib\site-packages\keras\engine\base_layer.py", line 1097, in __call__
outputs = call_fn(inputs, *args, **kwargs)
File "c:\Users\cglab\anaconda3\envs\test2\lib\site-packages\keras\utils\traceback_utils.py", line 96, in error_handler
return fn(*args, **kwargs)
File "c:\Users\cglab\anaconda3\envs\test2\lib\site-packages\keras\layers\core\dense.py", line 241, in call
outputs = tf.matmul(a=inputs, b=self.kernel)
Node: 'sequential/dense/MatMul'
In[0] and In[1] has different ndims: [4] vs. [4,24]
[[{{node sequential/dense/MatMul}}]] [Op:__inference_predict_function_14218]
Welcome to all comments. thx.
I found the error, and it's not where you marked it in the code. The error message even tells one where to look with:
Detected at node 'sequential/dense/MatMul' defined at (most recent call last):
File "C:\Users\cglab\Desktop\Match3_DQN\main.py", line 101, in <module>
action = agent.get_action(state)
File "C:\Users\cglab\Desktop\Match3_DQN\main.py", line 48, in get_action
q_value = self.model.predict(state) <-------------------------
It is in this part:
def get_action(self, state):
if np.random.rand() <= self.epsilon:
return random.randrange(self.action_size)
else:
q_value = self.model.predict(state)
return np.argmax(q_value[0])
The problem is that state
is just one state with shape (4,)
, and a TF model always expects a batch of states. You can fix it with the following line directly above the predict
call:
state = np.expand_dims(state, 0)
This will bring it to the shape (1, 4)
, which is a "batch" of just one example for the model.