Search code examples
python machine-learning openai-gym ray rllib

Errors when trying to use a DQN algorithm for the OpenAI Gym FrozenLake game


I am trying to make a very simple DQN algorithm work with the FrozenLake-v0 game, but I am getting errors. I understand that using DQN instead of a Q-table may be overkill here, but I would nonetheless like to get it working. Here is the code:

import gym
import numpy as np
import tensorflow as tf

env = gym.make("FrozenLake-v0")

# Network sizes come from the environment's discrete spaces: one input
# per state and one output per action.
input_dim = env.observation_space.n
n_actions = env.action_space.n

# Fully connected network: input_dim -> 64 -> 32 -> n_actions.
# The output layer is linear so its values are unbounded.
model = tf.keras.Sequential([
    tf.keras.layers.Dense(64, input_dim=input_dim, activation='relu'),
    tf.keras.layers.Dense(32, activation='relu'),
    tf.keras.layers.Dense(n_actions, activation='linear'),
])
model.compile(loss='mse', optimizer=tf.keras.optimizers.Adam())

def replay(replay_memory, minibatch_size=32):
    """Fit the global ``model`` on one minibatch sampled from the replay memory.

    Args:
        replay_memory: sequence of transition dicts with the keys
            ``'s'``, ``'a'``, ``'r'``, ``'sprime'`` and ``'done'``.
        minibatch_size: number of transitions to draw (with replacement).

    Returns:
        The global ``model`` after one epoch of fitting on the minibatch.
    """
    batch = np.random.choice(replay_memory, minibatch_size, replace=True)

    # Split the sampled transitions into one array per field.
    states = np.array([transition['s'] for transition in batch])
    actions = np.array([transition['a'] for transition in batch])
    rewards = np.array([transition['r'] for transition in batch])
    next_states = np.array([transition['sprime'] for transition in batch])
    dones = np.array([transition['done'] for transition in batch])

    next_qvals = model.predict(next_states)
    # Start from the current predictions so that only the entry of the
    # action actually taken is changed in each row.
    targets = model.predict(states)
    samples = zip(actions, rewards, next_qvals, dones)
    for row, (action, reward, next_q, done) in enumerate(samples):
        # Terminal transitions use the reward alone; otherwise add the
        # discounted best predicted value of the next state.
        targets[row][action] = (
            reward if done else reward + gamma * np.max(next_q)
        )

    model.fit(states, targets, epochs=1, verbose=0)
    return model

# --- Training schedule ---
n_episodes = 500        # number of episodes to run
minibatch_size = 32     # transitions sampled per replay() call

# --- Learning parameters ---
gamma = 0.99            # discount applied to the next state's best value
epsilon = 0.9           # initial probability of taking a random action

# --- Replay buffer ---
mem_max_size = 100_000  # capacity threshold for the replay memory
replay_memory = list()  # stored transition dicts

# --- Bookkeeping ---
r_sums = list()         # total reward of each finished episode

# Training loop as posted in the question: one iteration per episode.
for n in range(n_episodes): 
    s = env.reset()
    done=False
    r_sum = 0  # total reward collected in this episode
    print(s)
    while not done: 
        # NOTE: this is the line the traceback below points at; it raises
        # "ValueError: cannot reshape array of size 1 into shape (16,)".
        qvals_s = model.predict(s.reshape(16))
        # Epsilon-greedy choice: a random action with probability epsilon,
        # otherwise the action with the largest predicted value.
        if np.random.random() < epsilon:  a = env.action_space.sample()
        else:                             a = np.argmax(qvals_s); 
        sprime, r, done, info = env.step(a)
        r_sum += r 
        # Drop the oldest transition before storing the new one.
        # NOTE(review): with `>` the list can reach mem_max_size + 1 entries.
        if len(replay_memory) > mem_max_size:
            replay_memory.pop(0)
        replay_memory.append({"s":s,"a":a,"r":r,"sprime":sprime,"done":done})
        s=sprime
        # replay() fits the model on a sampled minibatch and returns it.
        model=replay(replay_memory, minibatch_size = minibatch_size)
    # Lower epsilon by 0.001 after each episode while it is above 0.1.
    if epsilon > 0.1:      epsilon -= 0.001
    r_sums.append(r_sum)
    # Progress marker every 100 episodes.
    if n % 100 == 0: print(n)

And the errors I am getting are:

Traceback (most recent call last):
  File "froz_versuch.py", line 48, in <module>
    qvals_s = model.predict(s.reshape(16))
ValueError: cannot reshape array of size 1 into shape (16,)

When I then change qvals_s = model.predict(s.reshape(16)) to qvals_s = model.predict(s.reshape(1)), I get this error instead:

ValueError: Input 0 of layer sequential is incompatible with the layer: expected axis -1 of input shape to have value 16 but received input with shape [None, 1]

I'd appreciate any help!


Solution

  • The problem was the state encoding. FrozenLake returns each state as a single integer, but the network's input layer expects a vector of length 16, so I had to one-hot encode s and sprime. This change in the for loop made it work. The encode() function could be moved outside of the loop, but I'm just testing for now, so optimization comes afterward. Here is the solution:

     # Training loop with one-hot encoded states.
     #
     # The environment reports each state as an integer index, while the
     # network expects a vector with one entry per state, so every state is
     # replaced by the matching row of a one-hot table before it is used.

     def encode(data, states_total):
         """Return one one-hot row of width `states_total` per index in `data`."""
         targets = np.array(data).reshape(-1)
         return np.eye(states_total)[targets]

     # encode() and the table are built once, ahead of the loop: they never
     # change, and sharing one table stops every stored transition from
     # keeping its own freshly allocated table alive.
     states_total = env.observation_space.n
     one_hot = encode(range(states_total), states_total)

     for n in range(n_episodes):
         s = one_hot[env.reset()]
         done = False
         r_sum = 0  # total reward collected in this episode
         while not done:
             # predict() expects a batch, hence the (1, states_total) shape.
             qvals_s = model.predict(s.reshape(1, -1))
             # Epsilon-greedy choice: random action with probability
             # epsilon, otherwise the action with the largest predicted value.
             if np.random.random() < epsilon:
                 a = env.action_space.sample()
             else:
                 a = np.argmax(qvals_s)
             sprime, r, done, info = env.step(a)
             r_sum += r
             sprime = one_hot[sprime]
             # `>=` keeps the buffer at mem_max_size entries at most; the
             # previous `>` test allowed one extra transition.
             if len(replay_memory) >= mem_max_size:
                 replay_memory.pop(0)
             replay_memory.append(
                 {"s": s, "a": a, "r": r, "sprime": sprime, "done": done}
             )
             s = sprime
             model = replay(replay_memory, minibatch_size=minibatch_size)
         # Lower epsilon by 0.001 after each episode while it is above 0.001.
         if epsilon > 0.001:
             epsilon -= 0.001
         r_sums.append(r_sum)
         print(r_sum)
         print(epsilon)
         if n % 100 == 0:
             print(n)