Search code examples
python keras

Keras Sequential model input shape


I want to train a neural net based on a numpy array with 4 entries as the X-data and another array with one entry as the y-data.

X_train = [x1, x2, x3, x4]
y_train = [y1]

A rather simple thing I thought, but I can't get the input shape to work. I also found very little information about how the input shape works: Do you have to specify only the X data? What about the y data?

I already tried setting input_dim = 4, since that was the first logical thing to do, but I got the following error: Error when checking input: expected dense_1_input to have shape (4,) but got array with shape (1,)

I then tried setting input_dim = (4, 1), since I thought the y data was causing that issue. But again I got an error message: Error when checking input: expected dense_1_input to have 3 dimensions, but got array with shape (4, 1)

Here's the code:

# importing the packages
import gym
import numpy as np
from collections import deque

from keras.models import Sequential
from keras.layers import Dense
from keras.optimizers import Adam
from keras.wrappers.scikit_learn import KerasRegressor

from joblib import Parallel

# create the CartPole environment the agent interacts with
env = gym.make('CartPole-v1')

# module-level hyperparameters
lr = 1e-4          # learning rate handed to the Adam optimizer
decay = 1e-3       # decay handed to the Adam optimizer
batch_size = None  # not referenced anywhere in the visible code

# creating a deep learning model with keras
def model():
    """Build, compile and return the dense network used as the regressor.

    The network takes 4 input features and stacks Dense layers of 64, 32
    and 16 ReLU units, followed by a single sigmoid output unit. It is
    compiled with Adam (using the module-level ``lr`` and ``decay``) and a
    mean-squared-error loss. The layer summary is printed before the
    compiled model is returned.
    """
    layers = [
        Dense(64, input_dim=4, activation='relu'),
        Dense(32, activation='relu'),
        Dense(16, activation='relu'),
        Dense(1, activation='sigmoid'),
    ]
    net = Sequential(layers)

    optimizer = Adam(lr=lr, decay=decay)
    net.compile(optimizer, loss='mse')
    net.summary()
    return net

# running the game: 200 episodes of at most 100 randomly chosen steps each
for i_episodes in range(200):
    env.reset()
    for i in range(100):
        env.render()
        # sample a random action from the environment's action space
        action = env.action_space.sample()
        observation, reward, done, info = env.step(action)

        # observation = ndarray float64
        # reward = float
        # done = bool
        # action = int
        # info = empty

        # convert the step results to numpy arrays before handing them to Keras
        observation = np.asarray(observation)
        reward = np.asarray(reward)
        action = np.asarray(action)

        # print(observation.dtype, reward.dtype, action.dtype)
        # print(observation.shape, action.shape)

        # NOTE(review): a fresh KerasRegressor is created on every step, and
        # fit() is given one observation without a leading batch axis;
        # presumably this is the call that raises the
        # "Error when checking input" message quoted in the question.
        estimator = KerasRegressor(build_fn=model, epochs=30, batch_size=3, verbose=1)
        estimator.fit(observation, action)

        # leave the inner loop as soon as the environment reports the episode is over
        if done:
            break
env.close()

If someone could explain how the input shapes work, it would be much appreciated.


Solution

  • The input shape always expects the batch size as the first dimension.

    For example, in your case, the following layer does not expect an array of shape (4,):

    Dense(64, input_dim=4, activation='relu')
    

    The input shape of this dense layer is a tensor of shape (n, 4) where n is the batch size.

    To pass your observation to the model you first need to expand its dims as follows:

    # Keras expects a leading batch axis on both the inputs and the targets,
    # so a single sample has to be turned into a batch of one.
    observation = np.asarray(observation)
    observation = np.expand_dims(observation, axis=0)  # from shape (4,) to (1, 4)
    action = np.expand_dims(np.asarray(action), axis=0)  # from shape () to (1,)
    estimator.fit(observation, action)
    

    Your code should look like this.

    # creating a deep learning model with keras
    def build_model():
        """Build, compile and return the dense network that is trained.

        The network takes 4 input features and stacks Dense layers of 64,
        32 and 16 ReLU units, followed by a single sigmoid output unit. It
        is compiled with Adam (using the module-level ``lr`` and ``decay``)
        and a mean-squared-error loss. The layer summary is printed before
        the compiled model is returned.
        """
        layers = [
            Dense(64, input_dim=4, activation='relu'),
            Dense(32, activation='relu'),
            Dense(16, activation='relu'),
            Dense(1, activation='sigmoid'),
        ]
        net = Sequential(layers)

        optimizer = Adam(lr=lr, decay=decay)
        net.compile(optimizer, loss='mse')
        net.summary()
        return net
    
    # Build the model once, before the loops, so every fit() call below
    # keeps training the same network.
    model = build_model()

    # running the game: 200 episodes of at most 100 randomly chosen steps each
    for i_episodes in range(200):
        env.reset()
        for i in range(100):
            env.render()
            action = env.action_space.sample()
            observation, reward, done, info = env.step(action)

            # observation = ndarray float64
            # reward = float
            # done = bool
            # action = int
            # info = empty

            observation = np.asarray(observation)
            reward = np.asarray(reward)
            action = np.asarray(action)

            # Keras expects a leading batch axis, so turn the single sample
            # into a batch of one: observation (4,) -> (1, 4), action () -> (1,).
            model.fit(np.expand_dims(observation, axis=0), np.expand_dims(action, axis=0))

            # The episode is over: stop stepping the finished environment
            # and let the outer loop reset it.
            if done:
                break
    # release the environment (and its render window) once all episodes ran
    env.close()
    
    

    Also, if you are learning DQN, check out this article.