Keras Sequential model input shape

I want to train a neural net based on a numpy array with 4 entries as the X-data and another array with one entry as the y-data.

X_train = [x1, x2, x3, x4]
y_train = [y1]

A rather simple thing I thought, but I can't get the input shape to work. I also found very little information about how the input shape works: Do you have to specify only the X data? What about the y data?

I already tried setting input_dim = 4, since that was the first logical thing to do but I got the following Error: Error when checking input: expected dense_1_input to have shape (4,) but got array with shape (1,)

I then tried setting input_dim = (4, 1), since I thought the y data was causing that issue. But again I got an error message: Error when checking input: expected dense_1_input to have 3 dimensions, but got array with shape (4, 1)

Heres the code:

# importing the packages
import gym
import numpy as np
from collections import deque

from keras.models import Sequential
from keras.layers import Dense
from keras.optimizers import Adam
from keras.wrappers.scikit_learn import KerasRegressor

from joblib import Parallel

# creating the environment
env = gym.make('CartPole-v1')

#defining global variables
lr=0.0001
decay=0.001
batch_size=None

# creating a deep learning model with keras
def model():
    model = Sequential()

    model.add(Dense(64, input_dim=4, activation='relu'))
    model.add(Dense(32, activation='relu'))
    model.add(Dense(16, activation='relu'))

    model.add(Dense(1, activation='sigmoid'))

    model.compile(Adam(lr=lr, decay=decay), loss='mse')
    model.summary()
    return model

# running the game
for i_episodes in range(200):
    env.reset()
    for i in range(100):
        env.render()
        action = env.action_space.sample()
        observation, reward, done, info = env.step(action)

        # observation = ndarray float64
        # reward = float
        # done = bool
        # action = int
        # info = empty

        observation = np.asarray(observation)
        reward = np.asarray(reward)
        action = np.asarray(action)

        # print(observation.dtype, reward.dtype, action.dtype)
        # print(observation.shape, action.shape)

        estimator = KerasRegressor(build_fn=model, epochs=30, batch_size=3, verbose=1)
        estimator.fit(observation, action)

        if done:
            break
env.close()

If someone could explain how the input shapes work would be much appreciated.

Solution

Input shape always expect the batch size as first dimention.

For example in your case, the following layer does not expect an array of shape (4,)

Dense(64, input_dim=4, activation='relu')

The input shape of this dense layer is a tensor of shape (n, 4) where n is the batch size.

To pass your observation to the model you first need to expand its dims as follows:

observation = np.asarray(observation)
observation = np.expand_dims(observation, axis=0) # From shape (4,) to (1, 4)
estimator.fit(observation, action)

Your code should look like this.

# creating a deep learning model with keras
def build_model():
    model = Sequential()

    model.add(Dense(64, input_dim=4, activation='relu'))
    model.add(Dense(32, activation='relu'))
    model.add(Dense(16, activation='relu'))

    model.add(Dense(1, activation='sigmoid'))

    model.compile(Adam(lr=lr, decay=decay), loss='mse')
    model.summary()
    return model

model = build_model()

# running the game
for i_episodes in range(200):
    env.reset()
    for i in range(100):
        env.render()
        action = env.action_space.sample()
        observation, reward, done, info = env.step(action)

        # observation = ndarray float64
        # reward = float
        # done = bool
        # action = int
        # info = empty

        observation = np.asarray(observation)
        reward = np.asarray(reward)
        action = np.asarray(action)

        model.fit(np.expand_dims(observation, axis=0), np.expand_dims(action, axis=0))

Also if you are learning DQN check out this article