I am trying to adapt this example from the git repo, basically by combining it with their other example from the same repo here (which uses deconvolution). I cannot quite figure out where I am going wrong, but it seems like something basic. Here is the code:
import numpy as np
import matplotlib.pyplot as plt
from scipy.stats import norm
# Keras uses the TensorFlow backend by default
from keras.layers import Input, Dense, Lambda, Flatten, Reshape
from keras.layers import Conv1D, UpSampling1D
from keras.models import Model
from keras import backend as K
from keras import metrics
from keras.datasets import mnist
# Input image dimensions
steps, original_dim = 1, 28*28  # Take care here, since this changes with the data
# Number of convolutional filters to use
filters = 64
# Convolution kernel size
num_conv = 6
# Set batch size
batch_size = 100
# Decoder output dimensionality
decOutput = 10
latent_dim = 20
intermediate_dim = 256
epsilon_std = 1.0
epochs = 5
x = Input(batch_shape=(batch_size, steps, original_dim))
# Play around with padding here; not sure what to go with.
conv_1 = Conv1D(1,
                kernel_size=num_conv,
                padding='same',
                activation='relu')(x)
conv_2 = Conv1D(filters,
                kernel_size=num_conv,
                padding='same',
                activation='relu',
                strides=1)(conv_1)
flat = Flatten()(conv_2) # Since we are passing flat data anyway, we probably don't need this.
hidden = Dense(intermediate_dim, activation='relu')(flat)
z_mean = Dense(latent_dim)(hidden)
z_log_var = Dense(latent_dim)(hidden)
def sampling(args):
    z_mean, z_log_var = args
    epsilon = K.random_normal(shape=(batch_size, latent_dim),
                              mean=0., stddev=epsilon_std)
    return z_mean + K.exp(z_log_var) * epsilon  # the original VAE divides z_log_var by two -- why?
# note that "output_shape" isn't necessary with the TensorFlow backend
# so you could write `Lambda(sampling)([z_mean, z_log_var])`
z = Lambda(sampling, output_shape=(latent_dim,))([z_mean, z_log_var])
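# (Aside on the question in sampling() above: z_log_var is the log of the
# *variance*, so the standard deviation is exp(0.5 * z_log_var). That is why
# the original VAE multiplies epsilon by exp(z_log_var / 2); dropping the 0.5,
# as above, scales epsilon by the variance rather than the standard deviation.)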
# we instantiate these layers separately so as to reuse them later
decoder_h = Dense(intermediate_dim, activation='relu')
decoder_mean = Dense(original_dim, activation='sigmoid')
h_decoded = decoder_h(z)
x_decoded_mean = decoder_mean(h_decoded)
def vae_loss(x, x_decoded_mean):
    xent_loss = original_dim * metrics.binary_crossentropy(x, x_decoded_mean)
    kl_loss = -0.5 * K.sum(1 + z_log_var - K.square(z_mean) - K.exp(z_log_var), axis=-1)  # double-check what this term is supposed to be
    return xent_loss + kl_loss
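# (Aside: the kl_loss line is the closed-form KL divergence between the
# approximate posterior N(z_mean, exp(z_log_var)) and the standard normal
# prior: KL = -0.5 * sum(1 + log(sigma^2) - mu^2 - sigma^2) over the latent
# dimensions, which is the expression above with z_log_var = log(sigma^2).)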
vae = Model(x, x_decoded_mean)
vae.compile(optimizer='adam', loss=vae_loss) # 'rmsprop'
vae.summary()
Which comes out as:
____________________________________________________________________________________________________
Layer (type) Output Shape Param # Connected to
====================================================================================================
input_31 (InputLayer) (100, 1, 784) 0
____________________________________________________________________________________________________
conv1d_87 (Conv1D) (100, 1, 1) 4705
____________________________________________________________________________________________________
conv1d_88 (Conv1D) (100, 1, 64) 448
____________________________________________________________________________________________________
flatten_29 (Flatten) (100, 64) 0
____________________________________________________________________________________________________
dense_134 (Dense) (100, 256) 16640
____________________________________________________________________________________________________
dense_135 (Dense) (100, 20) 5140
____________________________________________________________________________________________________
dense_136 (Dense) (100, 20) 5140
____________________________________________________________________________________________________
lambda_24 (Lambda) (100, 20) 0
____________________________________________________________________________________________________
dense_137 (Dense) (100, 256) 5376
____________________________________________________________________________________________________
dense_138 (Dense) (100, 784) 201488
====================================================================================================
Total params: 238,937.0
Trainable params: 238,937.0
Non-trainable params: 0.0
Then if I try to run this, like so:
from keras.datasets import mnist
img_rows, img_cols = 1, 28*28
original_img_size = (img_rows, img_cols)
# train the VAE on MNIST digits
(x_train, _), (x_test, y_test) = mnist.load_data()
x_train = x_train.astype('float32') / 255.
x_train = x_train.reshape((x_train.shape[0],) + original_img_size)
print('x_train.shape:', x_train.shape)
N = 1000
epochs = 2
batch_size = int(N/10)
vae.fit(x_train[0:N, :], x_train[0:N, :],
        shuffle=True,
        epochs=epochs,
        batch_size=batch_size)
I get this error, but I cannot quite figure out how to get past it. It seems to have something to do with going from Conv1D to Dense...
ValueError: Cannot feed value of shape (100, 1, 784) for Tensor u'dense_138_target:0', which has shape '(?, ?)'
Try reshaping x_decoded_mean to your input shape: each sample of x_train[0:N,:] is shaped (1, 784), but your output is (784,). Something like:
x_decoded_mean = Reshape([1,784])(x_decoded_mean)
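A minimal sketch of where that Reshape would go, assuming the model definition above is otherwise unchanged:
from keras.layers import Reshape

h_decoded = decoder_h(z)
x_decoded_mean = decoder_mean(h_decoded)  # shape: (batch_size, 784)
# Reshape the flat output back to (1, 784) so it matches the
# (batch_size, 1, 784) targets passed to vae.fit
x_decoded_mean = Reshape([1, 784])(x_decoded_mean)
vae = Model(x, x_decoded_mean)
vae.compile(optimizer='adam', loss=vae_loss)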