I am trying to adapt this example from the git repo, basically by combining it with their other example from the same repo here (which uses deconvolution). I cannot quite figure out where I am going wrong, but it seems like something basic. Here is the code:
import numpy as np
import matplotlib.pyplot as plt
from scipy.stats import norm
# Keras uses the TensorFlow backend by default
from keras.layers import Input, Dense, Lambda, Flatten, Reshape
from keras.layers import Conv1D, UpSampling1D
from keras.models import Model
from keras import backend as K
from keras import metrics
from keras.datasets import mnist
# Input image dimensions
steps, original_dim = 1, 28*28  # Take care here, since this changes with the data
# Number of convolutional filters to use
filters = 64
# Convolution kernel size
num_conv = 6
# Set batch size
batch_size = 100
# Decoder output dimensionality
decOutput = 10
latent_dim = 20
intermediate_dim = 256
epsilon_std = 1.0
epochs = 5
x = Input(batch_shape=(batch_size, steps, original_dim))
# Play around with padding here; not sure what to go with.
conv_1 = Conv1D(1,
                kernel_size=num_conv,
                padding='same',
                activation='relu')(x)
conv_2 = Conv1D(filters,
                kernel_size=num_conv,
                padding='same',
                activation='relu',
                strides=1)(conv_1)
flat = Flatten()(conv_2) # Since we are passing flat data anyway, we probably don't need this.
hidden = Dense(intermediate_dim, activation='relu')(flat)
z_mean = Dense(latent_dim)(hidden)
z_log_var = Dense(latent_dim)(hidden)
def sampling(args):
    z_mean, z_log_var = args
    epsilon = K.random_normal(shape=(batch_size, latent_dim),
                              mean=0., stddev=epsilon_std)
    return z_mean + K.exp(z_log_var) * epsilon  # the original VAE divides z_log_var by two -- why?
# note that "output_shape" isn't necessary with the TensorFlow backend
# so you could write `Lambda(sampling)([z_mean, z_log_var])`
z = Lambda(sampling, output_shape=(latent_dim,))([z_mean, z_log_var])
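# (Aside on the question in sampling() above: z_log_var is the log of the
# *variance*, so the standard deviation is exp(0.5 * z_log_var). That is why
# the original VAE multiplies epsilon by exp(z_log_var / 2); dropping the 0.5,
# as above, scales epsilon by the variance rather than the standard deviation.)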
# we instantiate these layers separately so as to reuse them later
decoder_h = Dense(intermediate_dim, activation='relu')
decoder_mean = Dense(original_dim, activation='sigmoid')
h_decoded = decoder_h(z)
x_decoded_mean = decoder_mean(h_decoded)
def vae_loss(x, x_decoded_mean):
    xent_loss = original_dim * metrics.binary_crossentropy(x, x_decoded_mean)
    kl_loss = -0.5 * K.sum(1 + z_log_var - K.square(z_mean) - K.exp(z_log_var), axis=-1)  # double-check what this term is supposed to be
    return xent_loss + kl_loss
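# (Aside: the kl_loss line is the closed-form KL divergence between the
# approximate posterior N(z_mean, exp(z_log_var)) and the standard normal
# prior: KL = -0.5 * sum(1 + log(sigma^2) - mu^2 - sigma^2) over the latent
# dimensions, which is the expression above with z_log_var = log(sigma^2).)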
vae = Model(x, x_decoded_mean)
vae.compile(optimizer='adam', loss=vae_loss) # 'rmsprop'
vae.summary()
Which comes out as:
____________________________________________________________________________________________________
Layer (type) Output Shape Param # Connected to
====================================================================================================
input_31 (InputLayer) (100, 1, 784) 0
____________________________________________________________________________________________________
conv1d_87 (Conv1D) (100, 1, 1) 4705
____________________________________________________________________________________________________
conv1d_88 (Conv1D) (100, 1, 64) 448
____________________________________________________________________________________________________
flatten_29 (Flatten) (100, 64) 0
____________________________________________________________________________________________________
dense_134 (Dense) (100, 256) 16640
____________________________________________________________________________________________________
dense_135 (Dense) (100, 20) 5140
____________________________________________________________________________________________________
dense_136 (Dense) (100, 20) 5140
____________________________________________________________________________________________________
lambda_24 (Lambda) (100, 20) 0
____________________________________________________________________________________________________
dense_137 (Dense) (100, 256) 5376
____________________________________________________________________________________________________
dense_138 (Dense) (100, 784) 201488
====================================================================================================
Total params: 238,937.0
Trainable params: 238,937.0
Non-trainable params: 0.0
Then if I try to run this, like so:
from keras.datasets import mnist
img_rows, img_cols = 1, 28*28
original_img_size = (img_rows, img_cols)
# train the VAE on MNIST digits
(x_train, _), (x_test, y_test) = mnist.load_data()
x_train = x_train.astype('float32') / 255.
x_train = x_train.reshape((x_train.shape[0],) + original_img_size)
print('x_train.shape:', x_train.shape)
N = 1000
epochs = 2
batch_size = int(N/10)
vae.fit(x_train[0:N, :], x_train[0:N, :],
        shuffle=True,
        epochs=epochs,
        batch_size=batch_size)
I get this error, but I cannot quite figure out how to get past it. It seems to have something to do with going from Conv1D to Dense...
ValueError: Cannot feed value of shape (100, 1, 784) for Tensor u'dense_138_target:0', which has shape '(?, ?)'
Try reshaping x_decoded_mean to your input shape: each sample of x_train[0:N,:] is shaped (1, 784), but your output is (784,). Something like:
x_decoded_mean = Reshape([1,784])(x_decoded_mean)
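A minimal sketch of where that Reshape would go, assuming the model definition above is otherwise unchanged:
from keras.layers import Reshape

h_decoded = decoder_h(z)
x_decoded_mean = decoder_mean(h_decoded)  # shape: (batch_size, 784)
# Reshape the flat output back to (1, 784) so it matches the
# (batch_size, 1, 784) targets passed to vae.fit
x_decoded_mean = Reshape([1, 784])(x_decoded_mean)
vae = Model(x, x_decoded_mean)
vae.compile(optimizer='adam', loss=vae_loss)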