Keras - Variational Autoencoder Incompatible shape

I am trying to adapt the code to achieve 1-D convolution using 1-D input. The model is compilable so you can see the layers and shapes in .summary(), but it throws the error when .fit() the model. it seems to occur in loss computation. Below is my code:

import numpy as np
from scipy.stats import norm

from keras.layers import Input, Dense, Lambda, Flatten, Reshape
from keras.layers import Conv1D, UpSampling1D
from keras.models import Model
from keras import backend as K
from keras import metrics

num_conv = 6
batch_size = 100
latent_dim = 2
intermediate_dim = 128
epochs = 50
epsilon_std = 1.0

x = Input(batch_shape=(batch_size, 310, 1)) 
conv_1 = Conv1D(1, kernel_size=num_conv,
                padding='same', activation='relu')(x)
conv_2 = Conv1D(64, kernel_size=num_conv,
                padding='same', strides=2, activation='relu')(conv_1)
conv_3 = Conv1D(64, kernel_size=num_conv,
                padding='same', activation='relu')(conv_2)

flatten = Flatten()(conv_3)
hidden = Dense(intermediate_dim, activation='relu')(flatten)

z_mean = Dense(latent_dim)(hidden)
z_log_var = Dense(latent_dim)(hidden)

def sampling(args):
    z_mean, z_log_var = args
    epsilon = K.random_normal(shape=(batch_size, latent_dim), 
                              mean=0., stddev=epsilon_std)
    return(z_mean + K.exp(z_log_var/2) * epsilon)

z = Lambda(sampling, output_shape=(latent_dim,))([z_mean, z_log_var])

decoder_h = Dense(256, activation='relu')(z)
decoder = Dense(155, activation='relu')(decoder_h)
decoder = Reshape((155, 1))(decoder)
de_conv_1 = Conv1D(64, kernel_size=num_conv, 
                   padding='same', activation='relu')(decoder)
de_conv_2 = Conv1D(64, kernel_size=num_conv,
                   padding='same', activation='relu')(de_conv_1)
upsamp = UpSampling1D(2)(de_conv_2)
x_decoded_mean = Conv1D(1, kernel_size=num_conv,
                        padding='same', activation='relu')(upsamp)
x_decoded_mean = Reshape([310, 1])(x_decoded_mean)

def vae_loss(x, x_decoded_mean):
    x_ = x[:, 150:160, :]
    x_decoded_mean_ = x_decoded_mean[:, 150:160, :]
    xent_loss = 10 * metrics.mean_squared_error(x_, x_decoded_mean_)
    kl_loss = - 0.5 * K.sum(1 + z_log_var - K.square(z_mean) -K.exp(z_log_var), axis=-1)
    return(xent_loss + kl_loss)

vae = Model(x, x_decoded_mean)
vae.summary()
vae.compile(optimizer='rmsprop', loss=vae_loss)

The input data shape is (n_sample, 310, 1). It is an one-D time-series but I include prior and posterior 150 frames to predict the middle 10 frames, resulting in 310 frames as input.

In vae_loss(), the reason that x and x_decoded_mean are sliced is that the purpose is to reconstruct the middle 10 frames with additional information of prior and posterior 150 frames. Therefore I want to force the model to focus on loss computed only from the middle 10 frames.

I got the following error when I .fit() the model:

# X.shape == (n_samples, 310, 1)
# n_samples % batch_size == 0

vae.fit(x=X, y=X, batch_size=batch_size,
        epochs=epochs,
        shuffle=True)

The long error below:

Epoch 1/50
---------------------------------------------------------------------------
InvalidArgumentError                      Traceback (most recent call last)
/Users/yjluo/WORK/pitchPerfect/vae/model2.py in <module>()
     77 vae.fit(x=X, y=X, batch_size=batch_size,
     78         epochs=epochs,
---> 79         shuffle=True)

/usr/local/lib/python2.7/site-packages/keras/engine/training.pyc in fit(self, x, y, batch_size, epochs, verbose, callbacks, validation_split, validation_data, shuffle, class_weight, sample_weight, initial_epoch, **kwargs)
   1496                               val_f=val_f, val_ins=val_ins, shuffle=shuffle,
   1497                               callback_metrics=callback_metrics,
-> 1498                               initial_epoch=initial_epoch)
   1499
   1500     def evaluate(self, x, y, batch_size=32, verbose=1, sample_weight=None):

/usr/local/lib/python2.7/site-packages/keras/engine/training.pyc in _fit_loop(self, f, ins, out_labels, batch_size, epochs, verbose, callbacks, val_f, val_ins, shuffle, callback_metrics, initial_epoch)
   1150                 batch_logs['size'] = len(batch_ids)
   1151                 callbacks.on_batch_begin(batch_index, batch_logs)
-> 1152                 outs = f(ins_batch)
   1153                 if not isinstance(outs, list):
   1154                     outs = [outs]

/usr/local/lib/python2.7/site-packages/keras/backend/tensorflow_backend.pyc in __call__(self, inputs)
   2227         session = get_session()
   2228         updated = session.run(self.outputs + [self.updates_op],
-> 2229                               feed_dict=feed_dict)
   2230         return updated[:len(self.outputs)]
   2231

/usr/local/lib/python2.7/site-packages/tensorflow/python/client/session.pyc in run(self, fetches, feed_dict, options, run_metadata)
    776     try:
    777       result = self._run(None, fetches, feed_dict, options_ptr,
--> 778                          run_metadata_ptr)
    779       if run_metadata:
    780         proto_data = tf_session.TF_GetBuffer(run_metadata_ptr)

/usr/local/lib/python2.7/site-packages/tensorflow/python/client/session.pyc in _run(self, handle, fetches, feed_dict, options, run_metadata)
    980     if final_fetches or final_targets:
    981       results = self._do_run(handle, final_targets, final_fetches,
--> 982                              feed_dict_string, options, run_metadata)
    983     else:
    984       results = []

/usr/local/lib/python2.7/site-packages/tensorflow/python/client/session.pyc in _do_run(self, handle, target_list, fetch_list, feed_dict, options, run_metadata)
   1030     if handle is None:
   1031       return self._do_call(_run_fn, self._session, feed_dict, fetch_list,
-> 1032                            target_list, options, run_metadata)
   1033     else:
   1034       return self._do_call(_prun_fn, self._session, handle, feed_dict,

/usr/local/lib/python2.7/site-packages/tensorflow/python/client/session.pyc in _do_call(self, fn, *args)
   1050         except KeyError:
   1051           pass
-> 1052       raise type(e)(node_def, op, message)
   1053
   1054   def _extend_graph(self):

InvalidArgumentError: Incompatible shapes: [100,10] vs. [100]
         [[Node: gradients_4/add_121_grad/BroadcastGradientArgs = BroadcastGradientArgs[T=DT_INT32, _class=["loc:@add_121"], _device="/job:localhost/replica:0/task:0/cpu:0"](gradients_4/add_121_grad/Shape, gradients_4/add_121_grad/Shape_1)]]

Caused by op u'gradients_4/add_121_grad/BroadcastGradientArgs', defined at:
  File "/usr/local/bin/ipython", line 11, in <module>
    sys.exit(start_ipython())
  File "/usr/local/lib/python2.7/site-packages/IPython/__init__.py", line 119, in start_ipython
    return launch_new_instance(argv=argv, **kwargs)
  File "/usr/local/lib/python2.7/site-packages/traitlets/config/application.py", line 658, in launch_instance
    app.start()
  File "/usr/local/lib/python2.7/site-packages/IPython/terminal/ipapp.py", line 355, in start
    self.shell.mainloop()
  File "/usr/local/lib/python2.7/site-packages/IPython/terminal/interactiveshell.py", line 493, in mainloop
    self.interact()
  File "/usr/local/lib/python2.7/site-packages/IPython/terminal/interactiveshell.py", line 484, in interact
    self.run_cell(code, store_history=True)
  File "/usr/local/lib/python2.7/site-packages/IPython/core/interactiveshell.py", line 2718, in run_cell
    interactivity=interactivity, compiler=compiler, result=result)
  File "/usr/local/lib/python2.7/site-packages/IPython/core/interactiveshell.py", line 2828, in run_ast_nodes
    if self.run_code(code, result):
  File "/usr/local/lib/python2.7/site-packages/IPython/core/interactiveshell.py", line 2882, in run_code
    exec(code_obj, self.user_global_ns, self.user_ns)
  File "<ipython-input-5-475083cdc0be>", line 1, in <module>
    get_ipython().magic(u'run model2.py')
  File "/usr/local/lib/python2.7/site-packages/IPython/core/interactiveshell.py", line 2160, in magic
    return self.run_line_magic(magic_name, magic_arg_s)
  File "/usr/local/lib/python2.7/site-packages/IPython/core/interactiveshell.py", line 2081, in run_line_magic
    result = fn(*args,**kwargs)
  File "<decorator-gen-58>", line 2, in run
  File "/usr/local/lib/python2.7/site-packages/IPython/core/magic.py", line 188, in <lambda>
    call = lambda f, *a, **k: f(*a, **k)
  File "/usr/local/lib/python2.7/site-packages/IPython/core/magics/execution.py", line 742, in run
    run()
  File "/usr/local/lib/python2.7/site-packages/IPython/core/magics/execution.py", line 728, in run
    exit_ignore=exit_ignore)
  File "/usr/local/lib/python2.7/site-packages/IPython/core/interactiveshell.py", line 2483, in safe_execfile
    self.compile if kw['shell_futures'] else None)
  File "/usr/local/lib/python2.7/site-packages/IPython/utils/py3compat.py", line 289, in execfile
    builtin_mod.execfile(filename, *where)
  File "/Users/yjluo/WORK/pitchPerfect/vae/model2.py", line 79, in <module>
    shuffle=True)
  File "/usr/local/lib/python2.7/site-packages/keras/engine/training.py", line 1481, in fit
    self._make_train_function()
  File "/usr/local/lib/python2.7/site-packages/keras/engine/training.py", line 1013, in _make_train_function
    self.total_loss)
  File "/usr/local/lib/python2.7/site-packages/keras/optimizers.py", line 197, in get_updates
    grads = self.get_gradients(loss, params)
  File "/usr/local/lib/python2.7/site-packages/keras/optimizers.py", line 47, in get_gradients
    grads = K.gradients(loss, params)
  File "/usr/local/lib/python2.7/site-packages/keras/backend/tensorflow_backend.py", line 2264, in gradients
    return tf.gradients(loss, variables, colocate_gradients_with_ops=True)
  File "/usr/local/lib/python2.7/site-packages/tensorflow/python/ops/gradients_impl.py", line 560, in gradients
    grad_scope, op, func_call, lambda: grad_fn(op, *out_grads))
  File "/usr/local/lib/python2.7/site-packages/tensorflow/python/ops/gradients_impl.py", line 368, in _MaybeCompile
    return grad_fn()  # Exit early
  File "/usr/local/lib/python2.7/site-packages/tensorflow/python/ops/gradients_impl.py", line 560, in <lambda>
    grad_scope, op, func_call, lambda: grad_fn(op, *out_grads))
  File "/usr/local/lib/python2.7/site-packages/tensorflow/python/ops/math_grad.py", line 598, in _AddGrad
    rx, ry = gen_array_ops._broadcast_gradient_args(sx, sy)
  File "/usr/local/lib/python2.7/site-packages/tensorflow/python/ops/gen_array_ops.py", line 411, in _broadcast_gradient_args
    name=name)
  File "/usr/local/lib/python2.7/site-packages/tensorflow/python/framework/op_def_library.py", line 768, in apply_op
    op_def=op_def)
  File "/usr/local/lib/python2.7/site-packages/tensorflow/python/framework/ops.py", line 2336, in create_op
    original_op=self._default_original_op, op_def=op_def)
  File "/usr/local/lib/python2.7/site-packages/tensorflow/python/framework/ops.py", line 1228, in __init__
    self._traceback = _extract_stack()

...which was originally created as op u'add_121', defined at:
  File "/usr/local/bin/ipython", line 11, in <module>
    sys.exit(start_ipython())
[elided 16 identical lines from previous traceback]
  File "/usr/local/lib/python2.7/site-packages/IPython/utils/py3compat.py", line 289, in execfile
    builtin_mod.execfile(filename, *where)
  File "/Users/yjluo/WORK/pitchPerfect/vae/model2.py", line 68, in <module>
    vae.compile(optimizer='rmsprop', loss=vae_loss)
  File "/usr/local/lib/python2.7/site-packages/keras/engine/training.py", line 910, in compile
    sample_weight, mask)
  File "/usr/local/lib/python2.7/site-packages/keras/engine/training.py", line 436, in weighted
    score_array = fn(y_true, y_pred)
  File "/Users/yjluo/WORK/pitchPerfect/vae/model2.py", line 64, in vae_loss
    return(xent_loss + kl_loss)
  File "/usr/local/lib/python2.7/site-packages/tensorflow/python/ops/math_ops.py", line 821, in binary_op_wrapper
    return func(x, y, name=name)
  File "/usr/local/lib/python2.7/site-packages/tensorflow/python/ops/gen_math_ops.py", line 73, in add
    result = _op_def_lib.apply_op("Add", x=x, y=y, name=name)
  File "/usr/local/lib/python2.7/site-packages/tensorflow/python/framework/op_def_library.py", line 768, in apply_op
    op_def=op_def)
  File "/usr/local/lib/python2.7/site-packages/tensorflow/python/framework/ops.py", line 2336, in create_op
    original_op=self._default_original_op, op_def=op_def)
  File "/usr/local/lib/python2.7/site-packages/tensorflow/python/framework/ops.py", line 1228, in __init__
    self._traceback = _extract_stack()

InvalidArgumentError (see above for traceback): Incompatible shapes: [100,10] vs. [100]
         [[Node: gradients_4/add_121_grad/BroadcastGradientArgs = BroadcastGradientArgs[T=DT_INT32, _class=["loc:@add_121"], _device="/job:localhost/replica:0/task:0/cpu:0"](gradients_4/add_121_grad/Shape, gradients_4/add_121_grad/Shape_1)]]

Based on the line Incompatible shapes: [100,10] vs. [100], I believe it happens in loss computation but I can't figure out the solution. Moreover, even I don't do the slicing in vae_loss(), the error still show as Incompatible shapes: [100,310] vs. [100]. Could anyone please give me some suggestion?

Solution

The problem is that xent_loss is a 2D-tensor having a shape (100, 10), and kl_loss is a 1D-tensor having a shape (100). In tensorflow, it is invalid to add these two tensors. See this section from the official doc.

Consider the previous example, instead of adding a scalar to a (2,3) matrix, add a vector of dimension (3) to a matrix of dimensions (2,3). Without specifying broadcasting, this operation is invalid. To correctly request matrix-vector addition, specify the broadcasting dimension to be (1), meaning the vector's dimension is matched to dimension 1 of the matrix.

This occurs because metrics.mean_squared_error() takes an average over the feature axis, but not the time axis.

To fix this problem, either take another K.mean() over the time axis:

xent_loss = 10 * K.mean(metrics.mean_squared_error(x_, x_decoded_mean_), axis=-1)

or, use K.squeeze() to remove the features axes before feeding the tensors into metrics.mean_squared_error() (but this only applies to 1D time-series):

x_ = K.squeeze(x[:, 150:160, :], axis=-1)
x_decoded_mean_ = K.squeeze(x_decoded_mean[:, 150:160, :], axis=-1)
xent_loss = 10 * metrics.mean_squared_error(x_, x_decoded_mean_)

However, the best way is to forget about metrics.mean_squared_error(), and compute the MSE by yourself, with a correct axis argument.

xent_loss = 10 * K.mean(K.square(x_ - x_decoded_mean_), axis=[1, 2])