I am trying to adapt this code to perform 1-D convolution on 1-D input. The model compiles, so you can see the layers and shapes in `.summary()`, but it throws an error when I call `.fit()` on the model. The error seems to occur in the loss computation. Below is my code:
import numpy as np
from scipy.stats import norm
from keras.layers import Input, Dense, Lambda, Flatten, Reshape
from keras.layers import Conv1D, UpSampling1D
from keras.models import Model
from keras import backend as K
from keras import metrics
# Hyperparameters.
num_conv = 6  # kernel size passed to every Conv1D layer in this script
batch_size = 100
latent_dim = 2  # width of the z_mean / z_log_var Dense layers
intermediate_dim = 128  # width of the hidden Dense layer after Flatten
epochs = 50
epsilon_std = 1.0  # stddev of the noise drawn in sampling()
# Encoder input: fixed-size batches of 310-step, single-channel sequences.
x = Input(batch_shape=(batch_size, 310, 1))
conv_1 = Conv1D(1, kernel_size=num_conv,
                padding='same', activation='relu')(x)
# strides=2 with 'same' padding halves the time axis (310 -> 155 steps).
conv_2 = Conv1D(64, kernel_size=num_conv,
                padding='same', strides=2, activation='relu')(conv_1)
conv_3 = Conv1D(64, kernel_size=num_conv,
                padding='same', activation='relu')(conv_2)
# Collapse (time, channels) into one vector per sample for the Dense layers.
flatten = Flatten()(conv_3)
hidden = Dense(intermediate_dim, activation='relu')(flatten)
# Two parallel heads: mean and log-variance of the latent distribution.
z_mean = Dense(latent_dim)(hidden)
z_log_var = Dense(latent_dim)(hidden)
def sampling(args):
    """Draw a latent sample from the distribution described by ``args``.

    ``args`` is a pair ``(z_mean, z_log_var)``. The result is
    ``z_mean + exp(z_log_var / 2) * epsilon``, where ``epsilon`` is normal
    noise of shape ``(batch_size, latent_dim)`` with mean 0 and standard
    deviation ``epsilon_std``.
    """
    mu, log_var = args
    noise = K.random_normal(shape=(batch_size, latent_dim),
                            mean=0., stddev=epsilon_std)
    # exp(log_var / 2) turns the log-variance into a standard deviation.
    sigma = K.exp(log_var / 2)
    return mu + sigma * noise
# Sample z from the latent distribution given by z_mean and z_log_var.
z = Lambda(sampling, output_shape=(latent_dim,))([z_mean, z_log_var])
# Decoder: expand z to 155 values and treat them as a 155-step,
# single-channel sequence.
decoder_h = Dense(256, activation='relu')(z)
decoder = Dense(155, activation='relu')(decoder_h)
decoder = Reshape((155, 1))(decoder)
de_conv_1 = Conv1D(64, kernel_size=num_conv,
                   padding='same', activation='relu')(decoder)
de_conv_2 = Conv1D(64, kernel_size=num_conv,
                   padding='same', activation='relu')(de_conv_1)
# Repeat every time step twice: 155 -> 310 steps.
upsamp = UpSampling1D(2)(de_conv_2)
x_decoded_mean = Conv1D(1, kernel_size=num_conv,
                        padding='same', activation='relu')(upsamp)
# Pin the per-sample output shape to (310, 1), the same as the input.
x_decoded_mean = Reshape([310, 1])(x_decoded_mean)
def vae_loss(x, x_decoded_mean):
    """Return the reconstruction loss on frames 150-159 plus the KL term.

    Both arguments are sliced to time steps 150:160 before the squared-error
    term is computed. The KL term is built from the module-level ``z_mean``
    and ``z_log_var`` tensors and summed over the last axis.
    """
    target_mid = x[:, 150:160, :]
    decoded_mid = x_decoded_mean[:, 150:160, :]
    xent_loss = 10 * metrics.mean_squared_error(target_mid, decoded_mid)
    kl_inner = 1 + z_log_var - K.square(z_mean) - K.exp(z_log_var)
    kl_loss = - 0.5 * K.sum(kl_inner, axis=-1)
    # NOTE: xent_loss has shape (batch, 10) while kl_loss has shape (batch,);
    # this addition is the op that fails with "Incompatible shapes" in .fit().
    return xent_loss + kl_loss
# End-to-end model from the input sequence to its reconstruction.
vae = Model(x, x_decoded_mean)
vae.summary()
# vae_loss reads the z_mean / z_log_var tensors, so it is passed as a
# custom loss callable rather than a built-in loss name.
vae.compile(optimizer='rmsprop', loss=vae_loss)
The input data shape is (n_sample, 310, 1). It is a 1-D time series, but I include the 150 preceding and the 150 following frames to predict the middle 10 frames, which gives 310 frames as input.
In `vae_loss()`, `x` and `x_decoded_mean` are sliced because the goal is to reconstruct the middle 10 frames, using the 150 preceding and 150 following frames as additional context. I therefore want to force the model to focus on the loss computed only from the middle 10 frames.
I get the following error when I call `.fit()` on the model:
# X.shape == (n_samples, 310, 1)
# n_samples % batch_size == 0
# X is passed as both input and target: the model is trained to reconstruct it.
vae.fit(x=X, y=X, batch_size=batch_size,
        epochs=epochs,
        shuffle=True)
The long error below:
Epoch 1/50
---------------------------------------------------------------------------
InvalidArgumentError Traceback (most recent call last)
/Users/yjluo/WORK/pitchPerfect/vae/model2.py in <module>()
77 vae.fit(x=X, y=X, batch_size=batch_size,
78 epochs=epochs,
---> 79 shuffle=True)
/usr/local/lib/python2.7/site-packages/keras/engine/training.pyc in fit(self, x, y, batch_size, epochs, verbose, callbacks, validation_split, validation_data, shuffle, class_weight, sample_weight, initial_epoch, **kwargs)
1496 val_f=val_f, val_ins=val_ins, shuffle=shuffle,
1497 callback_metrics=callback_metrics,
-> 1498 initial_epoch=initial_epoch)
1499
1500 def evaluate(self, x, y, batch_size=32, verbose=1, sample_weight=None):
/usr/local/lib/python2.7/site-packages/keras/engine/training.pyc in _fit_loop(self, f, ins, out_labels, batch_size, epochs, verbose, callbacks, val_f, val_ins, shuffle, callback_metrics, initial_epoch)
1150 batch_logs['size'] = len(batch_ids)
1151 callbacks.on_batch_begin(batch_index, batch_logs)
-> 1152 outs = f(ins_batch)
1153 if not isinstance(outs, list):
1154 outs = [outs]
/usr/local/lib/python2.7/site-packages/keras/backend/tensorflow_backend.pyc in __call__(self, inputs)
2227 session = get_session()
2228 updated = session.run(self.outputs + [self.updates_op],
-> 2229 feed_dict=feed_dict)
2230 return updated[:len(self.outputs)]
2231
/usr/local/lib/python2.7/site-packages/tensorflow/python/client/session.pyc in run(self, fetches, feed_dict, options, run_metadata)
776 try:
777 result = self._run(None, fetches, feed_dict, options_ptr,
--> 778 run_metadata_ptr)
779 if run_metadata:
780 proto_data = tf_session.TF_GetBuffer(run_metadata_ptr)
/usr/local/lib/python2.7/site-packages/tensorflow/python/client/session.pyc in _run(self, handle, fetches, feed_dict, options, run_metadata)
980 if final_fetches or final_targets:
981 results = self._do_run(handle, final_targets, final_fetches,
--> 982 feed_dict_string, options, run_metadata)
983 else:
984 results = []
/usr/local/lib/python2.7/site-packages/tensorflow/python/client/session.pyc in _do_run(self, handle, target_list, fetch_list, feed_dict, options, run_metadata)
1030 if handle is None:
1031 return self._do_call(_run_fn, self._session, feed_dict, fetch_list,
-> 1032 target_list, options, run_metadata)
1033 else:
1034 return self._do_call(_prun_fn, self._session, handle, feed_dict,
/usr/local/lib/python2.7/site-packages/tensorflow/python/client/session.pyc in _do_call(self, fn, *args)
1050 except KeyError:
1051 pass
-> 1052 raise type(e)(node_def, op, message)
1053
1054 def _extend_graph(self):
InvalidArgumentError: Incompatible shapes: [100,10] vs. [100]
[[Node: gradients_4/add_121_grad/BroadcastGradientArgs = BroadcastGradientArgs[T=DT_INT32, _class=["loc:@add_121"], _device="/job:localhost/replica:0/task:0/cpu:0"](gradients_4/add_121_grad/Shape, gradients_4/add_121_grad/Shape_1)]]
Caused by op u'gradients_4/add_121_grad/BroadcastGradientArgs', defined at:
File "/usr/local/bin/ipython", line 11, in <module>
sys.exit(start_ipython())
File "/usr/local/lib/python2.7/site-packages/IPython/__init__.py", line 119, in start_ipython
return launch_new_instance(argv=argv, **kwargs)
File "/usr/local/lib/python2.7/site-packages/traitlets/config/application.py", line 658, in launch_instance
app.start()
File "/usr/local/lib/python2.7/site-packages/IPython/terminal/ipapp.py", line 355, in start
self.shell.mainloop()
File "/usr/local/lib/python2.7/site-packages/IPython/terminal/interactiveshell.py", line 493, in mainloop
self.interact()
File "/usr/local/lib/python2.7/site-packages/IPython/terminal/interactiveshell.py", line 484, in interact
self.run_cell(code, store_history=True)
File "/usr/local/lib/python2.7/site-packages/IPython/core/interactiveshell.py", line 2718, in run_cell
interactivity=interactivity, compiler=compiler, result=result)
File "/usr/local/lib/python2.7/site-packages/IPython/core/interactiveshell.py", line 2828, in run_ast_nodes
if self.run_code(code, result):
File "/usr/local/lib/python2.7/site-packages/IPython/core/interactiveshell.py", line 2882, in run_code
exec(code_obj, self.user_global_ns, self.user_ns)
File "<ipython-input-5-475083cdc0be>", line 1, in <module>
get_ipython().magic(u'run model2.py')
File "/usr/local/lib/python2.7/site-packages/IPython/core/interactiveshell.py", line 2160, in magic
return self.run_line_magic(magic_name, magic_arg_s)
File "/usr/local/lib/python2.7/site-packages/IPython/core/interactiveshell.py", line 2081, in run_line_magic
result = fn(*args,**kwargs)
File "<decorator-gen-58>", line 2, in run
File "/usr/local/lib/python2.7/site-packages/IPython/core/magic.py", line 188, in <lambda>
call = lambda f, *a, **k: f(*a, **k)
File "/usr/local/lib/python2.7/site-packages/IPython/core/magics/execution.py", line 742, in run
run()
File "/usr/local/lib/python2.7/site-packages/IPython/core/magics/execution.py", line 728, in run
exit_ignore=exit_ignore)
File "/usr/local/lib/python2.7/site-packages/IPython/core/interactiveshell.py", line 2483, in safe_execfile
self.compile if kw['shell_futures'] else None)
File "/usr/local/lib/python2.7/site-packages/IPython/utils/py3compat.py", line 289, in execfile
builtin_mod.execfile(filename, *where)
File "/Users/yjluo/WORK/pitchPerfect/vae/model2.py", line 79, in <module>
shuffle=True)
File "/usr/local/lib/python2.7/site-packages/keras/engine/training.py", line 1481, in fit
self._make_train_function()
File "/usr/local/lib/python2.7/site-packages/keras/engine/training.py", line 1013, in _make_train_function
self.total_loss)
File "/usr/local/lib/python2.7/site-packages/keras/optimizers.py", line 197, in get_updates
grads = self.get_gradients(loss, params)
File "/usr/local/lib/python2.7/site-packages/keras/optimizers.py", line 47, in get_gradients
grads = K.gradients(loss, params)
File "/usr/local/lib/python2.7/site-packages/keras/backend/tensorflow_backend.py", line 2264, in gradients
return tf.gradients(loss, variables, colocate_gradients_with_ops=True)
File "/usr/local/lib/python2.7/site-packages/tensorflow/python/ops/gradients_impl.py", line 560, in gradients
grad_scope, op, func_call, lambda: grad_fn(op, *out_grads))
File "/usr/local/lib/python2.7/site-packages/tensorflow/python/ops/gradients_impl.py", line 368, in _MaybeCompile
return grad_fn() # Exit early
File "/usr/local/lib/python2.7/site-packages/tensorflow/python/ops/gradients_impl.py", line 560, in <lambda>
grad_scope, op, func_call, lambda: grad_fn(op, *out_grads))
File "/usr/local/lib/python2.7/site-packages/tensorflow/python/ops/math_grad.py", line 598, in _AddGrad
rx, ry = gen_array_ops._broadcast_gradient_args(sx, sy)
File "/usr/local/lib/python2.7/site-packages/tensorflow/python/ops/gen_array_ops.py", line 411, in _broadcast_gradient_args
name=name)
File "/usr/local/lib/python2.7/site-packages/tensorflow/python/framework/op_def_library.py", line 768, in apply_op
op_def=op_def)
File "/usr/local/lib/python2.7/site-packages/tensorflow/python/framework/ops.py", line 2336, in create_op
original_op=self._default_original_op, op_def=op_def)
File "/usr/local/lib/python2.7/site-packages/tensorflow/python/framework/ops.py", line 1228, in __init__
self._traceback = _extract_stack()
...which was originally created as op u'add_121', defined at:
File "/usr/local/bin/ipython", line 11, in <module>
sys.exit(start_ipython())
[elided 16 identical lines from previous traceback]
File "/usr/local/lib/python2.7/site-packages/IPython/utils/py3compat.py", line 289, in execfile
builtin_mod.execfile(filename, *where)
File "/Users/yjluo/WORK/pitchPerfect/vae/model2.py", line 68, in <module>
vae.compile(optimizer='rmsprop', loss=vae_loss)
File "/usr/local/lib/python2.7/site-packages/keras/engine/training.py", line 910, in compile
sample_weight, mask)
File "/usr/local/lib/python2.7/site-packages/keras/engine/training.py", line 436, in weighted
score_array = fn(y_true, y_pred)
File "/Users/yjluo/WORK/pitchPerfect/vae/model2.py", line 64, in vae_loss
return(xent_loss + kl_loss)
File "/usr/local/lib/python2.7/site-packages/tensorflow/python/ops/math_ops.py", line 821, in binary_op_wrapper
return func(x, y, name=name)
File "/usr/local/lib/python2.7/site-packages/tensorflow/python/ops/gen_math_ops.py", line 73, in add
result = _op_def_lib.apply_op("Add", x=x, y=y, name=name)
File "/usr/local/lib/python2.7/site-packages/tensorflow/python/framework/op_def_library.py", line 768, in apply_op
op_def=op_def)
File "/usr/local/lib/python2.7/site-packages/tensorflow/python/framework/ops.py", line 2336, in create_op
original_op=self._default_original_op, op_def=op_def)
File "/usr/local/lib/python2.7/site-packages/tensorflow/python/framework/ops.py", line 1228, in __init__
self._traceback = _extract_stack()
InvalidArgumentError (see above for traceback): Incompatible shapes: [100,10] vs. [100]
[[Node: gradients_4/add_121_grad/BroadcastGradientArgs = BroadcastGradientArgs[T=DT_INT32, _class=["loc:@add_121"], _device="/job:localhost/replica:0/task:0/cpu:0"](gradients_4/add_121_grad/Shape, gradients_4/add_121_grad/Shape_1)]]
Based on the line `Incompatible shapes: [100,10] vs. [100]`, I believe the error happens in the loss computation, but I can't figure out the solution. Moreover, even if I don't do the slicing in `vae_loss()`, the error still shows up, as `Incompatible shapes: [100,310] vs. [100]`. Could anyone please give me some suggestions?
The problem is that `xent_loss` is a 2-D tensor with shape (100, 10), and `kl_loss` is a 1-D tensor with shape (100). In TensorFlow, it is invalid to add these two tensors, because their shapes cannot be broadcast against each other. See this section from the official doc.
Consider the previous example, instead of adding a scalar to a (2,3) matrix, add a vector of dimension (3) to a matrix of dimensions (2,3). Without specifying broadcasting, this operation is invalid. To correctly request matrix-vector addition, specify the broadcasting dimension to be (1), meaning the vector's dimension is matched to dimension 1 of the matrix.
This occurs because `metrics.mean_squared_error()` takes an average over the feature axis (the last axis), but not over the time axis, so its result still has shape (batch, time).
To fix this problem, either take another `K.mean()` over the time axis:
# mean_squared_error averages over the feature axis only -> (batch, 10).
per_frame_mse = metrics.mean_squared_error(x_, x_decoded_mean_)
# Averaging over the remaining time axis gives (batch,), matching kl_loss.
xent_loss = 10 * K.mean(per_frame_mse, axis=-1)
or use `K.squeeze()` to remove the feature axis before feeding the tensors into `metrics.mean_squared_error()` (this only works for a 1-D time series, i.e. when the feature axis has size 1):
# Slice out the middle 10 frames, then drop the size-1 feature axis so that
# both tensors are (batch, 10).
x_mid = x[:, 150:160, :]
x_decoded_mid = x_decoded_mean[:, 150:160, :]
x_ = K.squeeze(x_mid, axis=-1)
x_decoded_mean_ = K.squeeze(x_decoded_mid, axis=-1)
# The last axis is now time, so the mean runs over the 10 frames -> (batch,).
xent_loss = 10 * metrics.mean_squared_error(x_, x_decoded_mean_)
However, the best way is to forget about `metrics.mean_squared_error()` and compute the MSE yourself, with the correct `axis` argument:
# Element-wise squared error keeps the (batch, time, feature) shape of x_.
squared_error = K.square(x_ - x_decoded_mean_)
# Average over both the time and feature axes -> (batch,), matching kl_loss.
xent_loss = 10 * K.mean(squared_error, axis=[1, 2])