deep-learning theano lasagne dimensionality-reduction autoencoder

Getting dimensions wrong when creating a feed-forward auto-encoder in Theano/Lasagne

I want to create a simple autoencoder with 3000 input, 2 hidden and 3000 output neurons:

def build_autoencoder(input_var=None):
    """Assemble a 3000 -> 2 -> 3000 dense autoencoder and return its output layer.

    The input layer accepts batches of any size with 3000 features per row.
    The 2-unit bottleneck uses a rectifier with Glorot-uniform initialised
    weights; the 3000-unit output layer uses a softmax.

    input_var: optional Theano variable handed to the input layer.
    """
    bottleneck = DenseLayer(
        InputLayer(shape=(None, 3000), input_var=input_var),
        num_units=2,
        nonlinearity=rectify,
        W=lasagne.init.GlorotUniform(),
    )

    return DenseLayer(bottleneck, num_units=3000, nonlinearity=softmax)

The shape of the training data is as follows:

train.shape = (3000,3)

This is input, target and loss function definition:

import sys
import os
import time
import numpy as np
import theano
import theano.tensor as T
import lasagne
from lasagne.updates import rmsprop
from lasagne.layers import DenseLayer, DropoutLayer, InputLayer
from lasagne.nonlinearities import rectify, softmax
from lasagne.objectives import categorical_crossentropy
# Symbolic placeholders for a batch of inputs and the matching targets
input_var = T.dmatrix('inputs')
target_var = T.dmatrix('targets')

# Wire the network onto the input placeholder
network = build_autoencoder(input_var)

# Training objective: mean of the categorical cross-entropy between the
# network output and the targets
prediction = lasagne.layers.get_output(network)
loss = categorical_crossentropy(prediction, target_var).mean()

# Evaluation objective, built from a deterministic forward pass
test_prediction = lasagne.layers.get_output(network, deterministic=True)
test_loss = categorical_crossentropy(test_prediction, target_var).mean()

# NOTE(review): this compares the per-row argmax of the prediction with the
# full target matrix, which looks like a classifier-style accuracy; for a
# reconstruction target it is probably not a meaningful metric -- confirm.
test_acc = T.mean(
    T.eq(T.argmax(test_prediction, axis=1), target_var),
    dtype=theano.config.floatX,
)

I'm just running one epoch but get an error:

# Collect the trainable parameters and derive RMSProp update rules from the loss
params = lasagne.layers.get_all_params(network, trainable=True)
updates = rmsprop(loss, params, learning_rate=0.001)

# Compiling the graph by declaring the Theano functions

# One training step: returns the loss and applies the parameter updates
train_fn = theano.function([input_var, target_var],
                           loss, updates=updates)
# Evaluation only: returns the deterministic loss and accuracy, no updates
val_fn = theano.function([input_var, target_var],
                         [test_loss, test_acc])

# Number of passes over the data; drives the loop and the progress message,
# which previously referenced an undefined name.
num_epochs = 1

# For loop that goes each time through the whole training
# and validation data
print("Starting training...")
for epoch in range(num_epochs):

    # Going over the training data
    train_err = 0
    train_batches = 0
    start_time = time.time()
    print('test1')
    # Autoencoder: the input doubles as the reconstruction target
    train_err += train_fn(train, train)
    train_batches += 1

    # Going over the validation data (here the same array as for training)
    val_err = 0
    val_acc = 0
    val_batches = 0
    err, acc = val_fn(train, train)
    val_err += err
    val_acc += acc
    val_batches += 1

    # Then we print the results for this epoch:
    print("Epoch {} of {} took {:.3f}s".format(epoch + 1, num_epochs, time.time() - start_time))
    print("training loss:\t\t{:.6f}".format(train_err / train_batches))
    print("validation loss:\t\t{:.6f}".format(val_err / val_batches))
    print("validation accuracy:\t\t{:.2f} %".format(val_acc / val_batches * 100))

This is the error:

ValueError: ('shapes (3000,3) and (3000,2) not aligned: 3 (dim 1) != 3000 (dim 0)', (3000, 3), (3000, 2)) Apply node that caused the error: Dot22(inputs, W) Toposort index: 3 Inputs types: [TensorType(float64, matrix), TensorType(float64, matrix)] Inputs shapes: [(3000, 3), (3000, 2)] Inputs strides: [(24, 8), (16, 8)] Inputs values: ['not shown', 'not shown'] Outputs clients: [[Elemwise{add,no_inplace}(Dot22.0, InplaceDimShuffle{x,0}.0), Elemwise{Composite{(i0 * (Abs(i1) + i2 + i3))}}[(0, 2)](TensorConstant{(1, 1) of 0.5}, Elemwise{add,no_inplace}.0, Dot22.0, InplaceDimShuffle{x,0}.0)]]

To me it seems that the bottleneck of the auto encoder is the problem. Any ideas?

Solution

I just got some help from my IBM colleague (Erwan). I've posted the working solution to this GIST; the relevant sections are these:

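First, get the shape of the training data correct. Each row must be one sample and each column one of the 3000 input features, so that the input can be multiplied by the first layer's (3000, 2) weight matrix; the (3000, 3) array therefore has to be transposed: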

train.shape = (3, 3000)

Then use the same shape on the InputLayer:

def build_autoencoder(input_var=None):
    """Build a 3000 -> 2 -> 3000 dense autoencoder and return its output layer.

    input_var: optional Theano variable bound to the input layer.

    Returns the softmax output layer; the rest of the network is reachable
    from it through the layer chain.
    """
    # The input shape mirrors the training array: 3 rows of 3000 features.
    # Lasagne's InputLayer also accepts None for a dimension whose size is
    # not fixed at compile time, e.g. (None, 3000) for a variable batch size.
    l_in = InputLayer(shape=(3, 3000), input_var=input_var)

    # Two-unit bottleneck with a rectifier and Glorot-uniform weights
    l_hid = DenseLayer(
            l_in, num_units=2,
            nonlinearity=rectify,
            W=lasagne.init.GlorotUniform())

    # Expand back to 3000 outputs
    l_out = DenseLayer(
            l_hid, num_units=3000,
            nonlinearity=softmax)

    # Must be indented into the function body; at column 0 it is a
    # "'return' outside function" syntax error.
    return l_out

So this is solved. The next problem is getting a descending cost during training, but that is another topic :)