Search code examples
python, mxnet, gluon, mlp

mxnet.base.MXNetError: Shape inconsistent, Provided = [32,4], inferred shape=[32,1]


I have a very basic MLP net:

def create_gluon_model(num_features, num_classes):
    """Build and initialize a three-layer Gluon MLP.

    Args:
        num_features: Number of units in the first dense (ReLU) layer.
        num_classes: Number of units in the final dense layer, which has
            no activation.

    Returns:
        The initialized ``nn.HybridSequential`` network. It is returned
        without being hybridized.
    """
    model = nn.HybridSequential()
    layers = [
        nn.Dense(num_features, activation="relu"),
        nn.Dense(1000, activation="relu"),
        nn.Dense(num_classes),
    ]
    for layer in layers:
        model.add(layer)
    # Xavier initialization, with the parameters placed on the CPU.
    model.initialize(init=init.Xavier(), ctx=mx.cpu())
    return model

My input data has shape (32, 20).
The network output has shape (32, 4).
The labels have shape (32, 4), with num_classes = 4.

When I try to train:

def train_vmhnet(net, train_data_loader, valid_data_loader, batch_size=32):
    """Train ``net`` for a single epoch and print loss/accuracy statistics.

    Args:
        net: The Gluon network to train.
        train_data_loader: Iterable yielding ``(data, label)`` training
            batches.
        valid_data_loader: Iterable yielding ``(data, label)`` validation
            batches.
        batch_size: Not read by this function.

    Relies on ``ctx``, ``acc``, ``dataset_train`` and ``dataset_test``
    being defined outside this function (not shown here).
    """
    # NOTE(review): the loss is built with its default arguments. With
    # labels of shape (32, 4) this code raises the "Shape inconsistent"
    # error; passing sparse_label=False is the reported fix.
    loss_fn = gluon.loss.SoftmaxCrossEntropyLoss()
    optimizer = gluon.Trainer(
        net.collect_params(), "sgd", {"learning_rate": 0.1}
    )

    for epoch in range(1):
        train_loss = 0.0
        train_acc = 0.0
        valid_acc = 0.0
        tic = time.time()

        # Training pass: forward, backward, parameter update, metrics.
        for features, targets in train_data_loader:
            features = features.as_in_context(mx.cpu(0))
            targets = targets.as_in_context(ctx)
            with autograd.record():
                output = net(features)
                loss = loss_fn(output, targets)
            loss.backward()
            # The step is normalized by the number of samples in the batch.
            optimizer.step(features.shape[0])
            batch_loss = loss.mean().asscalar()
            train_loss += batch_loss
            train_acc += acc(output, targets)
        print(epoch)

        # Validation pass: accumulate accuracy only.
        for features, targets in valid_data_loader:
            features = features.as_in_context(mx.cpu(0))
            predictions = net(features)
            valid_acc += acc(predictions, targets)

        summary = (
            epoch,
            train_loss / len(dataset_train),
            train_acc / len(dataset_train),
            valid_acc / len(dataset_test),
            time.time() - tic,
        )
        print(
            "Epoch %d: loss %.3f, train acc %.3f, test acc %.3f, in %.1f sec"
            % summary
        )

I'm getting the following error:

mxnet.base.MXNetError: Shape inconsistent, Provided = [32,4], inferred shape=[32,1]

Please help


Solution

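  • After hours of searching, I finally found the fix. By default, SoftmaxCrossEntropyLoss expects integer class indices (one per sample), which is why the inferred label shape was [32,1] rather than [32,4]. If your labels are one-hot encoded, make sure you pass the parameter "sparse_label=False" when constructing the loss: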

    # The labels are one-hot encoded, so turn off sparse (class-index) labels.
    criterion = gluon.loss.SoftmaxCrossEntropyLoss(sparse_label=False)
    

    This fixed it.