MXNET CNN+LSTM save/serialize to json

I'm finding a hardtime figuring out how to correctly define a mxnet net so that i can serialize/convert this model to a json file.

The pipeline is composed of a CNN + biLSTM + CTC.

I now i must use HybridBlock and hybridize() but i can't seem to make it work or if its even possible or if there is any other way around.

I'm sure its lack of knowledge on my part and wonder is anyone can help.

Here is the net definition in python:

NUM_HIDDEN = 200
NUM_CLASSES = 13550
NUM_LSTM_LAYER = 1
p_dropout = 0.5
SEQ_LEN = 32

def get_featurizer():
    featurizer = gluon.nn.HybridSequential()
    # conv layer
    featurizer.add(gluon.nn.Conv2D(kernel_size=(3,3), padding=(1,1), channels=32, activation="relu"))
    featurizer.add(gluon.nn.BatchNorm())

    ....
    featurizer.hybridize()
    return featurizer

class EncoderLayer(gluon.Block):
    def __init__(self, **kwargs):
        super(EncoderLayer, self).__init__(**kwargs)
        with self.name_scope():
            self.lstm = mx.gluon.rnn.LSTM(NUM_HIDDEN, NUM_LSTM_LAYER, bidirectional=True)
    def forward(self, x):
        x = x.transpose((0,3,1,2))
        x = x.flatten()
        x = x.split(num_outputs=SEQ_LEN, axis = 1) # (SEQ_LEN, N, CHANNELS)
        x = nd.concat(*[elem.expand_dims(axis=0) for elem in x], dim=0)
        x = self.lstm(x)
        x = x.transpose((1, 0, 2)) # (N, SEQ_LEN, HIDDEN_UNITS)
        return x

def get_encoder():
    encoder = gluon.nn.Sequential()
    encoder.add(EncoderLayer())
    encoder.add(gluon.nn.Dropout(p_dropout))
    return encoder

def get_decoder():
    decoder = mx.gluon.nn.Dense(units=ALPHABET_SIZE, flatten=False)
    decoder.hybridize()
    return decoder

def get_net():
    net = gluon.nn.Sequential()
    with net.name_scope():
        net.add(get_featurizer())
        net.add(get_encoder())
        net.add(get_decoder())
    return net

Any help would be highly appreciated. Thank you very much.

Solution

There are few requirements for a model in Gluon to be exportable to json:

It needs to be hybridizable, meaning that each children block should be hybridizable as well and the model works in both modes
All parameters should be initialized. Since Gluon uses deferred parameter initialization, that means that you should do forward pass at least once before you can save the model.

I did some fixes for your code also introducing new constants when I needed. The most significant changes are:

Don't use split if you can avoid it, because it returns list of NDArrays. Use reshape, which works seemlessly with Symbol as well.
Starting from 1.3.0 version of MXNet, LSTM is also hybridizable, so you can wrap it in a HybridBlock instead of just a Block.
Use HybridSequential.

Here is the adjusted code with an example at the bottom how to save the model and how to load it back. You can find more information in this tutorial.

import mxnet as mx
from mxnet import gluon
from mxnet import nd

BATCH_SIZE = 1
CHANNELS = 100
ALPHABET_SIZE = 1000
NUM_HIDDEN = 200
NUM_CLASSES = 13550
NUM_LSTM_LAYER = 1
p_dropout = 0.5
SEQ_LEN = 32
HEIGHT = 100
WIDTH = 100


def get_featurizer():
    featurizer = gluon.nn.HybridSequential()
    featurizer.add(
        gluon.nn.Conv2D(kernel_size=(3, 3), padding=(1, 1), channels=32, activation="relu"))
    featurizer.add(gluon.nn.BatchNorm())

    return featurizer


class EncoderLayer(gluon.HybridBlock):
    def __init__(self, **kwargs):
        super(EncoderLayer, self).__init__(**kwargs)

        with self.name_scope():
            self.lstm = mx.gluon.rnn.LSTM(NUM_HIDDEN, NUM_LSTM_LAYER, bidirectional=True)

    def hybrid_forward(self, F, x):
        x = x.transpose((0, 3, 1, 2))
        x = x.flatten()
        x = x.reshape(shape=(SEQ_LEN, -1, CHANNELS)) #x.split(num_outputs=SEQ_LEN, axis=1)  # (SEQ_LEN, N, CHANNELS)
        x = self.lstm(x)
        x = x.transpose((1, 0, 2))  # (N, SEQ_LEN, HIDDEN_UNITS)
        return x


def get_encoder():
    encoder = gluon.nn.HybridSequential()
    encoder.add(EncoderLayer())
    encoder.add(gluon.nn.Dropout(p_dropout))
    return encoder


def get_decoder():
    decoder = mx.gluon.nn.Dense(units=ALPHABET_SIZE, flatten=False)
    return decoder


def get_net():
    net = gluon.nn.HybridSequential()

    with net.name_scope():
        net.add(get_featurizer())
        net.add(get_encoder())
        net.add(get_decoder())

    return net


if __name__ == '__main__':
    net = get_net()
    net.initialize()
    net.hybridize()

    fake_data = mx.random.uniform(shape=(BATCH_SIZE, HEIGHT, WIDTH, CHANNELS))
    out = net(fake_data)

    net.export("mymodel")

    deserialized_net = gluon.nn.SymbolBlock.imports("mymodel-symbol.json", ['data'],
                                                    "mymodel-0000.params", ctx=mx.cpu())

    out2 = deserialized_net(fake_data)
    # just to check that we get the same results
    assert (out - out2).sum().asscalar() == 0