Search code examples
python · scikit-learn · persistence · pickle · keras

How to save Scikit-Learn-Keras Model into a Persistence File (pickle/hd5/json/yaml)


I have the following code, using Keras Scikit-Learn Wrapper:

from keras.models import Sequential
from sklearn import datasets
from keras.layers import Dense
from sklearn.model_selection import train_test_split
from keras.wrappers.scikit_learn import KerasClassifier
from sklearn.model_selection import StratifiedKFold
from sklearn.model_selection import cross_val_score
from sklearn import preprocessing
import pickle
import numpy as np
import json

def classifier(X, y):
    """
    Build an unfitted KerasClassifier sized to the feature matrix.

    X must expose a two-element ``shape``; its second entry becomes the
    input width of the network.  y is accepted but not used here.
    Returns the scikit-learn wrapper around the nested ``create_model``
    builder.
    """
    _, n_features = X.shape

    def create_model():
        # Dense stack 12 -> 6 -> 1, uniform init, sigmoid on the output unit.
        net = Sequential([
            Dense(12, input_dim=n_features, init='uniform', activation='relu'),
            Dense(6, init='uniform', activation='relu'),
            Dense(1, init='uniform', activation='sigmoid'),
        ])
        net.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
        return net

    # Seed NumPy's global generator before handing back the wrapper.
    np.random.seed(7)
    return KerasClassifier(build_fn=create_model, nb_epoch=150, batch_size=10, verbose=0)


def main():
    """
    Fit the wrapped classifier on the iris data and try to persist it.

    Loads the iris features and targets, scales the features, holds out
    40% of the rows, fits the model returned by classifier(), then
    round-trips the fitted model through pickle and prints the value
    returned by score() on the held-out rows.
    """

    iris = datasets.load_iris()
    X, y = iris.data, iris.target
    X = preprocessing.scale(X)

    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.4, random_state=0)
    model_tt = classifier(X_train, y_train)
    model_tt.fit(X_train, y_train)

    #--------------------------------------------------
    # This fails
    #--------------------------------------------------
    filename = 'finalized_model.sav'
    # Context managers close the file even if pickling raises.
    with open(filename, 'wb') as model_file:
        pickle.dump(model_tt, model_file)
    # load the model from disk
    with open(filename, 'rb') as model_file:
        loaded_model = pickle.load(model_file)
    # Use y_test, the name bound by train_test_split above
    # (Y_test was never defined and raised NameError).
    result = loaded_model.score(X_test, y_test)
    print(result)

    #--------------------------------------------------
    # This also fails
    #--------------------------------------------------
    # from keras.models import load_model
    # model_tt.save('test_model.h5')


    #--------------------------------------------------
    # This works OK
    #--------------------------------------------------
    # print model_tt.score(X_test, y_test)
    # print model_tt.predict_proba(X_test)
    # print model_tt.predict(X_test)


# Output of predict_proba
# 2nd column is the probability that the prediction is 1
# this value is used as final score, which can be used
# with other method as comparison
# [   [ 0.25311464  0.74688536]
#     [ 0.84401423  0.15598579]
#     [ 0.96047372  0.03952631]
#     ...,
#     [ 0.25518912  0.74481088]
#     [ 0.91467732  0.08532269]
#     [ 0.25473493  0.74526507]]

# Output of predict
# [[1]
# [0]
# [0]
# ...,
# [1]
# [0]
# [1]]


# Run the demo only when this file is executed directly as a script.
if __name__ == '__main__':
    main()

As noted in the code, it fails at this line:

pickle.dump(model_tt, open(filename, 'wb'))

With this error:

pickle.PicklingError: Can't pickle <function create_model at 0x101c09320>: it's not found as __main__.create_model

How can I get around it?


Solution

  • Edit 1 : Original answer about saving model

    With HDF5 :

    # saving model: the architecture goes to JSON, the weights to HDF5
    json_model = model_tt.model.to_json()
    # the with-block closes the file once the JSON has been written
    with open('model_architecture.json', 'w') as json_file:
        json_file.write(json_model)
    # saving weights
    model_tt.model.save_weights('model_weights.h5', overwrite=True)


    # loading model
    from keras.models import model_from_json

    with open('model_architecture.json') as json_file:
        model = model_from_json(json_file.read())
    model.load_weights('model_weights.h5')

    # don't forget to compile your model
    model.compile(loss='binary_crossentropy', optimizer='adam')
    

    Edit 2 : full code example with iris dataset

    # Train model and make predictions
    import numpy
    import pandas
    from keras.models import Sequential, model_from_json
    from keras.layers import Dense
    from keras.utils import np_utils
    from sklearn import datasets
    from sklearn import preprocessing
    from sklearn.model_selection import train_test_split
    from sklearn.preprocessing import LabelEncoder
    
    # Seed NumPy's global generator; the same seed is reused for the split.
    seed = 7
    numpy.random.seed(seed)

    # Pull features, targets and class names out of the iris bunch,
    # then scale the features.
    iris = datasets.load_iris()
    X = preprocessing.scale(iris.data)
    Y = iris.target
    labels = iris.target_names

    # Map the class values to integer codes in one fit-and-transform step.
    encoder = LabelEncoder()
    encoded_Y = encoder.fit_transform(Y)

    # One-hot encode the integer codes (one column per class).
    y = np_utils.to_categorical(encoded_Y)
    
    def build_model():
        """Return a compiled network: 4 inputs -> Dense(4, relu) -> Dense(3, sigmoid)."""
        layers = [
            Dense(4, input_dim=4, init='normal', activation='relu'),
            Dense(3, init='normal', activation='sigmoid'),
        ]
        net = Sequential(layers)
        net.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
        return net
    
    def save_model(model):
        """
        Persist a model as two files in the current working directory.

        The architecture returned by ``model.to_json()`` is written to
        'model_architecture.json'; the weights are written to
        'model_weights.h5' via ``model.save_weights``, overwriting any
        existing file.
        """
        # saving model
        json_model = model.to_json()
        # The with-block closes the file even if the write raises.
        with open('model_architecture.json', 'w') as json_file:
            json_file.write(json_model)
        # saving weights
        model.save_weights('model_weights.h5', overwrite=True)
    
    def load_model():
        """
        Rebuild the model from 'model_architecture.json' and 'model_weights.h5'.

        Reads the JSON architecture, restores the weights, then compiles
        the model with categorical cross-entropy loss and the adam
        optimizer.  Returns the compiled model.
        """
        # loading model; the with-block closes the file after reading
        with open('model_architecture.json') as json_file:
            model = model_from_json(json_file.read())
        model.load_weights('model_weights.h5')
        model.compile(loss='categorical_crossentropy', optimizer='adam')
        return model
    
    
    # Hold out 30% of the rows, reusing the seed so the split is repeatable.
    X_train, X_test, Y_train, Y_test = train_test_split(
        X,
        y,
        test_size=0.3,
        random_state=seed,
    )

    # Build a fresh network and fit it on the training rows.
    model = build_model()
    model.fit(X_train, Y_train, nb_epoch=200, batch_size=5, verbose=0)

    # Write the fitted model to disk ...
    save_model(model)

    # ... and replace it with the copy read back from disk.
    model = load_model()

    # Predict class indices for the held-out rows.
    predictions = model.predict_classes(X_test, verbose=0)
    print(predictions)
    # Translate each predicted index back to its class name.
    for class_index in predictions:
        print(labels[class_index])
    

    Please note that I used Keras only, not the wrapper. The wrapper only adds complexity to something simple. Also, the code is intentionally not refactored so that you can see the whole picture.

    Also, you said you want to output 1 or 0. That is not possible with this dataset because it has 3 output dimensions and classes (Iris-setosa, Iris-versicolor, Iris-virginica). If you had only 2 classes, then your output dimension and classes would be 0 or 1 using a sigmoid output function.