Search code examples
python serialization deep-learning pytorch pickle

UnpicklingError: A load persistent id instruction was encountered, but no persistent_load function was specified


I was trying to run a Python file named api.py. In this file, I'm loading the pickle file of a deep learning model that was built and trained using PyTorch.

api.py — In api.py, the functions given below are the most important ones.

def load_model_weights(model_architecture, weights_path):
    """Load saved weights from ``weights_path`` into ``model_architecture``.

    Args:
        model_architecture: Object exposing ``load_state_dict``; it is
            updated in place.
        weights_path: Path to a checkpoint file readable by ``torch.load``.

    Raises:
        ValueError: If ``weights_path`` is not an existing regular file.
    """
    if not os.path.isfile(weights_path):
        raise ValueError("Path not found {}".format(weights_path))

    cherrypy.log("CHERRYPYLOG Loading model from: {}".format(weights_path))
    # Deserialize onto the CPU so a checkpoint written on a GPU still loads on
    # a host without that device; load_state_dict copies the values into the
    # model's existing parameters, so the model's own device is unchanged.
    # NOTE: torch.load unpickles the file -- only load checkpoints you trust.
    state_dict = torch.load(weights_path, map_location="cpu")
    model_architecture.load_state_dict(state_dict)

        
def load_recommender(vector_dim, hidden, activation, dropout, weights_path):
    """Build the autoencoder, load its weights and return it on the GPU.

    Args:
        vector_dim: Size of the first layer, prepended to the hidden sizes.
        hidden: Comma-separated string of integer layer sizes.
        activation: Passed to ``model.AutoEncoder`` as ``nl_type``.
        dropout: Passed to ``model.AutoEncoder`` as ``dp_drop_prob``.
        weights_path: Checkpoint path handed to ``load_model_weights``.

    Returns:
        The model in eval mode, as returned by its ``.cuda()`` call.
    """
    # Full layer layout: the input width followed by each hidden width.
    layer_sizes = [vector_dim]
    layer_sizes.extend(int(width) for width in hidden.split(','))

    network = model.AutoEncoder(
        layer_sizes=layer_sizes,
        nl_type=activation,
        is_constrained=False,
        dp_drop_prob=dropout,
        last_layer_activations=False,
    )
    load_model_weights(network, weights_path)
    network.eval()
    return network.cuda()

The directory structure

📦MP1
 ┣ 📂.ipynb_checkpoints
 ┃ ┗ 📜RS_netflix3months_100epochs_64,128,128-checkpoint.ipynb
 ┣ 📂data
 ┃ ┣ 📜AutoEncoder.png
 ┃ ┣ 📜collaborative_filtering.gif
 ┃ ┣ 📜movie_titles.txt
 ┃ ┗ 📜shut_up.gif
 ┣ 📂DeepRecommender
 ┃ ┣ 📂data_utils
 ┃ ┃ ┣ 📜movielens_data_convert.py
 ┃ ┃ ┗ 📜netflix_data_convert.py
 ┃ ┣ 📂reco_encoder
 ┃ ┃ ┣ 📂data
 ┃ ┃ ┃ ┣ 📂__pycache__
 ┃ ┃ ┃ ┃ ┣ 📜input_layer.cpython-37.pyc
 ┃ ┃ ┃ ┃ ┣ 📜input_layer_api.cpython-37.pyc
 ┃ ┃ ┃ ┃ ┗ 📜__init__.cpython-37.pyc
 ┃ ┃ ┃ ┣ 📜input_layer.py
 ┃ ┃ ┃ ┣ 📜input_layer_api.py
 ┃ ┃ ┃ ┗ 📜__init__.py
 ┃ ┃ ┣ 📂model
 ┃ ┃ ┃ ┣ 📂__pycache__
 ┃ ┃ ┃ ┃ ┣ 📜model.cpython-37.pyc
 ┃ ┃ ┃ ┃ ┗ 📜__init__.cpython-37.pyc
 ┃ ┃ ┃ ┣ 📜model.py
 ┃ ┃ ┃ ┗ 📜__init__.py
 ┃ ┃ ┣ 📂__pycache__
 ┃ ┃ ┃ ┗ 📜__init__.cpython-37.pyc
 ┃ ┃ ┗ 📜__init__.py
 ┃ ┣ 📂__pycache__
 ┃ ┃ ┗ 📜__init__.cpython-37.pyc
 ┃ ┣ 📜compute_RMSE.py
 ┃ ┣ 📜infer.py
 ┃ ┣ 📜run.py
 ┃ ┗ 📜__init__.py
 ┣ 📂model_save
 ┃ ┣ 📂model.epoch_99
 ┃ ┃ ┗ 📂archive
 ┃ ┃ ┃ ┣ 📂data
 ┃ ┃ ┃ ┃ ┣ 📜92901648
 ┃ ┃ ┃ ┃ ┣ 📜92901728
 ┃ ┃ ┃ ┃ ┣ 📜92901808
 ┃ ┃ ┃ ┃ ┣ 📜92901888
 ┃ ┃ ┃ ┃ ┣ 📜92901968
 ┃ ┃ ┃ ┃ ┣ 📜92902048
 ┃ ┃ ┃ ┃ ┣ 📜92902128
 ┃ ┃ ┃ ┃ ┣ 📜92902208
 ┃ ┃ ┃ ┃ ┣ 📜92902288
 ┃ ┃ ┃ ┃ ┣ 📜92902368
 ┃ ┃ ┃ ┃ ┣ 📜92902448
 ┃ ┃ ┃ ┃ ┗ 📜92902608
 ┃ ┃ ┃ ┣ 📜data.pkl
 ┃ ┃ ┃ ┗ 📜version
 ┃ ┣ 📜model.epoch_99.zip
 ┃ ┗ 📜model.onnx
 ┣ 📂Netflix
 ┃ ┣ 📂N1Y_TEST
 ┃ ┃ ┗ 📜n1y.test.txt
 ┃ ┣ 📂N1Y_TRAIN
 ┃ ┃ ┗ 📜n1y.train.txt
 ┃ ┣ 📂N1Y_VALID
 ┃ ┃ ┗ 📜n1y.valid.txt
 ┃ ┣ 📂N3M_TEST
 ┃ ┃ ┗ 📜n3m.test.txt
 ┃ ┣ 📂N3M_TRAIN
 ┃ ┃ ┗ 📜n3m.train.txt
 ┃ ┣ 📂N3M_VALID
 ┃ ┃ ┗ 📜n3m.valid.txt
 ┃ ┣ 📂N6M_TEST
 ┃ ┃ ┗ 📜n6m.test.txt
 ┃ ┣ 📂N6M_TRAIN
 ┃ ┃ ┗ 📜n6m.train.txt
 ┃ ┣ 📂N6M_VALID
 ┃ ┃ ┗ 📜n6m.valid.txt
 ┃ ┣ 📂NF_TEST
 ┃ ┃ ┗ 📜nf.test.txt
 ┃ ┣ 📂NF_TRAIN
 ┃ ┃ ┗ 📜nf.train.txt
 ┃ ┗ 📂NF_VALID
 ┃ ┃ ┗ 📜nf.valid.txt
 ┣ 📂test
 ┃ ┣ 📂testData_iRec
 ┃ ┃ ┣ 📜.part-00199-f683aa3b-8840-4835-b8bc-a8d1eaa11c78.txt.crc
 ┃ ┃ ┣ 📜part-00000-f683aa3b-8840-4835-b8bc-a8d1eaa11c78.txt
 ┃ ┃ ┣ 📜part-00003-f683aa3b-8840-4835-b8bc-a8d1eaa11c78.txt
 ┃ ┃ ┗ 📜_SUCCESS
 ┃ ┣ 📂testData_uRec
 ┃ ┃ ┣ 📜.part-00000-4a844096-8dd9-425e-9d9d-bd9062cc6940.txt.crc
 ┃ ┃ ┣ 📜._SUCCESS.crc
 ┃ ┃ ┣ 📜part-00161-4a844096-8dd9-425e-9d9d-bd9062cc6940.txt
 ┃ ┃ ┣ 📜part-00196-4a844096-8dd9-425e-9d9d-bd9062cc6940.txt
 ┃ ┃ ┗ 📜part-00199-4a844096-8dd9-425e-9d9d-bd9062cc6940.txt
 ┃ ┣ 📜data_layer_tests.py
 ┃ ┣ 📜test_model.py
 ┃ ┗ 📜__init__.py
 ┣ 📂__pycache__
 ┃ ┣ 📜api.cpython-37.pyc
 ┃ ┣ 📜load_test.cpython-37.pyc
 ┃ ┣ 📜parameters.cpython-37.pyc
 ┃ ┗ 📜utils.cpython-37.pyc
 ┣ 📜api.py
 ┣ 📜compute_RMSE.py
 ┣ 📜load_test.py
 ┣ 📜logger.py
 ┣ 📜netflix_1y_test.csv
 ┣ 📜netflix_1y_train.csv
 ┣ 📜netflix_1y_valid.csv
 ┣ 📜netflix_3m_test.csv
 ┣ 📜netflix_3m_train.csv
 ┣ 📜netflix_3m_valid.csv
 ┣ 📜netflix_6m_test.csv
 ┣ 📜netflix_6m_train.csv
 ┣ 📜netflix_6m_valid.csv
 ┣ 📜netflix_full_test.csv
 ┣ 📜netflix_full_train.csv
 ┣ 📜netflix_full_valid.csv
 ┣ 📜parameters.py
 ┣ 📜preds.txt
 ┣ 📜RS_netflix3months_100epochs_64,128,128.ipynb
 ┗ 📜utils.py

I am getting the following error (raised from torch's serialization.py). Can someone help me with this error?

D:\Anaconda\envs\practise\lib\site-packages\torch\serialization.py in _legacy_load(f, map_location, pickle_module, **pickle_load_args)
    762             "functionality.")
    763 
--> 764     magic_number = pickle_module.load(f, **pickle_load_args)
    765     if magic_number != MAGIC_NUMBER:
    766         raise RuntimeError("Invalid magic number; corrupt file?")

UnpicklingError: A load persistent id instruction was encountered,
but no persistent_load function was specified.

Solution

  • After searching through the PyTorch documentation, I ended up saving the model in the ONNX format, then loading that ONNX model into a PyTorch model and using it for inference.

    import onnx
    from onnx2pytorch import ConvertModel
    
    
    def load_model_weights(model_architecture, weights_path):
        """Load the ONNX model at ``weights_path`` as a PyTorch module.

        Args:
            model_architecture: Kept so existing callers keep working; it is
                not modified, because the module is rebuilt from the ONNX
                graph instead.
            weights_path: Path to the exported ONNX file.

        Returns:
            The module produced by ``ConvertModel`` from the loaded ONNX model.

        Raises:
            ValueError: If ``weights_path`` is not an existing regular file.
        """
        # Check the same path that is logged, loaded and reported in the
        # error, rather than a hard-coded "model.onnx" in the working directory.
        if not os.path.isfile(weights_path):
            raise ValueError("Path not found {}".format(weights_path))

        cherrypy.log("CHERRYPYLOG Loading model from: {}".format(weights_path))
        onnx_model = onnx.load(weights_path)
        # Return the converted module so the caller can use the loaded
        # weights; model_architecture itself is left untouched.
        return ConvertModel(onnx_model)


    def load_recommender(vector_dim, hidden, activation, dropout, weights_path):
        """Return the recommender loaded from ``weights_path``, ready for inference.

        Args:
            vector_dim: Size of the first layer, prepended to the hidden sizes.
            hidden: Comma-separated string of integer layer sizes.
            activation: Passed to ``model.AutoEncoder`` as ``nl_type``.
            dropout: Passed to ``model.AutoEncoder`` as ``dp_drop_prob``.
            weights_path: Path to the exported ONNX file.

        Returns:
            The module converted from the ONNX file, in eval mode, as returned
            by its ``.cuda()`` call.
        """
        rencoder_api = model.AutoEncoder(layer_sizes=[vector_dim] + [int(size) for size in hidden.split(',')],
                                   nl_type=activation,
                                   is_constrained=False,
                                   dp_drop_prob=dropout,
                                   last_layer_activations=False)
        # Use the module rebuilt from the ONNX file: the freshly constructed
        # AutoEncoder above never receives the saved weights.
        rencoder_api = load_model_weights(rencoder_api, weights_path)
        rencoder_api.eval()
        rencoder_api = rencoder_api.cuda()
        return rencoder_api
    

    Some useful resources:

    torch.save

    torch.load

    ONNX tutorials