Search code examples
python base64 pickle

Encode and Decode with Base64 and Pickle


I need to pickle a dict and then Base64-encode it before transporting the data via an API call.

The receiver should decode the Base64 data and then unpickle it back into a proper dict.

The issue is that decoding fails: after Base64-decoding, the data doesn't seem to be the same binary data as before, so unpickling fails.

What am I missing?

import pickle
import base64
import json

def publishData():
   """Pickle a sample dict, Base64-encode it, wrap it in JSON and decode it again.

   Writes the pickle to ``test.pkl`` in the current directory, reads the raw
   bytes back, Base64-encodes them, builds a JSON document with a single
   ``payload`` entry and passes that document to ``decodeData``. Each
   intermediate value is printed.
   """
   testDict = {}
   testDict['testKey1'] = [1,2,3]
   testDict['testKey2'] = [4,5,6]
   # Serialize the dict to a pickle file on disk.
   with open("test.pkl","wb") as f:
      pickle.dump(testDict, f)
   # Read the pickle back as raw bytes.
   with open("test.pkl", "rb") as openfile:
      data = openfile.read() #Read the raw pickle (binary)
   print("publishData - Pickle read : {}".format(data))
   # Base64-encode so the binary data can be carried inside JSON;
   # b64encode returns a bytes object, not a str.
   data = base64.b64encode(data)
   print("publishData - Base64 encoded : {}".format(data))
   # Build the JSON document to be published via the API.
   # NOTE: str() on a bytes object returns its repr, so the payload becomes
   # "b'...'" (prefix and quotes included) instead of the bare Base64 text.
   # Decoding that string yields different bytes and unpickling fails;
   # data.decode() produces the plain Base64 string instead.
   publishJson = json.dumps({"payload":str(data)})
   print("publishData - Publish JSON : {}".format(publishJson))
   # Round-trip check: hand the JSON straight to the receiver side.
   decodeData(publishJson)

def decodeData(publishJson):
   """Decode a published JSON message and print the unpickled payload.

   Parses ``publishJson``, Base64-decodes its ``payload`` entry, prints the
   decoded bytes, then prints the object obtained by unpickling them.

   NOTE: ``pickle.loads`` can execute arbitrary code while loading, so this
   must only be given messages from a trusted sender.
   """
   message = json.loads(publishJson)
   rawPickle = base64.b64decode(message['payload'])
   print("decodeData - Payload decoded: {}".format(rawPickle))
   # Unpickle first, then print, so a bad payload raises before any output.
   restored = pickle.loads(rawPickle)
   print(restored)

# Run the encode/decode round trip only when executed as a script.
if __name__ == "__main__":
   publishData()

Output:

publishData - Pickle read : b'\x80\x04\x95/\x00\x00\x00\x00\x00\x00\x00}\x94(\x8c\x08testKey1\x94]\x94(K\x01K\x02K\x03e\x8c\x08testKey2\x94]\x94(K\x04K\x05K\x06eu.'
publishData - Base64 encoded : b'gASVLwAAAAAAAAB9lCiMCHRlc3RLZXkxlF2UKEsBSwJLA2WMCHRlc3RLZXkylF2UKEsESwVLBmV1Lg=='
publishData - Publish JSON : {"payload": "b'gASVLwAAAAAAAAB9lCiMCHRlc3RLZXkxlF2UKEsBSwJLA2WMCHRlc3RLZXkylF2UKEsESwVLBmV1Lg=='"}
decodeData - Payload decoded: b'n\x00\x12T\xbc\x00\x00\x00\x00\x00\x00\x01\xf6P\xa20!\xd1\x95\xcd\xd1-\x95\xe4\xc6QvP\xa1,\x05,\t,\r\x960!\xd1\x95\xcd\xd1-\x95\xe4\xcaQvP\xa1,\x11,\x15,\x19\x95\xd4\xb8'

_pickle.UnpicklingError: invalid load key, 'n'.

Solution

  • Call data.decode() or the equivalent str(data, encoding='utf-8') to convert the bytes to a valid base64-encoded string:

    # publishJson = json.dumps({"payload": str(data)})     # -
    publishJson = json.dumps({"payload": data.decode()})   # +
    

    From https://docs.python.org/3/library/stdtypes.html#str:

    Passing a bytes object to str() without the encoding or errors arguments falls under the first case of returning the informal string representation

    print(data)                 #  b'gASVLwAAAAAAAAB9lCiMCHRlc3RLZXkxlF2UKEsBSwJLA2WMCHRlc3RLZXkylF2UKEsESwVLBmV1Lg=='
    print(repr(data))           #  b'gASVLwAAAAAAAAB9lCiMCHRlc3RLZXkxlF2UKEsBSwJLA2WMCHRlc3RLZXkylF2UKEsESwVLBmV1Lg=='
    
    print(str(data))            #  b'gASVLwAAAAAAAAB9lCiMCHRlc3RLZXkxlF2UKEsBSwJLA2WMCHRlc3RLZXkylF2UKEsESwVLBmV1Lg=='
    print(repr(str(data)))      # "b'gASVLwAAAAAAAAB9lCiMCHRlc3RLZXkxlF2UKEsBSwJLA2WMCHRlc3RLZXkylF2UKEsESwVLBmV1Lg=='"
    
    print(data.decode())        #    gASVLwAAAAAAAAB9lCiMCHRlc3RLZXkxlF2UKEsBSwJLA2WMCHRlc3RLZXkylF2UKEsESwVLBmV1Lg==
    print(repr(data.decode()))  #   'gASVLwAAAAAAAAB9lCiMCHRlc3RLZXkxlF2UKEsBSwJLA2WMCHRlc3RLZXkylF2UKEsESwVLBmV1Lg=='