Search code examples
python pickle deep-copy jsonpickle ujson

Is it possible to ujson.dumps() a Python class instance (faster deepcopy)?


I am trying to make a fast copy of a class instance. cPickle.loads(cPickle.dumps(obj, -1)) works fine and is almost 5 times faster than copy.deepcopy, but I read that ujson is much faster than cPickle. I could not get ujson to work with a custom class; is it possible to do so?

example:

import cPickle as pickle
import ujson

class AClass(object):
    """Small example object with two public attributes and one derived dict."""

    def __init__(self):
        # Announce construction so it is visible when __init__ actually runs.
        print('init')
        values = [2, 3, 4]
        self.v = 10
        self.z = values
        # Derived lookup mapping every element of z to itself.
        self._zdict = {item: item for item in values}

a = AClass()
a
#<__main__.AClass at 0x118b1d390>


# does not work with ujson: the round trip below comes back as a plain dict
# (without the `_zdict` entry), not as an AClass instance
ua = ujson.dumps(a)
au = ujson.loads(ua)
au
#{u'v': 10, u'z': [2, 3, 4]}


# but works with pickle: the round trip below yields a new AClass instance
pa = pickle.dumps(a)
ap = pickle.loads(pa)
ap
#<__main__.AClass at 0x117460190>

Solution

  • One idea is to define your own protocol, based on the concept described for pickle. Define __getstate__ and __setstate__ methods in your class:

    class AClass(object):
        """Example object whose serializable state is only ``v`` and ``z``.

        ``_zdict`` is derived from ``z``; it is left out of the state and
        rebuilt whenever the state is restored.
        """

        def __init__(self, v, z):
            self.v = v
            self.z = z
            # Derived lookup pairing the elements of z with themselves.
            self._zdict = dict(zip(z, z))

        def __repr__(self):
            """Return the repr of a dict holding v, z and _zdict."""
            snapshot = {'v': self.v, 'z': self.z, '_zdict': self._zdict}
            return repr(snapshot)

        def __getstate__(self):
            """Return the state to serialize: a dict with v and z only."""
            return dict(v=self.v, z=self.z)

        def __setstate__(self, state):
            """Copy *state* into the instance, then rebuild _zdict from z."""
            vars(self).update(state)
            restored_z = self.z
            self._zdict = dict(zip(restored_z, restored_z))
    

    Then, you can define a save() and a load() function like this:

    import importlib
    import json
    import io
    
    def save(instance, dst_file):
        """Write *instance* to *dst_file* as a JSON document.

        The document records the module and name of the instance's class
        together with the value returned by ``instance.__getstate__()``.

        :param instance: object providing a ``__getstate__`` method.
        :param dst_file: writable text file object passed to ``json.dump``.
        """
        klass = instance.__class__
        payload = {}
        payload['module'] = klass.__module__
        payload['class'] = klass.__name__
        payload['state'] = instance.__getstate__()
        json.dump(payload, dst_file)
    
    
    def load(src_file):
        """Rebuild an object from the JSON document read from *src_file*.

        The document must contain the keys ``module``, ``class`` and
        ``state``. The named module is imported, the class is looked up on
        it, an instance is created with ``__new__`` (so ``__init__`` is not
        called) and the state is handed to its ``__setstate__`` method.

        NOTE: the module named in the document is imported as-is, so only
        load documents from a source you trust.

        :param src_file: readable text file object passed to ``json.load``.
        :return: the restored instance.
        """
        payload = json.load(src_file)
        owner = importlib.import_module(payload['module'])
        klass = getattr(owner, payload['class'])
        restored = klass.__new__(klass)
        restored.__setstate__(payload['state'])
        return restored
    

    Simple usage (using a StringIO here instead of a classic file):

    # Serialize a sample object into an in-memory text buffer.
    a_class = AClass(10, [2, 3, 4])
    my_file = io.StringIO()
    save(a_class, my_file)

    serialized = my_file.getvalue()
    print(serialized)
    # -> {"module": "__main__", "class": "AClass", "state": {"v": 10, "z": [2, 3, 4]}}

    # Read the same JSON text back through a fresh buffer.
    my_file = io.StringIO(serialized)
    instance = load(my_file)

    print(repr(instance))
    # -> {'v': 10, 'z': [2, 3, 4], '_zdict': {2: 2, 3: 3, 4: 4}}