I have a parent
hashmap data structure having a string as key and having hashmap data structures as children (guess child1
, child2
, ..., childN
). Each child is a simple key-value map, having a number as key and a string as value.
In pseudo-code:
parent['key1'] = child1; // child1 is a hash map data structure
child1[0] = 'foo';
child[1] = 'bar';
...
My need is to implement this data structure as a fast lookup table in a database system. Let us take Python as reference language.
Requirements for the solution:
parent
hash will have an estimated total weight of at most 500 MBThe use case is the following:
Would you recommend an in-memory key-value datastore (such as Redis) or a more classical "relational" database solution? Which data model do you suggest me to use?
Sample code using redis-py, assuming you already have Redis (and ideally hiredis) installed, saving each parent as a hash field, with the children as serialized strings, and handling serialization and deserialization on the client side:
JSON version:
## JSON version
import json
# you could use pickle instead,
# just replace json.dumps/json.loads with pickle/unpickle
import redis
# set up the redis client
r = redis.StrictRedis(host = '', port = 6379, db = 0)
# sample parent dicts
parent0 = {'child0': {0:'a', 1:'b', 2:'c',}, 'child1':{5:'e', 6:'f', 7:'g'}}
parent1 = {'child0': {0:'h', 1:'i', 2:'j',}, 'child1':{5:'k', 6:'l', 7:'m'}}
# save the parents as hashfields, with the children as serialized strings
# bear in mind that JSON will convert the int keys to strings in the dumps() process
r.hmset('parent0', {key: json.dumps(parent0[key]) for key in parent0})
r.hmset('parent1', {key: json.dumps(parent0[key]) for key in parent1})
# Get a child dict from a parent
# say child1 of parent0
childstring = r.hget('parent0', 'child1')
childdict = json.loads(childstring)
# this could have been done in a single line...
# if you want to convert the keys back to ints:
for key in childdict.keys():
childdict[int(key)] = childdict[key]
del childdict[key]
print childdict
pickle version:
## pickle version
# For pickle, you need a file-like object.
# StringIO is the native python one, whie cStringIO
# is the c implementation of the same.
# cStringIO is faster
# see http://docs.python.org/library/stringio.html and
# http://www.doughellmann.com/PyMOTW/StringIO/ for more information
import pickle
# Find the best implementation available on this platform
try:
from cStringIO import StringIO
except:
from StringIO import StringIO
import redis
# set up the redis client
r = redis.StrictRedis(host = '', port = 6379, db = 0)
# sample parent dicts
parent0 = {'child0': {0:'a', 1:'b', 2:'c',}, 'child1':{5:'e', 6:'f', 7:'g'}}
parent1 = {'child0': {0:'h', 1:'i', 2:'j',}, 'child1':{5:'k', 6:'l', 7:'m'}}
# define a class with a reusable StringIO object
class Pickler(object):
"""Simple helper class to use pickle with a reusable string buffer object"""
def __init__(self):
self.tmpstr = StringIO()
def __del__(self):
# close the StringIO buffer and delete it
self.tmpstr.close()
del self.tmpstr
def dump(self, obj):
"""Pickle an object and return the pickled string"""
# empty current buffer
self.tmpstr.seek(0,0)
self.tmpstr.truncate(0)
# pickle obj into the buffer
pickle.dump(obj, self.tmpstr)
# move the buffer pointer to the start
self.tmpstr.seek(0,0)
# return the pickled buffer as a string
return self.tmpstr.read()
def load(self, obj):
"""load a pickled object string and return the object"""
# empty the current buffer
self.tmpstr.seek(0,0)
self.tmpstr.truncate(0)
# load the pickled obj string into the buffer
self.tmpstr.write(obj)
# move the buffer pointer to start
self.tmpstr.seek(0,0)
# load the pickled buffer into an object
return pickle.load(self.tmpstr)
pickler = Pickler()
# save the parents as hashfields, with the children as pickled strings,
# pickled using our helper class
r.hmset('parent0', {key: pickler.dump(parent0[key]) for key in parent0})
r.hmset('parent1', {key: pickler.dump(parent1[key]) for key in parent1})
# Get a child dict from a parent
# say child1 of parent0
childstring = r.hget('parent0', 'child1')
# this could be done in a single line...
childdict = pickler.load(childstring)
# we don't need to do any str to int conversion on the keys.
print childdict