I'm trying to use Python's multiprocessing `Pool` to update a global dictionary named `globalDict`. I expect `globalDict` to be `{'0': 0, '1': 1, '2': 2}`, but after the code runs the dictionary is still empty. Please help me fix this issue; the code is below:
from multiprocessing import Pool
import time
def f(x):
    """Record ``x`` in the module-level ``globalDict`` and return that dict.

    The entry is stored under the key ``str(x)`` with value ``x``;
    ``setdefault`` leaves an already-present key untouched.
    """
    global globalDict  # update this dictionary every time the function is called
    globalDict.setdefault(str(x), x)
    return globalDict
def init_pool(dictX):
    """Bind ``dictX`` to the module-level name ``globalDict``.

    Intended to be passed as the ``initializer`` of a ``Pool`` so that the
    worker function can reach the dictionary through the global name.
    """
    global globalDict
    globalDict = dictX
if __name__ == '__main__':
    start = time.time()
    globalDict = {}
    # Hand the dictionary to every worker through the pool initializer.
    pool = Pool(initializer=init_pool, initargs=(globalDict,))
    # Call f() for 0, 1 and 2 in the worker processes.
    pool.map(f, range(3))
    pool.close()
    pool.join()
    stop = time.time()
    print('Done in {:4f}'.format(stop - start))
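Each worker process gets its own copy of the plain dict passed through `initargs`, so updates made in the workers never reach the `globalDict` in the main process. One solution is to use a managed dictionary. There is also no need to return the dictionary from the worker function, `f`: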
from multiprocessing import Pool, Manager
import time
def f(x):
    """Store ``x`` under the key ``str(x)`` in the module-level ``globalDict``.

    ``setdefault`` leaves an already-present key untouched. Nothing is
    returned; the result lives in ``globalDict``.
    """
    globalDict.setdefault(str(x), x)
def init_pool(dictX):
    """Bind ``dictX`` to the module-level name ``globalDict``.

    Intended to be passed as the ``initializer`` of a ``Pool`` so that the
    worker function can reach the dictionary through the global name.
    """
    global globalDict
    globalDict = dictX
if __name__ == '__main__':
    start = time.time()
    with Manager() as manager:
        # A managed dict is a proxy: updates made by the workers are applied
        # to a single dict held by the manager process.
        globalDict = manager.dict()
        # Hand the proxy to every worker through the pool initializer.
        pool = Pool(initializer=init_pool, initargs=(globalDict,))
        # Call f() for 0, 1 and 2 in the worker processes.
        pool.map(f, range(3))
        pool.close()
        pool.join()
        stop = time.time()
        print('Done in {:4f}'.format(stop - start))
        # Must stay inside the ``with`` block: the proxy cannot be used once
        # the manager has been shut down.
        print(globalDict)
Prints:
Done in 0.606996
{'0': 0, '2': 2, '1': 1}
If you want to end up with a "regular" dictionary that no longer requires the `SyncManager` instance returned by the call to `Manager()`, then after the call to `map` completes (and while still inside the `with` block), add the following statement:
# Copy the managed dict's entries into an ordinary, manager-independent dict.
regular_dict = dict(globalDict.items())
Or, if you want to get clever, you can create your own managed dictionary type (we will call it `Dict`) that only supports the one method we need, `setdefault`, and dispatches that method call to an underlying `dict` that we will be able to retrieve when our call to `map` completes:
from multiprocessing import Pool
from multiprocessing.managers import BaseManager
import time
class DictManager(BaseManager):
    """Custom manager class; it adds nothing to ``BaseManager`` itself.

    It exists so that custom types can be registered on it rather than on
    ``BaseManager`` directly.
    """
class Dict:
    """Thin wrapper around a plain ``dict``.

    Only ``setdefault`` is forwarded to the wrapped dict;
    ``get_underlying_dict`` hands the wrapped dict back to the caller.
    """

    def __init__(self):
        self._dict = {}

    def setdefault(self, *args):
        """Forward ``args`` to ``dict.setdefault`` and return its result."""
        return self._dict.setdefault(*args)

    def get_underlying_dict(self):
        """Return the wrapped plain ``dict``."""
        return self._dict
def f(x):
    """Store ``x`` under the key ``str(x)`` in the module-level ``globalDict``.

    ``setdefault`` leaves an already-present key untouched. Nothing is
    returned; the result lives in ``globalDict``.
    """
    globalDict.setdefault(str(x), x)
def init_pool(dictX):
    """Bind ``dictX`` to the module-level name ``globalDict``.

    Intended to be passed as the ``initializer`` of a ``Pool`` so that the
    worker function can reach the dictionary through the global name.
    """
    global globalDict
    globalDict = dictX
if __name__ == '__main__':
    start = time.time()
    # Make ``manager.Dict()`` available: it creates a ``Dict`` in the manager
    # process and returns a proxy to it.
    DictManager.register('Dict', Dict)
    with DictManager() as manager:
        globalDict = manager.Dict()
        # Hand the proxy to every worker through the pool initializer.
        pool = Pool(initializer=init_pool, initargs=(globalDict,))
        # Call f() for 0, 1 and 2 in the worker processes.
        pool.map(f, range(3))
        pool.close()
        pool.join()
        stop = time.time()
        print('Done in {:4f}'.format(stop - start))
        # Fetch the result while the manager is still running; the proxy
        # cannot be used after the ``with`` block exits.
        regular_dict = globalDict.get_underlying_dict()
    # ``regular_dict`` is an ordinary dict, so it outlives the manager.
    print(regular_dict)
Prints:
Done in 0.460001
{'0': 0, '1': 1, '2': 2}