I am running this code as a CherryPy web service on both Mac OS X and Ubuntu 14.04. Using Python 3's multiprocessing, I want to start the static method worker() asynchronously within a process pool.

The same code runs flawlessly on Mac OS X; on Ubuntu 14.04 worker() does not run. That is, by debugging the code inside the POST method I can see that each line is executed, from

reqid = str(uuid.uuid4())

to

return handle_error(202, "Request ID: " + reqid)

On Ubuntu 14.04 the worker() method never runs; not even a print() at the top of the method (which would be logged) is reached.

Here is the relevant code (I only omitted the handle_error() method):
import cherrypy
import json
from lib import get_parameters, handle_error
from multiprocessing import Pool
import os
from pymatbridge import Matlab
import requests
import shutil
import uuid
from xml.etree import ElementTree


class Schedule(object):
    exposed = True

    def __init__(self, mlab_path, pool):
        self.mlab_path = mlab_path
        self.pool = pool

    def POST(self, *paths, **params):
        if validate(cherrypy.request.headers):
            try:
                reqid = str(uuid.uuid4())
                path = os.path.join("results", reqid)
                os.makedirs(path)
                wargs = [(self.mlab_path, reqid)]
                self.pool.apply_async(Schedule.worker, wargs)
                return handle_error(202, "Request ID: " + reqid)
            except:
                return handle_error(500, "Internal Server Error")
        else:
            return handle_error(401, "Unauthorized")

    #### this is not executed ####
    @staticmethod
    def worker(args):
        mlab_path, reqid = args
        mlab = Matlab(executable=mlab_path)
        mlab.start()
        mlab.run_code("cd mlab")
        mlab.run_code("sched")
        a = mlab.get_variable("a")
        mlab.stop()
        return reqid
    ####


# to start the Web Service
if __name__ == "__main__":
    # start Web Service with some configuration
    global_conf = {
        "global": {
            "server.environment": "production",
            "engine.autoreload.on": True,
            "engine.autoreload.frequency": 5,
            "server.socket_host": "0.0.0.0",
            "log.screen": False,
            "log.access_file": "site.log",
            "log.error_file": "site.log",
            "server.socket_port": 8084
        }
    }
    cherrypy.config.update(global_conf)
    conf = {
        "/": {
            "request.dispatch": cherrypy.dispatch.MethodDispatcher(),
            "tools.encode.debug": True,
            "request.show_tracebacks": False
        }
    }
    pool = Pool(3)
    cherrypy.tree.mount(Schedule('matlab', pool), "/sched", conf)
    # activate signal handler
    if hasattr(cherrypy.engine, "signal_handler"):
        cherrypy.engine.signal_handler.subscribe()
    # start serving pages
    cherrypy.engine.start()
    cherrypy.engine.block()
Your logic is hiding the problem from you. The apply_async method returns an AsyncResult object which acts as a handle to the asynchronous task you just scheduled. Because you ignore the outcome of that task, the whole thing appears to "fail silently". If you try to get the result of the task, you will see the real problem:
handler = self.pool.apply_async(Schedule.worker, wargs)
handler.get()
... traceback here ...
cPickle.PicklingError: Can't pickle <type 'function'>: attribute lookup __builtin__.function failed
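Note that get() blocks the request until the task finishes, which you probably don't want in a request handler. If the service runs on Python 3, one way to surface such failures without blocking is to pass an error_callback to apply_async; a minimal sketch (log_worker_failure is just an illustrative name):

def log_worker_failure(exc):
    # exc is the exception raised inside the pool worker
    cherrypy.log("worker failed: %r" % exc)

self.pool.apply_async(Schedule.worker, wargs, error_callback=log_worker_failure)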
In short, you must ensure that everything you hand to the Pool, the scheduled function included, is picklable.

Instance and class methods are picklable if the object/class they belong to is picklable as well. Static methods are not picklable because they lose the association with the object they belong to, so the pickle library cannot serialise them correctly. (The cPickle and __builtin__ names in the traceback also reveal that the Ubuntu service is actually running under Python 2, which may be why the same code behaves differently on the two systems.)
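A quick way to check this under the interpreter your service actually runs on: multiprocessing serialises tasks with pickle, so pickle.dumps is a faithful probe.

import pickle

# Raises pickle.PicklingError on interpreters that cannot serialise
# the static method (as in the traceback above); succeeds where it can.
pickle.dumps(Schedule.worker)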
As a general rule, it is better to avoid scheduling anything other than top-level functions to a multiprocessing.Pool.
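A minimal sketch of that refactoring, keeping the body of worker() unchanged and moving it to module level so it can be pickled by name:

def worker(args):
    # top-level function: picklable, so the Pool can ship it to its workers
    mlab_path, reqid = args
    mlab = Matlab(executable=mlab_path)
    mlab.start()
    mlab.run_code("cd mlab")
    mlab.run_code("sched")
    a = mlab.get_variable("a")
    mlab.stop()
    return reqid


class Schedule(object):
    ...

    def POST(self, *paths, **params):
        ...
        # schedule the top-level function instead of the static method
        self.pool.apply_async(worker, [(self.mlab_path, reqid)])
        ...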