Search code examples
python python-3.x docplex process-pool

BrokenProcessPool error while trying to run docplex example


I am trying to run some CPLEX models in parallel with a Python process pool. I tried to run this example of a process pool with docplex on Windows 10 (Spyder 3.6.9). When I run it, I get this error:

  File "C:/Users/.spyder-py3/docplex_contribs-master/docplex_contribs/src/zoomontecarlo2.py", line 43, in <module>
    main()

  File "C:/Users/.spyder-py3/docplex_contribs-master/docplex_contribs/src/zoomontecarlo2.py", line 36, in main
    allres = run_model_process_pool(model_build_fn=build_sampled_model, nb_process=nb_samples, verbose=True)

  File "C:\Users\.spyder-py3\docplex_contribs-master\docplex_contribs\src\process_pool.py", line 108, in run_model_process_pool
    res = future.result()

  File "C:\Users\AppData\Local\Continuum\anaconda3\lib\concurrent\futures\_base.py", line 425, in result
    return self.__get_result()

  File "C:\Users\AppData\Local\Continuum\anaconda3\lib\concurrent\futures\_base.py", line 384, in __get_result
    raise self._exception

BrokenProcessPool: A process in the process pool was terminated abruptly while the future was running or pending.

I tried using different machines and setting max_workers to 1, but it did not help.

Edit: I have added the code I am using to make the question clearer. This is my process_pool.py:

import concurrent.futures
from concurrent.futures import ProcessPoolExecutor
class ModelRunner(object):
    """Callable that builds one model, solves it and returns a compact result.

    An instance wraps a model build function. Calling the instance with
    keyword arguments forwards them to the build function, solves the
    returned model and converts the solution with :meth:`make_result`.
    """

    # Name of the keyword argument that carries the run number.
    run_kw = 'run'

    @staticmethod
    def make_result(result, sol):
        """Convert a solution into the requested result form.

        :param result: ``'solution'`` to return ``sol`` itself, ``'dict'`` to
            return ``sol.as_name_dict()`` extended with an
            ``'_objective_value'`` entry; any other value returns the
            objective value.
        :param sol: the solution object, or a falsy value when there is none.
        :return: the converted result, or ``None`` when ``sol`` is falsy.
        """
        # temporary, for now we cannot pickle solutions.
        if not sol:
            return None
        if result == 'solution':
            return sol
        if result == 'dict':
            sol_d = sol.as_name_dict()
            sol_d['_objective_value'] = sol.objective_value
            return sol_d
        # default is objective
        return sol.objective_value

    def __init__(self, buildfn, result="objective", verbose=True):
        """Store the build function and the reporting options.

        :param buildfn: callable accepting keyword arguments and returning a
            model exposing ``name``, ``solve()`` and ``solve_details``.
        :param result: result form passed to :meth:`make_result`.
        :param verbose: when true, print a line at the start and end of a run.
        """
        self.buildfn = buildfn
        self._result = result
        self.verbose = bool(verbose)

    def __call__(self, **kwargs):
        """Build, solve and summarize one model.

        :param kwargs: forwarded unchanged to the build function; the entry
            named by ``run_kw`` is read as the run number.
        :return: the value produced by :meth:`make_result`, or ``None`` when
            the model has no solution.
        """
        try:
            nrun = int(kwargs.get(self.run_kw, -1))
        except (TypeError, ValueError):
            # int() raises TypeError for None and ValueError for a
            # non-numeric string; both mean "no usable run number".
            print("warning: no run number was found in kwargs")
            nrun = -1

        # use the model build function to create one instance
        m = self.buildfn(**kwargs)
        assert m is not None
        mname = m.name
        if self.verbose:
            print('--> begin run #{0} for model {1}'.format(nrun, mname))
        # Give each run a distinct model name.
        m.name = '%s_%d' % (mname, nrun)

        sol = m.solve()
        if not sol:
            print("*** model {0} has no solution".format(m.name))
            return None

        timed = m.solve_details.time
        if self.verbose:
            print(
                '<-- end run #{0} for model {1}, obj={2}, time={3:.2f}s'.format(nrun, m.name, sol.objective_value, timed))
        return self.make_result(self._result, sol)


def run_model_process_pool(model_build_fn, nb_process, max_workers=3,
                           result='objective', verbose=True):
    """Solve ``nb_process`` models in a process pool and collect the results.

    One ``ModelRunner`` call is submitted per run index ``0..nb_process-1``;
    the index is passed to the runner as the ``run`` keyword argument.

    :param model_build_fn: build function handed to ``ModelRunner``.
    :param nb_process: number of runs to submit; must be at least 2.
    :param max_workers: maximum number of worker processes in the pool.
    :param result: result form forwarded to ``ModelRunner``.
    :param verbose: verbosity flag forwarded to ``ModelRunner``.
    :return: list of results in completion order, or ``None`` as soon as one
        run returns ``None``.
    :raises ValueError: if ``nb_process`` is smaller than 2.
    """
    # The message promises that 2 is accepted, so only reject values below 2.
    if nb_process < 2:
        raise ValueError(f"Expecting a number of processes >= 2, {nb_process} was passed")
    pool_runner = ModelRunner(model_build_fn, result=result, verbose=verbose)
    allres = []
    # Diagnostic dependency: imported before the pool is created so that a
    # missing module fails before any worker is started.
    import psutil

    with ProcessPoolExecutor(max_workers=max_workers) as executor:
        future_to_i = {executor.submit(pool_runner, run=i): i for i in range(nb_process)}
        for future in concurrent.futures.as_completed(future_to_i):
            # Memory snapshot printed for each completed future (diagnostic).
            print(psutil.virtual_memory())
            # Re-raises any exception raised in, or on behalf of, the worker.
            res = future.result()
            if res is None:
                return None
            allres.append(res)
    return allres

And this one is zoomontecarlo2.py, which contains the CPLEX models and uses process_pool:

import random
from docplex.mp.model import Model
def build_zoo_mincost_model(nbKids):
    """Build the 'buses' model for transporting ``nbKids`` kids at minimum cost.

    Two integer variables, ``nbBus40`` and ``nbBus30``, are weighted by 40 and
    30 in the 'kids' constraint, which requires the weighted sum to be at
    least ``nbKids``. The objective minimizes 500.0 per ``nbBus40`` unit plus
    400.0 per ``nbBus30`` unit.

    :param nbKids: right-hand side of the 'kids' constraint.
    :return: the model, not yet solved.
    """
    model = Model(name='buses')
    big_buses = model.integer_var(name='nbBus40')
    small_buses = model.integer_var(name='nbBus30')
    big_cost, small_cost = 500.0, 400.0

    # Constraint: the weighted bus count must cover every kid.
    seats_offered = big_buses * 40 + small_buses * 30
    model.add_constraint(seats_offered >= nbKids, 'kids')

    # Objective: total cost over both bus variables.
    total_cost = big_buses * big_cost + small_buses * small_cost
    model.minimize(total_cost)
    return model

# Base number of kids (presumably the nominal head count before the random
# offset is applied -- confirm against the model builders).
nb_kids = 300
# Bound of the random offset: each sample is drawn from
# [-max_floating, max_floating].
max_floating = 30
# Number of random samples drawn; main() also passes it as the number of runs.
nb_samples = 50
# One random offset per sample, drawn when this module is executed or
# imported; build_sampled_model indexes into this list.
samples = [random.randint(-max_floating, max_floating) for _ in range(nb_samples)]
def build_sampled_model(**kwargs):
    """Build the bus model for one sampled scenario.

    :param kwargs: may contain ``run``, the run number used to select a
        sample; it defaults to -1, which selects the last sample.
    :return: the model built by ``build_zoo_mincost_model`` for ``nb_kids``
        plus the selected offset.
    """
    nrun = kwargs.pop('run', -1)
    # The modulo keeps any run number inside the sample list.
    nb_floating = samples[nrun % nb_samples]
    print(f"-- running kids model with {nb_floating} floating kids")
    # Use the module constant rather than a duplicated literal so that
    # changing nb_kids actually changes the models that are built.
    return build_zoo_mincost_model(nb_kids + nb_floating)
def main():
    """Run the sampled models in a process pool and print the mean cost.

    Prints the mean of the collected objective values followed by the raw
    list. When the pool returns no results (a run had no solution), a
    message is printed instead of the statistics.
    """
    from process_pool import run_model_process_pool

    # No local `samples` list is drawn here: build_sampled_model reads the
    # module-level one, so a local list would never be used.
    allres = run_model_process_pool(model_build_fn=build_sampled_model, nb_process=nb_samples, verbose=True)
    if not allres:
        # run_model_process_pool returns None as soon as one run yields None.
        print("* monte carlo: at least one run returned no result, mean cost cannot be computed")
        return
    mean_cost = sum(allres) / len(allres)
    print(f"* monte carlo, #samples={nb_samples}, max. absents={max_floating}, mean cost is {mean_cost}")
    print(allres)
   
# Run the experiment only when this file is executed as a script, so that
# merely importing the module (as pool worker processes may do) does not
# call main() again.
if __name__ == "__main__":
    main()

When execution enters the "for future in concurrent.futures.as_completed(future_to_i)" loop, the memory info is:

svmem(total=17091981312, available=9288286208, percent=45.7, used=7803695104, free=9288286208)

When it gets to the "res = future.result()" line, it crashes with the error above.


Solution

  • Turning one of my comments into an answer: You have to figure out why the process gets killed abruptly. One potential reason is that it runs out of memory. If a process runs out of memory, it might get killed by the operating system without further notice.

    According to your comments this may indeed have been what happened here.