performance mathematical-optimization pyomo julia-jump

Is there a more efficient implementation for Pyomo models compared to the current intuitive one?

A while ago I started trying to make a fair performance comparison of Pyomo and JuMP. I created a GitHub repository to share the code I use and I am happy if anyone wants to contribute. As of now, I am trying to find a more efficient implementation for the Pyomo models than the intuitive one i came up so far:

IJKLM

def pyomo(I, IJK, JKL, KLM, solve):
    model = pyo.ConcreteModel()

    model.I = pyo.Set(initialize=I)

    x_list = [
        (i, j, k, l, m) for (i, j, k) in IJK for l in JKL[j, k] for m in KLM[k, l]
    ]

    constraint_dict_i = {
        ii: ((i, j, k, l, m) for (i, j, k, l, m) in x_list if i == ii) for ii in I
    }

    model.x_list = pyo.Set(initialize=x_list)
    model.c_dict_i = pyo.Set(model.I, initialize=constraint_dict_i)

    model.z = pyo.Param(default=1)

    model.x = pyo.Var(model.x_list, domain=pyo.NonNegativeReals)

    model.OBJ = pyo.Objective(expr=model.z)

    model.ei = pyo.Constraint(model.I, rule=ei_rule)

    if solve:
        opt = pyo.SolverFactory("gurobi")
        opt.solve(model)


def ei_rule(model, i):
    return sum(model.x[i, j, k, l, m] for i, j, k, l, m in model.c_dict_i[i]) >= 0

Supply Chain

def intuitive_pyomo(I, L, M, IJ, JK, IK, KL, LM, D, solve):
    model = pyo.ConcreteModel()

    model.I = pyo.Set(initialize=I)
    model.L = pyo.Set(initialize=L)
    model.M = pyo.Set(initialize=M)
    model.IJ = pyo.Set(initialize=IJ)
    model.JK = pyo.Set(initialize=JK)
    model.IK = pyo.Set(initialize=IK)
    model.KL = pyo.Set(initialize=KL)
    model.LM = pyo.Set(initialize=LM)

    model.f = pyo.Param(default=1)
    model.d = pyo.Param(model.I, model.M, initialize=D)

    model.x = pyo.Var(
        [
            (i, j, k)
            for (i, j) in model.IJ
            for (jj, k) in model.JK
            if jj == j
            for (ii, kk) in model.IK
            if (ii == i) and (kk == k)
        ],
        domain=pyo.NonNegativeReals,
    )

    model.y = pyo.Var(
        [(i, k, l) for i in model.I for (k, l) in model.KL], domain=pyo.NonNegativeReals
    )

    model.z = pyo.Var(
        [(i, l, m) for i in model.I for (l, m) in model.LM], domain=pyo.NonNegativeReals
    )

    model.OBJ = pyo.Objective(expr=model.f)

    model.production = pyo.Constraint(model.IK, rule=intuitive_production_rule)
    model.transport = pyo.Constraint(model.I, model.L, rule=intuitive_transport_rule)
    model.demand = pyo.Constraint(model.I, model.M, rule=intuitive_demand_rule)

    # model.write("int.lp")

    if solve:
        opt = pyo.SolverFactory("gurobi")
        opt.solve(model)


def intuitive_production_rule(model, i, k):
    lhs = [
        model.x[i, j, k]
        for (ii, j) in model.IJ
        if ii == i
        for (jj, kk) in model.JK
        if (jj == j) and (kk == k)
    ]
    rhs = [model.y[i, k, l] for (kk, l) in model.KL if kk == k]

    if lhs or rhs:
        return sum(lhs) >= sum(rhs)
    else:
        return pyo.Constraint.Skip


def intuitive_transport_rule(model, i, l):
    lhs = [model.y[i, k, l] for (k, ll) in model.KL if ll == l]
    rhs = [model.z[i, l, m] for (lll, m) in model.LM if lll == l]

    if lhs or rhs:
        return sum(lhs) >= sum(rhs)
    else:
        return pyo.Constraint.Skip


def intuitive_demand_rule(model, i, m):
    return sum(model.z[i, l, m] for (l, mm) in model.LM if mm == m) >= model.d[i, m]

I measure performance in model generation time for two exemplary models I refer to as IJKLM and Supply Chain.

Models:

IJKLM

Supply Chain

These are the results for increasing instance sizes:

IJKLM

Supply Chain

Can anyone help to improve Pyomos performance?

Solution

Here are a couple mods/notions for the Supply Chain Pyomo model for you to consider aside from the data issues that you have (under-representative scope and infeasibilities).

Making the lhs set inside the production constraint is expensive, and likely much more expensive if the data were representative. And in the current construct, you are executing that expensive operation inside of a function that is going to execute |model.IK| times. You can wrangle the data separately from the elements that you have to pre-compute the valid indices for model.x as shown with an indexed set in the model, or just a dictionary if you don't want to make the indexed set in the model. This same construct can be used to generate the domain for model.x as shown, for a tiny speedup (because the domain for x is only computed once) for free.
You can reduce the number of production constraints to only what is required by the demand (rhs in your construct). Presumably, in the production model, you would minimize x, so the only constraints needed are where there is a rhs. Note that I dropped in a little print statement to show infeasibilities where there is demand, but no means to produce (defect in the data).

With those 2 tweaks, I'm getting runs of |model.I| at 7900 in 3.8 seconds build time. I think that will improve (relative to original) with better data.

Your code w/ mods:

import pyomo.environ as pyo
import logging
import timeit
import pandas as pd
import numpy as np
from collections import defaultdict
from itertools import chain

logging.getLogger("pyomo.core").setLevel(logging.ERROR)


########## Intuitive Pyomo ##########
def run_intuitive_pyomo(I, L, M, IJ, JK, IK, KL, LM, D, solve, repeats, number):
    setup = {
        "I": I,
        "L": L,
        "M": M,
        "IJ": IJ,
        "JK": JK,
        "IK": IK,
        "KL": KL,
        "LM": LM,
        "D": D,
        "solve": solve,
        "model_function": intuitive_pyomo,
    }
    r = timeit.repeat(
        "model_function(I, L, M, IJ, JK, IK, KL, LM, D, solve)",
        repeat=repeats,
        number=number,
        globals=setup,
    )

    result = pd.DataFrame(
        {
            "I": [len(I)],
            "Language": ["Intuitive Pyomo"],
            "MinTime": [np.min(r)],
            "MeanTime": [np.mean(r)],
            "MedianTime": [np.median(r)],
        }
    )
    return result


def intuitive_pyomo(I, L, M, IJ, JK, IK, KL, LM, D, solve):

    # some data wrangling
    IJ_dict = defaultdict(set)
    for i, j in IJ:  IJ_dict[i].add(j)
    KJ_dict = defaultdict(set)
    for j, k in JK:  KJ_dict[k].add(j)
    # make a dictionary of (i, k) : {(i, j, k) tuples}
    IK_IJK = {(i, k): {(i, j, k) for j in IJ_dict.get(i, set()) & KJ_dict.get(k, set())}
             for (i, k) in IK}

    model = pyo.ConcreteModel()

    model.I = pyo.Set(initialize=I)
    model.L = pyo.Set(initialize=L)
    model.M = pyo.Set(initialize=M)
    model.IJ = pyo.Set(initialize=IJ)
    model.JK = pyo.Set(initialize=JK)
    model.IK = pyo.Set(initialize=IK)
    model.KL = pyo.Set(initialize=KL)
    model.LM = pyo.Set(initialize=LM)

    model.IK_IJK = pyo.Set(IK_IJK.keys(), initialize=IK_IJK)

    model.f = pyo.Param(default=1)
    model.d = pyo.Param(model.I, model.M, initialize=D)

    # x_idx = [
    #         (i, j, k)
    #         for (i, j) in model.IJ
    #         for (jj, k) in model.JK
    #         if jj == j
    #         for (ii, kk) in model.IK
    #         if (ii == i) and (kk == k)
    #     ]
    x_idx_quick = list(chain(*IK_IJK.values()))
    # assert set(x_idx) == set(x_idx_quick)   # sanity check.  Make sure it is same...
    model.x = pyo.Var(x_idx_quick,
        domain=pyo.NonNegativeReals,
    )
    print(f'length of model.I: {len(I)}')
    print(f'length of modek.IK: {len(IK)}')
    print(f'size of model.x: {len(x_idx_quick)}')

    model.y = pyo.Var(
        [(i, k, l) for i in model.I for (k, l) in model.KL], domain=pyo.NonNegativeReals
    )

    model.z = pyo.Var(
        [(i, l, m) for i in model.I for (l, m) in model.LM], domain=pyo.NonNegativeReals
    )

    model.OBJ = pyo.Objective(expr=model.f)


    # model.write("int.lp")

    if solve:
        opt = pyo.SolverFactory("cbc")
        opt.solve(model)


    def intuitive_production_rule(model, i, k):
        lhs = [model.x[i, j, k] for i, j, k in model.IK_IJK[i, k]]

        rhs = [model.y[i, k, l] for (kk, l) in model.KL if kk == k]

        # show where the data is infeasible...
        # if rhs and not lhs:
        #     print(f'infeasible for (i, k): {i}, {k}')

        if lhs and rhs:
            return sum(lhs) >= sum(rhs)
        else:
            return pyo.Constraint.Skip


    def intuitive_transport_rule(model, i, l):
        lhs = [model.y[i, k, l] for (k, ll) in model.KL if ll == l]
        rhs = [model.z[i, l, m] for (lll, m) in model.LM if lll == l]

        if lhs or rhs:
            return sum(lhs) >= sum(rhs)
        else:
            return pyo.Constraint.Skip


    def intuitive_demand_rule(model, i, m):
        return sum(model.z[i, l, m] for (l, mm) in model.LM if mm == m) >= model.d[i, m]

    model.production = pyo.Constraint(IK_IJK.keys(), rule=intuitive_production_rule)
    print(f'created {len(model.production)} production constraints')
    model.transport = pyo.Constraint(model.I, model.L, rule=intuitive_transport_rule)
    model.demand = pyo.Constraint(model.I, model.M, rule=intuitive_demand_rule)