PuLP optimization running (essentially) infinitely despite never improving the answer?

I have a python script which solves the following optimization:

from pulp import *
import matplotlib.pyplot as plt


rates = {'h': {'level_pay_rate': 0.1425, 'on_demand': 0.2}, 'x': {'level_pay_rate': 0.0129, 'on_demand': 0.0162}, 'ak': {'level_pay_rate': 0.3642, 'on_demand': 0.448}, 'ao': {'level_pay_rate': 0.003, 'on_demand': 0.0042}, 'an': {'level_pay_rate': 0.1821, 'on_demand': 0.224}, 'y': {'level_pay_rate': 0.0165, 'on_demand': 0.023}, 'e': {'level_pay_rate': 3.342, 'on_demand': 4.608}, 'j': {'level_pay_rate': 3.901, 'on_demand': 4.512}, 'b': {'level_pay_rate': 0.493, 'on_demand': 0.68}, 'i': {'level_pay_rate': 0.3265, 'on_demand': 0.384}, 'l': {'level_pay_rate': 0.071, 'on_demand': 0.096}, 'n': {'level_pay_rate': 0.325, 'on_demand': 0.376}, 'u': {'level_pay_rate': 0.365, 'on_demand': 0.504}, 'p': {'level_pay_rate': 0.083, 'on_demand': 0.113}, 'z': {'level_pay_rate': 0.1322, 'on_demand': 0.1856}, 'q': {'level_pay_rate': 0.695, 'on_demand': 0.952}, 'ai': {'level_pay_rate': 0.1936, 'on_demand': 0.24}, 'f': {'level_pay_rate': 0.313, 'on_demand': 0.432}, 'aa': {'level_pay_rate': 0.2399, 'on_demand': 0.3328}, 'ac': {'level_pay_rate': 0.06, 'on_demand': 0.0832}, 'ag': {'level_pay_rate': 0.015, 'on_demand': 0.0208}, 'v': {'level_pay_rate': 5.808, 'on_demand': 8.016}, 'r': {'level_pay_rate': 1.389, 'on_demand': 1.904}, 'am': {'level_pay_rate': 0.032, 'on_demand': 0.0372}, 'ae': {'level_pay_rate': 0.0484, 'on_demand': 0.06}, 's': {'level_pay_rate': 0.32506, 'on_demand': 0.376}, 'ad': {'level_pay_rate': 0.03, 'on_demand': 0.0416}, 'a': {'level_pay_rate': 0.246, 'on_demand': 0.34}, 'w': {'level_pay_rate': 0.0083, 'on_demand': 0.0116}, 'ah': {'level_pay_rate': 0.12, 'on_demand': 0.1664}, 'g': {'level_pay_rate': 0.078, 'on_demand': 0.108}, 'd': {'level_pay_rate': 0.123, 'on_demand': 0.17}, 'aj': {'level_pay_rate': 0.217, 'on_demand': 0.3008}, 'al': {'level_pay_rate': 0.0542, 'on_demand': 0.0752}, 'm': {'level_pay_rate': 0.141, 'on_demand': 0.192}, 'o': {'level_pay_rate': 0.62, 'on_demand': 0.712}, 'af': {'level_pay_rate': 0.0037, 'on_demand': 0.0052}, 'c': {'level_pay_rate': 1.108, 'on_demand': 1.53}, 'k': {'level_pay_rate': 1.3, 'on_demand': 1.504}, 'ab': {'level_pay_rate': 0.3871, 'on_demand': 0.48}, 't': {'level_pay_rate': 2.403, 'on_demand': 3.06}}
hourly_demand = {'5': {'h': 0.748056, 'x': 3.0, 'ak': 2.0, 'ao': 1.0, 'an': 2.0, 'y': 5.0, 'e': 2.0, 'j': 1.0, 'b': 38.97125391, 'i': 9.0, 'l': 2.0, 'n': 12.0, 'u': 2.0, 'p': 3.0, 'z': 1.0, 'q': 3.0, 'ai': 35.18181818, 'f': 5.0, 'aa': 2.0, 'ac': 1.0, 'ag': 3.0, 'v': 1.0, 'r': 1.0, 'am': 1.0, 'ae': 2.0, 's': 6.683077626, 'ad': 3.0, 'a': 2.0, 'w': 2.0, 'ah': 1.0, 'g': 24.0, 'd': 23.0, 'aj': 1.0, 'al': 1.0, 'm': 73.0, 'o': 2.0, 'af': 2.0, 'c': 16.0, 'k': 1.0, 'ab': 18.0, 't': 0}, '6': {'ac': 1.0, 'a': 2.0, 'l': 2.0, 'aj': 1.0, 'b': 38.88245591, 'i': 7.0, 'al': 1.0, 'q': 3.0, 'f': 5.0, 'p': 3.0, 'c': 16.0, 'k': 1.0, 'm': 73.0, 'aa': 2.0, 'z': 1.0, 'ab': 9.0, 'ak': 2.0, 'x': 3.0, 'ai': 5.395093035, 'w': 2.0, 'n': 12.0, 'af': 2.0, 's': 1.0, 'd': 23.0, 'r': 1.0, 'u': 2.0, 'an': 2.0, 'j': 1.0, 'am': 1.0, 'y': 5.0, 'g': 24.0, 'o': 2.0, 'e': 2.0, 'ao': 1.0, 'ad': 3.0, 'v': 1.0, 'ah': 2.0, 'ag': 3.0, 'h': 0, 'ae': 0, 't': 0}, '7': {'e': 2.0, 'b': 29.04976177, 'p': 3.0, 'af': 2.0, 'ac': 1.0, 'w': 2.0, 'k': 1.0, 'f': 5.0, 'z': 1.0, 'y': 5.0, 'd': 23.0, 'i': 7.0, 'n': 12.0, 'l': 2.0, 'ai': 1.181818182, 'ag': 3.0, 'a': 2.0, 'v': 1.184444, 'r': 1.0, 'c': 16.0, 'j': 1.0, 'g': 24.0, 'm': 73.0, 'o': 2.0, 'aa': 2.0, 'ah': 2.0, 'u': 2.0, 'ak': 2.0, 's': 1.0, 'ab': 7.071698244, 'aj': 1.0, 'an': 2.0, 'ao': 1.0, 'q': 3.0, 'ad': 3.0, 'am': 1.0, 'x': 3.0, 'al': 1.0, 'h': 0, 'ae': 0, 't': 0}, '8': {'aa': 2.0, 'ab': 6.155000661, 'b': 7.0, 'm': 41.67691468, 'n': 12.0, 'ah': 2.0, 'al': 1.0, 'q': 3.0, 'ak': 2.0, 'o': 2.0, 'ag': 3.0, 'v': 1.0, 'k': 1.0, 'd': 23.0, 'ac': 1.0, 'g': 12.0, 'am': 1.0, 'u': 2.0, 'af': 2.0, 'e': 2.0, 'ad': 3.0, 'f': 3.0, 'c': 1.0, 'aj': 1.0, 'an': 2.0, 'y': 5.0, 'z': 1.0, 'a': 2.0, 'r': 1.0, 'ao': 1.0, 's': 1.0, 'p': 3.0, 'i': 7.0, 'j': 1.0, 't': 16.956666, 'h': 0, 'x': 0, 'l': 0, 'ai': 0, 'ae': 0, 'w': 0}, '9': {'g': 24.0, 'l': 2.0, 'p': 3.0, 't': 0.398611, 'e': 2.0, 'b': 40.98876661, 'r': 1.0, 'ac': 1.0, 'ai': 1.181818182, 'q': 3.0, 'x': 3.0, 'o': 2.0, 'ak': 2.0, 'am': 1.0, 'j': 1.0, 'af': 2.0, 'v': 1.0, 'ag': 3.0, 'an': 2.0, 'ab': 5.09321567, 'aa': 2.0, 'u': 2.0, 's': 1.0, 'm': 73.0, 'c': 16.0, 'i': 7.0, 'k': 1.0, 'y': 5.0, 'aj': 1.0, 'd': 23.0, 'a': 2.0, 'ah': 2.0, 'ao': 1.0, 'w': 2.0, 'ad': 3.0, 'n': 12.0, 'al': 1.0, 'f': 5.0, 'z': 1.0, 'h': 0, 'ae': 0}}





I = rates.keys()              # items
T = hourly_demand.keys()   # time periods

IT = {(i, t) for i in I for t in T}  # item-time tuples
print(IT)
# Define the model
model = LpProblem("level_pay_model", LpMinimize)

# Define decision variables
level_pay_amt = LpVariable("level_pay", lowBound=0)
level_pay = LpVariable.dicts('level_pay', IT, lowBound = 0, cat='Integer')  # the quantity of i in time t to buy with level-pay

# Define objective function: the aggregate cost of servicing all demands by either level pay or on-demand

# a helper function for the hourly cost
def hourly_cost(t):
    #      the level pay         the total of items not covered by level pay * on-demand cost
    return level_pay_amt + lpSum((hourly_demand[t][i] - level_pay[i, t]) * rates[i]['on_demand'] for i in I)
model += lpSum(hourly_cost(t) for t in T)

print(model)
# alternate construct with out the function...
#model += lpSum(level_pay_amt + lpSum((hourly_demand[t][i] - level_pay[i, t]) * rates[i]['on_demand'] for i in I) for t in T)

# constraints
# 1. don't bust the level-pay dollars
for t in T:
    model += lpSum(level_pay[i, t] * rates[i]['level_pay_rate'] for i in I) <= level_pay_amt

# 2. limit the level-pay to the demand.... or else get funky negative results.
for i, t in IT:
    model += lpSum(level_pay[i, t]) <= hourly_demand[t][i]

solution = model.solve()

print(f'level pay amt: {level_pay_amt.varValue}')

for t in T:
    print(value(hourly_cost(t)))

for i,t in sorted(IT):
    print("level pay:", i, t, level_pay[i, t].varValue)

print()

# Visualize the hourly costs and the optimized commitment rate

cost_vec = [value(hourly_cost(t)) for t in sorted(T)]
plt.plot(sorted(T), cost_vec, label='Hourly Cost')
plt.axhline(y=value(level_pay_amt), color='r', linestyle='-', label='Level Pay Amt.')
plt.xlabel('Hour')
plt.ylabel('$/hr')
plt.legend()
plt.title(f'Costs by Hour [Total Cost: ${value(model.objective) : 0.2f}]')
plt.show()

If I reduce the size of the input data (rates and hourly_demand), the optimization model finds the solution quite quickly (nearly instantly). However, right around the size of the present input data, the optimization model/solver (Cbc0010I) takes ages to figure it out. So long in fact that I need to kill the script once it gets past 1M nodes because I'm afraid of such a large number.

Additionally, it seems to have already found the right answer pretty early on, but continues to iterate regardless (see an arbitrary snippet of the stack trace below):

Cbc0010I After 930000 nodes, 119185 on tree, -143.4139 best solution, best possible -143.43226 (200.59 seconds)
Cbc0010I After 931000 nodes, 119094 on tree, -143.4139 best solution, best possible -143.43226 (200.79 seconds)
Cbc0010I After 932000 nodes, 119430 on tree, -143.4139 best solution, best possible -143.43226 (201.02 seconds)
Cbc0010I After 933000 nodes, 119365 on tree, -143.4139 best solution, best possible -143.43226 (201.24 seconds)
Cbc0010I After 934000 nodes, 119356 on tree, -143.4139 best solution, best possible -143.43226 (201.44 seconds)
Cbc0010I After 935000 nodes, 119286 on tree, -143.4139 best solution, best possible -143.43226 (201.63 seconds)
Cbc0010I After 936000 nodes, 119234 on tree, -143.4139 best solution, best possible -143.43226 (201.83 seconds)
Cbc0010I After 937000 nodes, 119182 on tree, -143.4139 best solution, best possible -143.43226 (202.01 seconds)
Cbc0010I After 938000 nodes, 119096 on tree, -143.4139 best solution, best possible -143.43226 (202.24 seconds)
Cbc0010I After 939000 nodes, 119455 on tree, -143.4139 best solution, best possible -143.43226 (202.48 seconds)

The best solution hardly changes (if at all), but the model continues to iterate. Is there any way I can stop it from iterating once its 'good enough' (perhaps by specifying some sufficient decimal precision?), give it a node limit, or fix whatever is causing it to continue to iterate?

Solution

In many Mixed Integer Programs (MIPs), you can get to a situation where the solver churns for a long time to try to find the optimal answer. This is highly dependent on the structure of the problem and the data, and beyond the scope of a post here.

By solving a "relaxed version" of the problem, the solver knows what the theoretical lower/upper bound (best theoretical) solution. So it will keep going to close the gap between what it has (best current) and best possible. Realize that there is no guarantee that the best theoretical solution is feasible with the integer constraints, so it may not be actually achievable.

It is common practice to put in a small acceptable "gap" in these scenarios to tell the solver, "I'm OK if you get within this fractional gap of the theoretical max, we're not launching spacecraft and this is good enough." Typical values are maybe 0.05 (or 5%), so that when the (best theoretical - current) / best theoretical <= 0.05 the solver stops.

In pulp the default solver you are using is cbc. You can look up the manual for that to see all of the options, but the one you want is gapRel. You can look up the pulp documentation on how to pass this option, or multiple options.

To just change the gap, change your solving line to this:

solution = model.solve(PULP_CBC_CMD(gapRel = 0.02))

And it solves almost instantaneously with a 2% guarantee.