I am quite a beginner with Python, and I am working on a dynamic optimization program with a finite horizon: my reward function depends on a capital stock that accumulates as a function of the control, and on a finite resource that depletes according to the same control. However, my resource constraint function does not seem to work, as the resource ends up negative.
Here is the non-negativity constraint:
# Non-negativity constraint for finite resource
def resource_constraint(controls):
    finite_resource = initial_state[0]
    for t in range(horizon):
        finite_resource -= controls[t]
        if finite_resource < 0:
            return finite_resource  # Constraint violated, return the negative value
    return 0  # Constraint satisfied, return 0
which is then implemented in the optimization routine:
# Define constraint for finite resource
finite_resource_constraint = {'type': 'ineq', 'fun': resource_constraint}
# Solve the optimization problem with constraints
result = minimize(optimization_problem, initial_controls, method='SLSQP', bounds=control_bounds_list, constraints=finite_resource_constraint)
The full program is:
import numpy as np
from scipy.optimize import minimize
# Problem parameters
horizon = 40 # Time horizon
state_dim = 2 # Dimension of the state
initial_state = np.array([100.0, 0.0]) # Initial state: [finite resource, accumulated resource]
# Dynamics function
def dynamics_function(state, control):
    finite_resource = state[0] - control  # Depletion of the finite resource
    accumulated_resource = state[1] + 0.2 * control  # Accumulation of the second resource
    return np.array([finite_resource, accumulated_resource])
# Reward function to be maximized
def reward_function(state, control):
    return 1 - control**2 + 2*state[1]  # Example production utility, to be maximized
# Non-negativity constraint for finite resource
def resource_constraint(controls):
    finite_resource = initial_state[0]
    for t in range(horizon):
        finite_resource -= controls[t]
        if finite_resource < 0:
            return finite_resource  # Constraint violated, return the negative value
    return 0  # Constraint satisfied, return 0
# Define the optimization problem
def optimization_problem(controls):
    total_reward = 0
    state = initial_state.copy()
    for t in range(horizon):
        control = controls[t]
        total_reward += reward_function(state, control)
        state = dynamics_function(state, control)
    return -total_reward  # Maximize the total reward (minimize the negative)
# Initial guess for controls
initial_controls = np.zeros(horizon)
# Define bounds for controls (production rate)
control_bounds = (0, np.inf) # Production rate bounds with unbounded upper limit
control_bounds_list = [control_bounds] * horizon
# Define constraint for finite resource
finite_resource_constraint = {'type': 'ineq', 'fun': resource_constraint}
# Solve the optimization problem with constraints
result = minimize(optimization_problem, initial_controls, method='SLSQP', bounds=control_bounds_list, constraints=finite_resource_constraint)
# Extract the optimal controls (production rates)
optimal_controls = result.x
# Calculate the finite resource at the end of optimization
final_state = initial_state.copy()
for t in range(horizon):
    final_state = dynamics_function(final_state, optimal_controls[t])
print("Optimal Production Rates:", optimal_controls)
print("Optimal Utility:", -result.fun)
print("Final Finite Resource:", final_state[0])
I would expect the resource to remain non-negative, but this is not the case. Do you see where I am going wrong?
Thank you in advance.
(A lot of) simplification is called for. The most important change is in your constraint function: as written, it returns 0 everywhere in the feasible region and only the first negative running total otherwise, so SLSQP sees a flat, non-smooth scalar that gives it no gradient information (and any violations after the first are ignored entirely). Instead, return the entire series of remaining-resource values, found from the cumulative sum of your control variable, and let SciPy interpret every value in this series as needing to be non-negative.
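To see what that vector-valued constraint looks like, here is a tiny sketch with made-up numbers (a three-period horizon and hypothetical production rates, not your actual problem):

import numpy as np

controls = np.array([30.0, 40.0, 50.0])  # hypothetical production rates
remaining = 100.0 - controls.cumsum()    # resource left after each period
print(remaining)                         # [ 70.  30. -20.] -> last entry breaks lb=0

The solver can now see exactly which periods overdraw the resource and by how much. With that in place, the full program becomes: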
import numpy as np
from scipy.optimize import minimize, Bounds, NonlinearConstraint
horizon = 40 # Time horizon
initial_state = np.array((100, 0)) # finite resource, accumulated resource
# Depletion of the finite resource,
# Accumulation of the second resource
control_coef = np.array((-1, 0.2))
def dynamics_function(state: np.ndarray, control: float) -> np.ndarray:
    return state + control_coef*control
def reward_function(state: np.ndarray, control: float) -> float:
    """Reward function to be maximized"""
    finite, accumulated = state
    return 1 - control**2 + 2*accumulated  # Example production utility, to be maximized
def resource_constraint(controls: np.ndarray) -> np.ndarray:
    """Non-negativity constraint for finite resource"""
    finite, accumulated = initial_state
    resource_series = finite - controls.cumsum()  # resource remaining after each period
    return resource_series
def optimization_problem(controls: np.ndarray) -> float:
    total_reward = 0
    state = initial_state
    for control in controls:
        total_reward += reward_function(state, control)
        state = dynamics_function(state, control)
    return -total_reward  # Maximize the total reward (minimize the cost)
def main() -> None:
    result = minimize(
        fun=optimization_problem,
        x0=np.zeros(horizon),
        bounds=Bounds(lb=0),
        constraints=NonlinearConstraint(fun=resource_constraint, lb=0, ub=np.inf),
    )
    optimal_controls = result.x  # production rates
    final_state = initial_state
    for control in optimal_controls:
        final_state = dynamics_function(final_state, control)
    print('Optimal production rates:')
    print(optimal_controls)
    print(f'Optimal utility: {-result.fun:.2f}')
    print(f'Final resources: {final_state[0]:.2f} finite, {final_state[1]:.2f} accumulated')

if __name__ == '__main__':
    main()
Optimal production rates:
[6.22507154 6.02506764 5.82506307 5.62505724 5.42505567 5.22503537
5.02504236 4.82503844 4.62502625 4.42501332 4.22500823 4.02501148
3.8250004 3.62500542 3.42498674 3.22498371 3.02496703 2.82496198
2.62495751 2.42494841 2.22494338 2.02494237 1.82493205 1.62494446
1.4249527 1.22497927 1.02498866 0.82499873 0.62502126 0.42503274
0.22499934 0.02496324 0. 0. 0. 0.
0. 0. 0. 0. ]
Optimal utility: 776.62
Final resources: 0.00 finite, 20.00 accumulated
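If you want a sanity check that the constraint actually holds (not part of the output above, just one way you might verify it), you could add a couple of lines at the end of main(); every entry of the constraint vector should be non-negative up to the solver's tolerance:

    remaining = resource_constraint(result.x)  # remaining resource after each period
    print(remaining.min())                     # ~0 at the binding periods
    assert remaining.min() > -1e-6             # the 1e-6 tolerance is my own arbitrary choice

Note also that Bounds(lb=0) broadcasts a scalar lower bound over all forty controls, which replaces the hand-built list of (0, np.inf) tuples from your version.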