(I'm new to stack overflow, but I will try to write my problem the best way I can) For my thesis, I need to do the optimization for a mean squares error problem as fast as possible. For this problem, I used to use the scipy.optimize.minimize method (with and without jacobian). However; the optimization was still too slow for what we wanted to do. (This program is running on mac with python 3.9)
So first, this is the function to minimize (I already tried to simplify the formula, but it didn't change the speed of the program
def _residuals_mse(coef, unshimmed_vec, coil_mat, factor):
""" Objective function to minimize the mean squared error (MSE)
Args:
coef (numpy.ndarray): 1D array of channel coefficients
unshimmed_vec (numpy.ndarray): 1D flattened array (point)
coil_mat (numpy.ndarray): 2D flattened array (point, channel) of masked coils
(axis 0 must align with unshimmed_vec)
factor (float): Devise the result by 'factor'. This allows to scale the output for the minimize function to avoid positive directional linesearch
Returns:
scalar: Residual for least squares optimization
"""
# MSE regularized to minimize currents
return np.mean((unshimmed_vec + np.sum(coil_mat * coef, axis=1, keepdims=False)) ** 2) / factor + \ (self.reg_factor * np.mean(np.abs(coef) / self.reg_factor_channel))
This is the jacobian of the function ( There is maybe a way to make it faster but I didn't succeed to do it)
def _residuals_mse_jacobian( coef, unshimmed_vec, coil_mat, factor):
""" Jacobian of the function that we want to minimize, note that normally b is calculates somewhere else
Args:
coef (numpy.ndarray): 1D array of channel coefficients
unshimmed_vec (numpy.ndarray): 1D flattened array (point) of the masked unshimmed map
coil_mat (numpy.ndarray): 2D flattened array (point, channel) of masked coils
(axis 0 must align with unshimmed_vec)
factor (float): integer
Returns:
jacobian (numpy.ndarray) : 1D array of the gradient of the mse function to minimize
"""
b = (2 / (unshimmed_vec.size * factor))
jacobian = np.array([
self.b * np.sum((unshimmed_vec + np.matmul(coil_mat, coef)) * coil_mat[:, j]) + \
np.sign(coef[j]) * (self.reg_factor / (9 * self.reg_factor_channel[j]))
for j in range(coef.size)
])
return jacobian
And so this is the "main" program
import numpy as np
import scipy.optimize as opt
from numpy.random import default_rng
rand = default_rng(seed=0)
reg_factor_channel = rand.integers(1, 10, size=9)
coef = np.zeros(9)
unshimmed_vec = np.random.randint(100, size=(150))
coil_mat = np.random.randint(100, size=(150,9))
factor = 2
self.reg_factor = 5
currents_sp = opt.minimize(_residuals_mse, coef,
args=(unshimmed_vec, coil_mat, factor),
method='SLSQP',
jac = _residuals_mse_jacobian,
options={'maxiter': 1000})
On my computer, the optimization takes around 40 ms for a dataset of this size.
The matrices in the example are usually obtained after some modifications and can be way way bigger, but here to make it clear and easy to test, I choose some arbitrary ones. In addition, this optimization is done many times (Sometimes up to 50 times), so, we are already doing multiprocessing (To do different optimization at the same time). However on mac, mp is slow to start because of the spawning method (because fork is not stable on python 3.9). For this reason, I am trying to make the optimization as fast as possible to maybe remove multiprocessing.
Do any of you know how to make this code faster in python ? Also, this code will be available in open source for users, so I can only free solver (unlike MOSEK)
Edit : I tried to run the code by using the CVXPY model, with this code after the one just above:
m = currents_0.size
n = unshimmed_vec.size
coef = cp.Variable(m)
unshimmed_vec2 = cp.Parameter((n))
coil_mat2 = cp.Parameter((n,m))
unshimmed_vec2.value = unshimmed_vec
coil_mat2.value = coil_mat
x1 = unshimmed_vec2 + cp.matmul(coil_mat2,coef)
x2 = cp.sum_squares(x1) / (factor*n)
x3 = self.reg_factor / self.reg_factor_channel@ cp.abs(coef) / m
obj = cp.Minimize(x2 + x3)
prob = cp.Problem(obj)
prob.solve(solver=SCS)
However, this is slowing even more my code, and it gives me a different value than with scipy.optimize.minimize, so does anyone see a problem in this code ?
I will make some sweeping assumptions:
_criteria_func
and instead optimize _residuals_mse
;reg_factor_channel
will never have zeros; andRecognize that your inner expressions can be simplified:
np.sum(coil_mat * coef)
, since it uses a broadcast, is really just a matrix multiplicationmean(**2)
on a vector is really just a self-dot-product divided by the lengthThis leaves us with the following, starting without the Jacobian:
import numpy as np
from numpy.random import default_rng
from scipy import optimize as opt
from timeit import timeit
rand = default_rng(seed=0)
reg_factor = 5
reg_factor_channel = rand.integers(1, 10, size=9)
reg_vector = reg_factor / len(reg_factor_channel) / reg_factor_channel
def residuals_mse(
coef: np.ndarray,
unshimmed_vec: np.ndarray,
coil_mat: np.ndarray,
factor: float,
) -> float:
inner = unshimmed_vec + coil_mat@coef
return inner.dot(inner)/len(inner)/factor + np.abs(coef).dot(reg_vector)
def old_residuals_mse(coef, unshimmed_vec, coil_mat, factor):
return np.mean(
(unshimmed_vec + np.sum(coil_mat * coef, axis=1, keepdims=False)) ** 2) / factor + (
reg_factor * np.mean(np.abs(coef) / reg_factor_channel))
def main() -> None:
unshimmed_vec = rand.integers(100, size=150)
coil_mat = rand.integers(100, size=(150, 9))
factor = 2
args = unshimmed_vec, coil_mat, factor
currents_sp = None
def run():
nonlocal currents_sp
currents_sp = opt.minimize(
fun=residuals_mse,
x0=np.zeros_like(reg_factor_channel),
args=args,
method='SLSQP',
)
t = timeit(run, number=1)
print(currents_sp)
print(t, 'seconds')
r_old = old_residuals_mse(currents_sp.x, *args)
assert np.isclose(r_old, currents_sp.fun)
if __name__ == '__main__':
main()
with output
message: Optimization terminated successfully
success: True
status: 0
fun: 435.166150155064
x: [-1.546e-01 -8.305e-02 -1.637e-01 -1.106e-01 -1.033e-01
-8.792e-02 -9.908e-02 -8.666e-02 -1.217e-01]
nit: 7
jac: [-1.179e-01 -1.621e-01 -1.112e-01 -1.765e-01 -1.678e-01
-1.570e-01 -1.456e-01 -1.722e-01 -1.299e-01]
nfev: 94
njev: 7
0.012324300012551248 seconds
The Jacobian does indeed help, but has been written in a way that is not properly vectorised. Once vectorised it looks like:
import numpy as np
from numpy.random import default_rng
from scipy import optimize as opt
from timeit import timeit
rand = default_rng(seed=0)
reg_factor = 5
reg_factor_channel = rand.integers(1, 10, size=9)
reg_vector = reg_factor / len(reg_factor_channel) / reg_factor_channel
def residuals_mse(
coef: np.ndarray,
unshimmed_vec: np.ndarray,
coil_mat: np.ndarray,
factor: float,
) -> float:
inner = unshimmed_vec + coil_mat@coef
return inner.dot(inner)/len(inner)/factor + np.abs(coef).dot(reg_vector)
def old_residuals_mse(coef, unshimmed_vec, coil_mat, factor):
return np.mean(
(unshimmed_vec + np.sum(coil_mat * coef, axis=1, keepdims=False)) ** 2) / factor + (
reg_factor * np.mean(np.abs(coef) / reg_factor_channel))
def residuals_mse_jacobian(
coef: np.ndarray,
unshimmed_vec: np.ndarray,
coil_mat: np.ndarray,
factor: float,
) -> np.ndarray:
b = 2 / unshimmed_vec.size / factor
return b * (
unshimmed_vec + coil_mat@coef
)@coil_mat + np.sign(coef) * reg_vector
def old_residuals_mse_jacobian(coef, unshimmed_vec, coil_mat, factor):
b = (2 / (unshimmed_vec.size * factor))
jacobian = np.array([
b * np.sum((unshimmed_vec + np.matmul(coil_mat, coef)) * coil_mat[:, j]) +
np.sign(coef[j]) * (reg_factor / (9 * reg_factor_channel[j]))
for j in range(coef.size)
])
return jacobian
def main() -> None:
unshimmed_vec = rand.integers(100, size=150)
coil_mat = rand.integers(100, size=(150, 9))
factor = 2
args = unshimmed_vec, coil_mat, factor
currents_sp = None
def run():
nonlocal currents_sp
currents_sp = opt.minimize(
fun=residuals_mse,
x0=np.zeros_like(reg_factor_channel),
args=args,
method='SLSQP',
jac=residuals_mse_jacobian,
)
t = timeit(run, number=1)
print(currents_sp)
print(t, 'seconds')
r_old = old_residuals_mse(currents_sp.x, *args)
assert np.isclose(r_old, currents_sp.fun)
j_new = residuals_mse_jacobian(currents_sp.x, *args)
j_old = old_residuals_mse_jacobian(currents_sp.x, *args)
assert np.allclose(j_old, j_new)
if __name__ == '__main__':
main()
message: Optimization terminated successfully
success: True
status: 0
fun: 435.1661470650057
x: [-1.546e-01 -8.305e-02 -1.637e-01 -1.106e-01 -1.033e-01
-8.792e-02 -9.908e-02 -8.666e-02 -1.217e-01]
nit: 7
jac: [-4.396e-02 -8.791e-02 -3.385e-02 -9.817e-02 -9.516e-02
-8.223e-02 -7.154e-02 -9.907e-02 -5.939e-02]
nfev: 31
njev: 7
0.005059799994342029 seconds