I'm trying to parallelize this numba jitted function. I initially thought this would be trivial since it is an embarrassingly parallel problem, but it produces unexpected results (different output for the sequential and parallel 'implementations'). Any idea what could be happening there and whether there are ways to make this work?
See reproducible example below:
import numba
import numpy as np
from sklearn.datasets import make_regression
@numba.jit(nopython=True)
def nanlstsq(X, y):
"""Return the least-squares solution to a linear matrix equation
Analog to ``numpy.linalg.lstsq`` for dependant variable containing ``Nan``
Args:
X ((M, N) np.ndarray): Matrix of independant variables
y ({(M,), (M, K)} np.ndarray): Matrix of dependant variables
Returns:
np.ndarray: Least-squares solution, ignoring ``Nan``
"""
beta = np.zeros((X.shape[1], y.shape[1]), dtype=np.float64)
for idx in range(y.shape[1]):
# subset y and X
isna = np.isnan(y[:,idx])
X_sub = X[~isna]
y_sub = y[~isna,idx]
# Compute beta on data subset
XTX = np.linalg.inv(np.dot(X_sub.T, X_sub))
XTY = np.dot(X_sub.T, y_sub)
beta[:,idx] = np.dot(XTX, XTY)
return beta
@numba.jit(nopython=True, parallel=True)
def nanlstsq_parallel(X, y):
beta = np.zeros((X.shape[1], y.shape[1]), dtype=np.float64)
for idx in numba.prange(y.shape[1]):
# subset y and X
isna = np.isnan(y[:,idx])
X_sub = X[~isna]
y_sub = y[~isna,idx]
# Compute beta on data subset
XTX = np.linalg.inv(np.dot(X_sub.T, X_sub))
XTY = np.dot(X_sub.T, y_sub)
beta[:,idx] = np.dot(XTX, XTY)
return beta
# Generate random data
n_targets = 10000
n_features = 3
X, y = make_regression(n_samples=200, n_features=n_features,
n_targets=n_targets)
# Add random nan to y array
y.ravel()[np.random.choice(y.size, 5*n_targets, replace=False)] = np.nan
# Run the regression
beta = nanlstsq(X, y)
beta_parallel = nanlstsq_parallel(X, y)
np.testing.assert_allclose(beta, beta_parallel)
Not sure what is causing the difference between the sequential and parallel versions, but computing the inverse of X_sub.T @ X_sub
is not recommended for solving matrix equations - don't invert that matrix. If you still want to compute the normal equations then you should use numpy.linalg.solve or without computing the normal equations, you can use numpy.linalg.lstsq both of which are compatible with numba and pass your assert_allclose
test with the sequential version:
@numba.jit(nopython=True, parallel=True)
def nanlstsq_parallel_solve(X, y):
beta = np.zeros((X.shape[1], y.shape[1]), dtype=np.float64)
for idx in numba.prange(y.shape[1]):
# subset y and X
isna = np.isnan(y[:,idx])
X_sub = X[~isna]
y_sub = y[~isna,idx]
beta[:, idx] = np.linalg.solve(np.dot(X_sub.T, X_sub), np.dot(X_sub.T, y_sub))
return beta
@numba.jit(nopython=True, parallel=True)
def nanlstsq_parallel_lstsq(X, y):
beta = np.zeros((X.shape[1], y.shape[1]), dtype=np.float64)
for idx in numba.prange(y.shape[1]):
# subset y and X
isna = np.isnan(y[:,idx])
X_sub = X[~isna]
y_sub = y[~isna,idx]
beta[:, idx] = np.linalg.lstsq(X_sub, y_sub)[0]
return beta
beta_parallel_solve = nanlstsq_parallel_solve(X, y)
beta_parallel_lstsq = nanlstsq_parallel_lstsq(X, y)
np.testing.assert_allclose(beta, beta_parallel_solve)
np.testing.assert_allclose(beta, beta_parallel_lstsq)