Tags: python, numpy, matrix, vectorization, numba

# Vectorization of complicated matrix calculation in Python

I have a 3x3 matrix which I can calculate as follows

```python
e_matrix = np.array([m[i]*m[j]-n[i]*n[j] for i in range(3) for j in range(3)])
```

where `m` and `n` are both length-3 vectors. This works ok ✅

Now suppose that `m` and `n` are matrices of shape `(K,3)`. I want to do an analogous calculation to the above and get an `e_matrix` of shape `(3,3,K)`.

I realise I can just do a naive approach e.g.

```python
e_matrix = np.zeros((3,3,K))

for k in range(K):
    e_matrix[:,:,k] = np.array([m[k,i]*m[k,j]-n[k,i]*n[k,j] for i in range(3) for j in range(3)])
```

Does a vectorized approach exist? Ideally one that works with Numba/JIT compiling (so no e.g. `np.tensordot`).

Solution

• This is the "vectorized" variation using numba.

```python
import numpy as np
import numba

@numba.guvectorize("(k),(k)->(k,k)", nopython=True)
def nb_vec_impl(m, n, out):
    # Fill out[i, j] = m[i]*m[j] - n[i]*n[j] for every (i, j) pair.
    # The loop bound comes from the actual vector length (the signature's
    # `k`) rather than a hard-coded 3, so any length works; for k == 3 the
    # behavior is identical to the original.
    k = m.shape[0]
    for i in range(k):
        for j in range(k):
            out[i, j] = m[i] * m[j] - n[i] * n[j]

def nb_vec(m, n):
    """Return the (3, 3, K) e_matrix for (K, 3) inputs via the gufunc."""
    K = len(m)
    e_matrix = np.zeros((3, 3, K))  # Notice the position of K.
    # The gufunc naturally produces (K, 3, 3).  Instead of transposing its
    # result, pass a transposed view of the output buffer so it is filled
    # in place in the desired (3, 3, K) layout — no extra copy.
    nb_vec_impl(m, n, e_matrix.T)
    return e_matrix
```

This function is fast when K is very large (e.g., 100,000), but, unfortunately, very slow when K is small due to the large overhead.

If performance is your priority, I would prefer a simple jit function.

```python
import numpy as np
import numba

@numba.njit
def nb_jit(m, n):
    """Explicit-loop JIT version: m, n are (K, 3); returns (3, 3, K)."""
    K = len(m)
    e_matrix = np.zeros((3, 3, K))

    # Plain nested loops compile to tight machine code under numba, and
    # avoid the gufunc dispatch overhead that dominates for small K.
    for k in range(K):
        for i in range(3):
            for j in range(3):
                e_matrix[i, j, k] = m[k, i] * m[k, j] - n[k, i] * n[k, j]

    return e_matrix
```

Benchmark:

```python
import timeit

import numba
import numpy as np

def native(m, n):
    """Naive per-k reference: m, n are (K, 3); returns (3, 3, K)."""
    K = len(m)
    e_matrix = np.zeros((3, 3, K))

    for k in range(K):
        # The expression is symmetric in (i, j), so the [j][i] nesting
        # order of the comprehension does not change the result.
        e_matrix[:, :, k] = np.array([[m[k, i] * m[k, j] - n[k, i] * n[k, j] for i in range(3)] for j in range(3)])

    return e_matrix

# NOTE(review): the `def func1(m, n):` header was lost in transcription and
# its body appears twice in the original text; reconstructed here once as a
# plain-NumPy broadcast version — presumably left un-jitted because the
# jitted variant below (func2) uses reshape instead of None-indexing.
def func1(m, n):
    """Broadcasted version: m, n are (K, 3); returns (3, 3, K)."""
    m, n = m.T, n.T
    # m.T is (3, K); m[:, None] * m[None, :] broadcasts to (3, 3, K).
    return m[:, None] * m[None, :] - n[:, None] * n[None, :]

@numba.njit
def func2(m, n):
    """Broadcast via reshape (numba-compatible); returns (3, 3, K)."""
    m, n = m.T, n.T
    x, y = m.shape
    # reshape stands in for None/newaxis indexing; the n-term's factor
    # order is swapped relative to the m-term, which is harmless because
    # elementwise multiplication commutes.
    return m.reshape(x, 1, y) * m.reshape(1, x, y) - n.reshape(1, x, y) * n.reshape(x, 1, y)

@numba.njit
def nb_jit(m, n):
    """Explicit-loop JIT version: m, n are (K, 3); returns (3, 3, K)."""
    K = len(m)
    e_matrix = np.zeros((3, 3, K))

    for k in range(K):
        for i in range(3):
            for j in range(3):
                e_matrix[i, j, k] = m[k, i] * m[k, j] - n[k, i] * n[k, j]

    return e_matrix

@numba.guvectorize("(k),(k)->(k,k)", nopython=True)
def nb_vec_impl(m, n, out):
    # out[i, j] = m[i]*m[j] - n[i]*n[j]; bound taken from the signature's
    # `k` instead of a hard-coded 3, identical behavior for k == 3.
    k = m.shape[0]
    for i in range(k):
        for j in range(k):
            out[i, j] = m[i] * m[j] - n[i] * n[j]

def nb_vec(m, n):
    """Return the (3, 3, K) e_matrix for (K, 3) inputs via the gufunc."""
    K = len(m)
    e_matrix = np.zeros((3, 3, K))
    # The gufunc fills (K, 3, 3); passing the transposed view of the
    # buffer writes the (3, 3, K) layout in place — numba accepts
    # transposed (non-contiguous) arrays here.
    nb_vec_impl(m, n, e_matrix.T)
    return e_matrix

def main():
    """Verify each candidate against the naive reference, then time it."""
    rng = np.random.default_rng(0)
    K = 100_000
    # K = 100
    m = rng.random((K, 3))
    n = rng.random((K, 3))

    expected = native(m, n)

    # NOTE(review): `candidates` was missing from the transcribed source;
    # reconstructed from the four functions named in the printed results.
    candidates = [native, func2, nb_jit, nb_vec]
    n_run = 10  # loop-invariant, hoisted out of the candidate loop
    for f in candidates:
        # Exact equality is intended: every candidate performs the same
        # float operations, so the results are bit-identical.
        assert np.array_equal(f(m, n), expected), f"{f.__name__}"
        elapsed = timeit.timeit(lambda: f(m, n), number=n_run) / n_run
        print(f"{f.__name__}: {elapsed * 1000:.3f} ms")


if __name__ == "__main__":
    main()
```

Results (for K = 100_000):

```
native: 778.374 ms
func2: 6.207 ms
nb_jit: 1.998 ms
nb_vec: 1.891 ms
```

Results (for K = 100):

```
native: 0.798 ms