Tags: python, numpy, matrix, vectorization, numba

# Vectorization of complicated matrix calculation in Python

I have a 3x3 matrix which I can calculate as follows

```python
e_matrix = np.array([m[i]*m[j]-n[i]*n[j] for i in range(3) for j in range(3)])
```

where `m` and `n` are both length-3 vectors. This works ok ✅

Now suppose that `m` and `n` are matrices of shape `(K,3)`. I want to do an analogous calculation to the above and get an `e_matrix` of shape `(3,3,K)`.

I realise I can just do a naive approach e.g.

```python
e_matrix = np.zeros((3,3,K))

for k in range(K):
    e_matrix[:,:,k] = np.array([m[k,i]*m[k,j]-n[k,i]*n[k,j] for i in range(3) for j in range(3)])
```

Does a vectorized approach exist? Ideally one that works with Numba/JIT compiling (so no e.g. `np.tensordot`).

Solution

• This is the "vectorized" variation using numba.

```python
import numpy as np
import numba

@numba.guvectorize("(k),(k)->(k,k)", nopython=True)
def nb_vec_impl(m, n, out):
    # Fill out[i, j] = m[i]*m[j] - n[i]*n[j] for every (i, j) pair.
    # The loop bound comes from the actual vector length (the signature's
    # `k`) rather than a hard-coded 3, so any length works; for k == 3 the
    # behavior is identical to the original.
    k = m.shape[0]
    for i in range(k):
        for j in range(k):
            out[i, j] = m[i] * m[j] - n[i] * n[j]

def nb_vec(m, n):
    """Return the (3, 3, K) e_matrix for (K, 3) inputs via the gufunc."""
    K = len(m)
    e_matrix = np.zeros((3, 3, K))  # Notice the position of K.
    # The gufunc naturally produces (K, 3, 3).  Instead of transposing its
    # result, pass a transposed view of the output buffer so it is filled
    # in place in the desired (3, 3, K) layout — no extra copy.
    nb_vec_impl(m, n, e_matrix.T)
    return e_matrix
```

This function is fast when K is very large (e.g., 100,000), but, unfortunately, very slow when K is small due to the large overhead.

If performance is your priority, I would prefer a simple jit function.

```python
import numpy as np
import numba

@numba.njit
def nb_jit(m, n):
    """Explicit-loop JIT version: m, n are (K, 3); returns (3, 3, K)."""
    K = len(m)
    e_matrix = np.zeros((3, 3, K))

    # Plain nested loops compile to tight machine code under numba, and
    # avoid the gufunc dispatch overhead that dominates for small K.
    for k in range(K):
        for i in range(3):
            for j in range(3):
                e_matrix[i, j, k] = m[k, i] * m[k, j] - n[k, i] * n[k, j]

    return e_matrix
```

Benchmark:

```python
import timeit

import numba
import numpy as np

def native(m, n):
    """Naive per-k reference: m, n are (K, 3); returns (3, 3, K)."""
    K = len(m)
    e_matrix = np.zeros((3, 3, K))

    for k in range(K):
        # The expression is symmetric in (i, j), so the [j][i] nesting
        # order of the comprehension does not change the result.
        e_matrix[:, :, k] = np.array([[m[k, i] * m[k, j] - n[k, i] * n[k, j] for i in range(3)] for j in range(3)])

    return e_matrix

# NOTE(review): the `def func1(m, n):` header was lost in transcription and
# its body appears twice in the original text; reconstructed here once as a
# plain-NumPy broadcast version — presumably left un-jitted because the
# jitted variant below (func2) uses reshape instead of None-indexing.
def func1(m, n):
    """Broadcasted version: m, n are (K, 3); returns (3, 3, K)."""
    m, n = m.T, n.T
    # m.T is (3, K); m[:, None] * m[None, :] broadcasts to (3, 3, K).
    return m[:, None] * m[None, :] - n[:, None] * n[None, :]

@numba.njit
def func2(m, n):
    """Broadcast via reshape (numba-compatible); returns (3, 3, K)."""
    m, n = m.T, n.T
    x, y = m.shape
    # reshape stands in for None/newaxis indexing; the n-term's factor
    # order is swapped relative to the m-term, which is harmless because
    # elementwise multiplication commutes.
    return m.reshape(x, 1, y) * m.reshape(1, x, y) - n.reshape(1, x, y) * n.reshape(x, 1, y)

@numba.njit
def nb_jit(m, n):
    """Explicit-loop JIT version: m, n are (K, 3); returns (3, 3, K)."""
    K = len(m)
    e_matrix = np.zeros((3, 3, K))

    for k in range(K):
        for i in range(3):
            for j in range(3):
                e_matrix[i, j, k] = m[k, i] * m[k, j] - n[k, i] * n[k, j]

    return e_matrix

@numba.guvectorize("(k),(k)->(k,k)", nopython=True)
def nb_vec_impl(m, n, out):
    # out[i, j] = m[i]*m[j] - n[i]*n[j]; bound taken from the signature's
    # `k` instead of a hard-coded 3, identical behavior for k == 3.
    k = m.shape[0]
    for i in range(k):
        for j in range(k):
            out[i, j] = m[i] * m[j] - n[i] * n[j]

def nb_vec(m, n):
    """Return the (3, 3, K) e_matrix for (K, 3) inputs via the gufunc."""
    K = len(m)
    e_matrix = np.zeros((3, 3, K))
    # The gufunc fills (K, 3, 3); passing the transposed view of the
    # buffer writes the (3, 3, K) layout in place — numba accepts
    # transposed (non-contiguous) arrays here.
    nb_vec_impl(m, n, e_matrix.T)
    return e_matrix

def main():
    """Verify each candidate against the naive reference, then time it."""
    rng = np.random.default_rng(0)
    K = 100_000
    # K = 100
    m = rng.random((K, 3))
    n = rng.random((K, 3))

    expected = native(m, n)

    # NOTE(review): `candidates` was missing from the transcribed source;
    # reconstructed from the four functions named in the printed results.
    candidates = [native, func2, nb_jit, nb_vec]
    n_run = 10  # loop-invariant, hoisted out of the candidate loop
    for f in candidates:
        # Exact equality is intended: every candidate performs the same
        # float operations, so the results are bit-identical.
        assert np.array_equal(f(m, n), expected), f"{f.__name__}"
        elapsed = timeit.timeit(lambda: f(m, n), number=n_run) / n_run
        print(f"{f.__name__}: {elapsed * 1000:.3f} ms")


if __name__ == "__main__":
    main()
```

Results (for K = 100_000):

```
native: 778.374 ms
func2: 6.207 ms
nb_jit: 1.998 ms
nb_vec: 1.891 ms
```

Results (for K = 100):

```
native: 0.798 ms