I am trying to optimize some code and looking for ways to make make a numpy universal function with numba.
The original function has the following signature
import numpy as np
from numba import njit, vectorize
from typing import Sequence
@njit
def f(x: float, y: Sequence[float], z:float = 1e-14) -> float:
"""Computations are only exemplarily"""
a = np.sum(y)
if np.abs(x - a) < z:
return 0.
else:
return np.abs(x - a)
which works as expected.
I would like use numba.vectorize
to create an universal function s.t. f
can be called with
x: np.ndarray | float, y: Sequence[np.ndarray | float], z: float
Also, f
is created in some other part of the code, i.e. the length of y
varies, but is known at the time of definition of f
.
One obvious solution is to make another function which takes (possibly) vectorized input
and do some operations inside using numba.prange
to fil up the result vector.
But I am looking for a more elegant solution using numba.vectorize
and I don't want to deal with
broadcasting, since x
can be a float, or an element of y
can be a float.
Best regards,
I tried
f_v = vectorize(
[
numba.float64(
numba.float64,
numba.float64[:],
numba.float64,
)
],
nopython=True,
)(f)
But it failed to compile, no matching signature found.
Edit:
f
performs simple arithmetics on scalars. I inserted some dummy computations since the originals are rather lengthy.
The vectorized version should perform those computations component-wise and return an array.
You can use guvectorize instead of vectorize.
@numba.guvectorize("(float64, float64[:], float64, float64[:])", "(),(n),()->()", target="parallel")
def f_vectorized(x, y, z, out):
out[0] = f(x, y, z)
result1 = f_vectorized(1.0, np.array([1.0, 2.0]), 1e-14)
result2 = f_vectorized(np.array([1.0]), np.array([[1.0, 2.0]]), 1e-14)
result3 = f_vectorized(np.array([1.0, 2.0, 3.0]), np.array([[1.0, 2.0], [3.0, 4.0], [5.0, 6.0]]), 1e-14)
result4 = f_vectorized(np.array([1.0, 2.0, 3.0]), [np.array([1.0, 2.0]), np.array([3.0, 4.0]), np.array([5.0, 6.0])], 1e-14)
However, there are two issues to consider.
The first is obvious: z
cannot have a default value.
To get around this, you have to create another function.
def f_vectorized_with_default_value(x, y, z=1e-14):
return f_vectorized(x, y, z)
The second is more critical: it does not work well if y
is a python list of numpy arrays. That is, this case:
result4 = f_vectorized(np.array([1.0, 2.0, 3.0]), [np.array([1.0, 2.0]), np.array([3.0, 4.0]), np.array([5.0, 6.0])], 1e-14)
Here is the benchmark code.
import time
import timeit
import warnings
import numpy as np
from typing import Sequence
import numba
@numba.njit(cache=True)
def f(x: float, y: Sequence[float], z: float = 1e-14) -> float:
"""Computations are only exemplarily"""
a = np.sum(y)
if np.abs(x - a) < z:
return 0.0
else:
return np.abs(x - a)
@numba.njit(parallel=True, cache=True)
def f_prange(x, y, z):
"""Implementation with prange as a baseline."""
n = len(x)
out = np.empty(n, dtype=x.dtype)
for i in numba.prange(n):
out[i] = f(x[i], y[i], z)
return out
def f_loop(x, y, z):
"""Another baseline."""
n = len(x)
out = np.empty(n, dtype=x.dtype)
for i in range(n):
out[i] = f(x[i], y[i], z)
return out
@numba.guvectorize("(float64, float64[:], float64, float64[:])", "(),(n),()->()", target="parallel", cache=True)
def f_vectorized(x, y, z, out):
out[0] = f(x, y, z)
def f_vectorized_with_default_value(x, y, z=1e-14):
return f_vectorized(x, y, z)
def test(title, func, x, y, z, expected, number=10):
assert np.array_equal(func(x, y, z), expected)
elapsed = timeit.timeit(lambda: func(x, y, z), number=number) / number
print(f"{title}: {elapsed}")
def main():
rng = np.random.default_rng(0)
n = 1000000
m = 1000
x = rng.random(size=(n,), dtype=np.float64)
y_as_numpy = rng.random(size=(n, m), dtype=np.float64)
y_as_list = [y_as_numpy[i] for i in range(n)]
z = 1e-14
started = time.perf_counter()
_ = np.array(y_as_list)
print("merge into a np array:", time.perf_counter() - started)
started = time.perf_counter()
y_as_typed_list = numba.typed.List(y_as_list)
print("convert to typed list:", time.perf_counter() - started)
expected = f(x[0], y_as_numpy[0], z)
test("f(single)", f, x[0], y_as_numpy[0], z, expected)
test("f_vectorized(single)", f_vectorized, x[0], y_as_numpy[0], z, expected)
expected = f_prange(x, y_as_numpy, z)
test("f_prange(numpy)", f_prange, x, y_as_numpy, z, expected)
with warnings.catch_warnings():
warnings.simplefilter("ignore", numba.NumbaPendingDeprecationWarning)
test("f_prange(python_list)", f_prange, x, y_as_list, z, expected)
test("f_prange(typed_list)", f_prange, x, y_as_typed_list, z, expected)
test("f_loop(python_list)", f_loop, x, y_as_list, z, expected)
test("f_vectorized(numpy)", f_vectorized, x, y_as_numpy, z, expected)
test("f_vectorized(python_list)", f_vectorized, x, y_as_list, z, expected)
test("f_vectorized(typed_list)", f_vectorized, x, y_as_typed_list, z, expected)
if __name__ == "__main__":
main()
Result:
merge into a np array: 1.3248698
convert to typed list: 1.6472616999999996
f(single): 1.430000000013365e-06
f_vectorized(single): 1.6320000000025204e-05
f_prange(numpy): 0.18999052999999985
f_prange(python_list): 7.21668874
f_prange(typed_list): 0.20048094999999932 + (convert to typed list)
f_loop(python_list): 1.3346305299999996
f_vectorized(numpy): 0.18892754000000025
f_vectorized(python_list): 1.8655723899999999
f_vectorized(typed_list): 3.323696179999999 + (convert to typed list)
As you can see from the last two lines of results, it is very slow. It works, but slow.
In fact, it is faster to call the f
function sequentially in a python loop (not numba.prange
, pure python loop).
If y
is a 2D numpy array in the first place, this will not bother you. f_vectorized
should work fine.
But if it is a python list, you might have to find another trick.