I'm using Numba with Python to run code on my GPU.
Here is my code sample:
import numpy as np
from numba import guvectorize
from numba import vectorize
@vectorize(["boolean(float64, int64, int64)"], target="cuda")
def vector_add_gpu(a, b, c):
    """Placeholder element-wise kernel compiled for the CUDA target.

    The declared signature takes one float64 and two int64 scalars per
    element and yields a boolean.  The body ignores its inputs and
    always produces ``True``.
    """
    return True
def main():
    """Time a single call to ``vector_add_gpu`` and print the elapsed time.

    Returns:
        int: Always 0.
    """
    # Imported here so the function is self-contained: the original called
    # timer() without ever importing it, which raises NameError.
    from timeit import default_timer as timer

    # NOTE: lengths 10 and 100000 are not broadcast-compatible (neither
    # equal nor 1), which is why this call reports
    # "failed to broadcast argument #1".
    a_source = np.ones(10, dtype=np.float64)
    b_source = np.ones(100000, dtype=np.int64)
    d_source = 10

    # Time the GPU function
    start = timer()
    vector_add_gpu(a_source, b_source, d_source)
    vector_add_gpu_time = timer() - start

    print(f"GPU function took {vector_add_gpu_time:f} seconds.")
    return 0
# Run the timing demo only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()
But I get this error:
failed to broadcast argument #1
If I pass arguments of the same shape, it works.
For example:
def main():
a_source = np.ones(100000, dtype=np.float64)
b_source = np.ones(100000, dtype=np.int64)
d_source = 10
Unfortunately, my code has to use NumPy arrays of different shapes.
So, can `vectorize` be used only when all NumPy inputs have the same shape?
I apologize for my lack of explanation.
I just want to run the function on the GPU using Numba with CUDA, because my code is slow.
Here is my code:
#@vectorize(["boolean(int64, uint8, int64, int64, int64)"], target="cuda")
#@guvectorize(["void(int64, uint8, int64, int64, int64)"], '(), (), (), ()->()', target="cuda")
def _deleting_from_endpoints(coords, input_image, ar_x, ar_y, max_value):
    """Clear short runs of set pixels, walking inward from each start point.

    For every ``(x, y)`` pair in ``coords`` the walk clears the current
    pixel of ``input_image`` and then steps to the first non-zero pixel in
    the surrounding 3x3 window (row-major order).  It stops as soon as no
    non-zero neighbour is left.  If the walk instead uses up all
    ``max_value - 1`` steps and the run still continues, the run is treated
    as too long to remove and every pixel cleared during that walk is set
    back to 1.

    Args:
        coords: Iterable of ``(x, y)`` start positions; ``x`` indexes the
            first axis of ``input_image`` and ``y`` the second.
        input_image: 2-D array, modified in place.
        ar_x: Scratch array recording the first-axis index of each visited
            pixel; indices ``0 .. max_value - 1`` may be written.
        ar_y: Scratch array for the second-axis indices, same layout.
        max_value: Step budget per start point; at most ``max_value - 1``
            pixels are cleared per walk.

    Returns:
        bool: Always ``True``.
    """
    for x, y in coords:
        ar_x[0], ar_y[0] = x, y
        cleared = 0
        for i in range(1, max_value):
            x_, y_ = ar_x[i - 1], ar_y[i - 1]
            input_image[x_, y_] = 0
            cleared += 1

            # Clamp the window start at 0.  A start of -1 would be read as
            # "last row/column" and produce an empty slice, which used to
            # end the walk silently whenever it touched the top/left border.
            row0 = max(x_ - 1, 0)
            col0 = max(y_ - 1, 0)
            rows, cols = np.nonzero(input_image[row0:x_ + 2, col0:y_ + 2])
            if rows.size == 0:
                break

            # Window-relative hit -> absolute image coordinates.
            ar_x[i] = row0 + rows[0]
            ar_y[i] = col0 + cols[0]
        else:
            # Budget exhausted while the run still continues: undo this walk.
            # The original guarded this with ``count > max_value``, which can
            # never be true inside ``range(1, max_value)``, and restored
            # ``ar_x[count]`` instead of each visited index ``v``.
            for v in range(cleared):
                input_image[ar_x[v], ar_y[v]] = 1
    return True
Argument types:
coords: numpy array dtype=int64
input_image: numpy array dtype=uint8
ar_x: numpy array dtype=int64
ar_y: numpy array dtype=int64
max_value: int