This is my pycuda code.When I execute I get threading errors.I am trying to implement pycuda threads in this.I have searched google but couldn't find an answer.Respurces are also very limited for pycuda I'm the Internet.So guys please help me out.
{{{#!python
import pycuda
import pycuda.driver as cuda
from pycuda.compiler import SourceModule
import threading
import numpy
class GPUThread(threading.Thread):
def __init__(self, number, some_array):
threading.Thread.__init__(self)
self.number = number
self.some_array = some_array
def run(self):
self.dev = cuda.Device(self.number)
self.ctx = self.dev.make_context()
self.array_gpu = cuda.mem_alloc(some_array.nbytes)
cuda.memcpy_htod(self.array_gpu, some_array)
test_kernel(self.array_gpu)
print "successful exit from thread %d" % self.number
self.ctx.pop()
del self.array_gpu
del self.ctx
def test_kernel(input_array_gpu):
mod = SourceModule("""
__global__ void f(float * out, float * in)
{
int idx = threadIdx.x;
out[idx] = in[idx] + 6;
}
""")
func = mod.get_function("f")
output_array = numpy.zeros((1,512))
output_array_gpu = cuda.mem_alloc(output_array.nbytes)
func(output_array_gpu,
input_array_gpu,
block=(512,1,1))
cuda.memcpy_dtoh(output_array, output_array_gpu)
return output_array
cuda.init()
some_array = numpy.ones((1,512), dtype=numpy.float32)
num = cuda.Device.count()
gpu_thread_list = []
for i in range(num):
gpu_thread = GPUThread(i, some_array)
gpu_thread.start()
}}}
You forgot to append the gpu thread.Just add gpu_thread_list.append(gpu_thread)
after start() in your code and it will work.There are quite a lot of good tutorial for pycuda online.Check this.