I am trying to write a PyCUDA kernel that generates 2D Gaussian patches. However, the values I define on the host are different after they are copied to the device. The code is below.
import numpy as np
import matplotlib.pyplot as plt
import pycuda.driver as cuda
from pycuda.compiler import SourceModule
import pycuda.autoinit
# kernel
# CUDA source handed to SourceModule. Each thread derives its 2-D global
# index (i along x, j along y) and prints n_cols so the value received on the
# device can be inspected. Threads inside the n_rows x n_cols bounds also
# compute the flat index j*n_cols + i, but nothing is written to `output` yet.
kernel = SourceModule("""
#include <stdio.h>
__global__ void gaussian2D(float *output, float x, float y, float sigma, int
n_rows, int n_cols)
{
int i = threadIdx.x + blockIdx.x * blockDim.x;
int j = threadIdx.y + blockIdx.y * blockDim.y;
printf("%d ", n_cols);
if (i < n_cols && j < n_rows) {
size_t idx = j*n_cols +i;
//printf("%d ", idx);
}
}
""")
# host code
def gpu_gaussian2D(point, sigma, shape):
    """Launch the ``gaussian2D`` kernel with one thread per output element.

    Args:
        point: Pair unpacked as ``(x, y)`` and converted to ``float32``.
        sigma: Value converted to ``float32``.
        shape: Pair unpacked as ``(n_rows, n_cols)``.

    Returns:
        The ``float32`` array of shape ``(1, shape[0] * shape[1])`` that is
        handed to the kernel as its output buffer.
    """
    # Host-side copies of the kernel arguments as NumPy scalars.
    coords = np.array(point, dtype=np.float32)
    x, y = coords
    sigma = np.float32(sigma)
    dims = np.array(shape, dtype=np.int)
    n_rows, n_cols = dims
    print(n_rows)
    output = np.empty((1, shape[0] * shape[1]), dtype=np.float32)

    # Look up the compiled kernel by name.
    gaussian2D = kernel.get_function("gaussian2D")

    # 32 x 32 = 1024 threads per block; ceil-divide so the grid covers every
    # column (x direction) and every row (y direction).
    threads = (32, 32, 1)
    blocks_x = -(-shape[1] // threads[0])
    blocks_y = -(-shape[0] // threads[1])
    blocks = (blocks_x, blocks_y, 1)

    # NOTE(review): the scalar arguments are wrapped in cuda.In here, while
    # the kernel declares them as by-value float/int parameters. This is the
    # call whose printed n_cols is reported as not being 10.
    gaussian2D(
        cuda.Out(output),
        cuda.In(x),
        cuda.In(y),
        cuda.In(sigma),
        cuda.In(n_rows),
        cuda.In(n_cols),
        block=threads,
        grid=blocks,
    )
    return output
# Example run. Inside gpu_gaussian2D, `point` is unpacked as (x, y) and
# `shape` as (n_rows, n_cols), so this launches the kernel for a 10 x 10 output.
point = (5, 5)
sigma = 3.0
shape = (10, 10)
result = gpu_gaussian2D(point, sigma, shape)
When I check the printed value of `n_cols`, it is NOT 10 as expected. Can anyone help? I cannot figure out what is going wrong here.
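`cuda.In()` and `cuda.Out()` are only used for buffers that will be passed via pointer parameters in the kernel (so only applicable to `output` here). Ordinary pass-by-value parameters can be used directly. Wrapping a scalar in `cuda.In()` copies it to device memory and hands the kernel a device address, so a by-value parameter such as `int n_cols` receives address bits instead of 10.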
$ cat t7.py
import numpy as np
# import matplotlib.pyplot as plt
import pycuda.driver as cuda
from pycuda.compiler import SourceModule
import pycuda.autoinit
# kernel
# CUDA source handed to SourceModule. Each thread derives its 2-D global
# index (i along x, j along y) and prints n_cols so the value received on the
# device can be inspected. Threads inside the n_rows x n_cols bounds also
# compute the flat index j*n_cols + i, but nothing is written to `output` yet.
kernel = SourceModule("""
#include <stdio.h>
__global__ void gaussian2D(float *output, float x, float y, float sigma, int
n_rows, int n_cols)
{
int i = threadIdx.x + blockIdx.x * blockDim.x;
int j = threadIdx.y + blockIdx.y * blockDim.y;
printf("%d ", n_cols);
if (i < n_cols && j < n_rows) {
size_t idx = j*n_cols +i;
//printf("%d ", idx);
}
}
""")
# host code
def gpu_gaussian2D(point, sigma, shape):
    """Launch the ``gaussian2D`` kernel with one thread per output element.

    Scalars are passed to the kernel by value as sized NumPy scalars; only
    the output array, which the kernel receives as ``float *``, is wrapped
    in ``cuda.Out``.

    Args:
        point: Pair unpacked as ``(x, y)``; each is converted to ``float32``
            to match the kernel's ``float x, float y`` parameters.
        sigma: Converted to ``float32`` for the kernel's ``float sigma``.
        shape: Pair unpacked as ``(n_rows, n_cols)``.

    Returns:
        The ``float32`` array of shape ``(1, shape[0] * shape[1])`` that is
        handed to the kernel as its output buffer.
    """
    x, y = np.array(point, dtype=np.float32)
    sigma = np.float32(sigma)
    # The kernel declares `int n_rows, int n_cols`, i.e. 32-bit integers, so
    # use np.int32 explicitly. The old `np.int` alias is platform-sized
    # (64-bit on most Linux builds) and was removed in NumPy 1.24.
    n_rows, n_cols = np.array(shape, dtype=np.int32)
    print(n_rows)
    output = np.empty((1, shape[0] * shape[1]), dtype=np.float32)

    # Look up the compiled kernel by name.
    gaussian2D = kernel.get_function("gaussian2D")

    # 32 x 32 = 1024 threads per block; ceil-divide so the grid covers every
    # column (x direction) and every row (y direction).
    block_dim = (32, 32, 1)
    grid_dim = (
        -(-shape[1] // block_dim[0]),
        -(-shape[0] // block_dim[1]),
        1,
    )

    gaussian2D(
        cuda.Out(output), x, y, sigma,
        n_rows, n_cols,
        block=block_dim, grid=grid_dim,
    )
    return output
# Example run. Inside gpu_gaussian2D, `point` is unpacked as (x, y) and
# `shape` as (n_rows, n_cols), so this launches the kernel for a 10 x 10 output.
point = (5, 5)
sigma = 3.0
shape = (10, 10)
result = gpu_gaussian2D(point, sigma, shape)
$ python t7.py
10
10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 
10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10