Can someone explain why my kernel doesn't work when my shared memory array of pointers, TMS, is accessed at some index other than the 0th index (happens in the last line)? If TMS[0] is used in the last line, everything works as expected. When I change TMS[0] to any other index, I get a CUDA unexpected error. Assume 64 threads on one block.
#include <stdio.h>
#include <stdlib.h>
#include "cuda.h"
#include <cuda_runtime.h>
#include <curand_kernel.h>
__global__ void myKern(float *masterForces)
{
int globalIdx = ...// set global thread id
volatile __shared__ float uniques[64];
{
uniques[globalIDx] = 0;
}
__syncthreads();
volatile __shared__ float *TMS[64];
{
TMS[globalIdx] = (&uniques[globalIdx]);
}
__syncthreads();
masterForces[globalIdx] = *TMS[1];
}
Original context if you're curious: (You really don't need to look at this to address my problem)
#include <stdio.h>
#include <stdlib.h>
#include "cuda.h"
#include "curand.h"
#include <cuda_runtime.h>
#include "math.h"
#include <curand_kernel.h>
#include <time.h>
__global__ void myKern(const float *transMatrix, const int *pointerMatrix, float *masterForces, const double *rands, const int r_max)
{
int globalIdx = ((blockIdx.x + (blockIdx.y * gridDim.x)) * (blockDim.x * blockDim.y)) + (threadIdx.x + (threadIdx.y * blockDim.x));
volatile __shared__ float uniques[51];
uniques[0] = transMatrix[0]; uniques[1] = transMatrix[1]; uniques[2] = transMatrix[2]; // 1
uniques[3] = transMatrix[3]; uniques[4] = transMatrix[4]; uniques[5] = transMatrix[12]; // 2
uniques[6] = transMatrix[14]; uniques[7] = transMatrix[15]; uniques[8] = transMatrix[24]; // 3
uniques[9] = transMatrix[26]; uniques[10] = transMatrix[27]; uniques[11] = transMatrix[28]; // 4
uniques[12] = transMatrix[40]; uniques[13] = transMatrix[50]; uniques[14] = transMatrix[60]; // 5
uniques[15] = transMatrix[62]; uniques[16] = transMatrix[146]; uniques[17] = transMatrix[156]; // 6
uniques[18] = transMatrix[158]; uniques[19] = transMatrix[168]; uniques[20] = transMatrix[170]; // 7
uniques[21] = transMatrix[172]; uniques[22] = transMatrix[184]; uniques[23] = transMatrix[290]; // 8
uniques[24] = transMatrix[300]; uniques[25] = transMatrix[302]; uniques[26] = transMatrix[312]; // 9
uniques[27] = transMatrix[314]; uniques[28] = transMatrix[316]; uniques[29] = transMatrix[328]; // 10
uniques[30] = transMatrix[1010]; uniques[31] = transMatrix[1020]; uniques[32] = transMatrix[1022]; // 11
uniques[33] = transMatrix[1032]; uniques[34] = transMatrix[1034]; uniques[35] = transMatrix[1036]; // 12
uniques[36] = transMatrix[1048]; uniques[37] = transMatrix[1154]; uniques[38] = transMatrix[1164]; // 13
uniques[39] = transMatrix[1166]; uniques[40] = transMatrix[1176]; uniques[41] = transMatrix[1178]; // 14
uniques[42] = transMatrix[1180]; uniques[43] = transMatrix[1192]; uniques[44] = transMatrix[2018]; // 15
uniques[45] = transMatrix[2028]; uniques[46] = transMatrix[2030]; uniques[47] = transMatrix[2040]; // 16
uniques[48] = transMatrix[2042]; uniques[49] = transMatrix[2044]; uniques[50] = transMatrix[2056]; // 17
__syncthreads();
volatile __shared__ float *TMS[2592];
for (int t=0; t<2592; t++)
{
for (int m=0; m< 51; m++){
if (pointerMatrix[t] == m)
{
TMS[t] = (&uniques[m]);
}
}
__syncthreads();
int b0 = 0;
int c0 = 0;
int d0 = 0;
int e0 = 0;
int f0 = 0;
int g0 = 0;
int h0 = 0;
int i0 = 0;
int j0 = 0;
int k0 = 0;
int l0 = 0;
int m0 = 0;
int n0 = 0;
int o0 = 0;
int p0 = 0;
int q0 = 0;
int r0 = 0;
int s0 = 0;
int t0 = 0;
int u0 = 0;
int v0 = 0;
int w0 = 0;
int x0 = 0;
int y0 = 0;
int index = 0;
float r = 0.0;
float temp = 0;
int RUsnapshot = 0;
int leftsnap = 0;
curandState s;
curand_init (rands[globalIdx] , 0, 0, &s);
for (int i =0; i < 160000; i++) //@@@depends on iterations @@@@@
{
///////////////////////////////////////////////////
r = curand_uniform(&s);
RUsnapshot = b0;
//index = ((((left[j] * dimen2 + right[j]) * dimen3 + RU[j +1 ]) * dimen4) * dimen5) ;
index = ((((0 * 6 + c0) * 6 + b0) * 2) * 6) ;
b0+= ( r < *TMS[index]) * (*TMS[index + 1]) +
(! (r < *TMS[index])) * ( r < *TMS[index + 2]) * (*TMS[index + 3]) +
(! ( r < *TMS[index + 2])) * (r < *TMS[index + 4]) * (*TMS[index + 5]) ;
leftsnap = b0;
///////////////////////////////////////////////////////////
///////////////////////////////////////////////////
r = curand_uniform(&s);
RUsnapshot = c0;
//index = ((((left[j] * dimen2 + right[j]) * dimen3 + RU[j +1 ]) * dimen4) * dimen5) ;
index = ((((leftsnap * 6 + d0) * 6 + c0) * 2) * 6) ;
c0+= ( r < *TMS[index]) * (*TMS[index + 1]) +
(! (r < *TMS[index])) * ( r < *TMS[index + 2]) * (*TMS[index + 3]) +
(! ( r < *TMS[index + 2])) * (r < *TMS[index + 4]) * (*TMS[index + 5]) ;
leftsnap = c0;
///////////////////////////////////////////////////////////
///////////////////////////////////////////////////
r = curand_uniform(&s);
RUsnapshot = d0;
//index = ((((left[j] * dimen2 + right[j]) * dimen3 + RU[j +1 ]) * dimen4) * dimen5) ;
index = ((((leftsnap * 6 + e0) * 6 + d0) * 2) * 6) ;
d0+= ( r < *TMS[index]) * (*TMS[index + 1]) +
(! (r < *TMS[index])) * ( r < *TMS[index + 2]) * (*TMS[index + 3]) +
(! ( r < *TMS[index + 2])) * (r < *TMS[index + 4]) * (*TMS[index + 5]) ;
leftsnap = d0;
///////////////////////////////////////////////////////////
///////////////////////////////////////////////////
r = curand_uniform(&s);
RUsnapshot = e0;
//index = ((((left[j] * dimen2 + right[j]) * dimen3 + RU[j +1 ]) * dimen4) * dimen5) ;
index = ((((leftsnap * 6 + f0) * 6 + e0) * 2) * 6) ;
e0+= ( r < *TMS[index]) * (*TMS[index + 1]) +
(! (r < *TMS[index])) * ( r < *TMS[index + 2]) * (*TMS[index + 3]) +
(! ( r < *TMS[index + 2])) * (r < *TMS[index + 4]) * (*TMS[index + 5]) ;
leftsnap = e0;
///////////////////////////////////////////////////////////
///////////////////////////////////////////////////
r = curand_uniform(&s);
RUsnapshot = f0;
//index = ((((left[j] * dimen2 + right[j]) * dimen3 + RU[j +1 ]) * dimen4) * dimen5) ;
index = ((((leftsnap * 6 + g0) * 6 + f0) * 2) * 6) ;
f0+= ( r < *TMS[index]) * (*TMS[index + 1]) +
(! (r < *TMS[index])) * ( r < *TMS[index + 2]) * (*TMS[index + 3]) +
(! ( r < *TMS[index + 2])) * (r < *TMS[index + 4]) * (*TMS[index + 5]) ;
leftsnap = f0;
///////////////////////////////////////////////////////////
///////////////////////////////////////////////////
r = curand_uniform(&s);
RUsnapshot = g0;
//index = ((((left[j] * dimen2 + right[j]) * dimen3 + RU[j +1 ]) * dimen4) * dimen5) ;
index = ((((leftsnap * 6 + h0) * 6 + g0) * 2) * 6) ;
g0+= ( r < *TMS[index]) * (*TMS[index + 1]) +
(! (r < *TMS[index])) * ( r < *TMS[index + 2]) * (*TMS[index + 3]) +
(! ( r < *TMS[index + 2])) * (r < *TMS[index + 4]) * (*TMS[index + 5]) ;
leftsnap = g0;
///////////////////////////////////////////////////////////
///////////////////////////////////////////////////
r = curand_uniform(&s);
RUsnapshot = h0;
//index = ((((left[j] * dimen2 + right[j]) * dimen3 + RU[j +1 ]) * dimen4) * dimen5) ;
index = ((((leftsnap * 6 + i0) * 6 + h0) * 2) * 6) ;
h0+= ( r < *TMS[index]) * (*TMS[index + 1]) +
(! (r < *TMS[index])) * ( r < *TMS[index + 2]) * (*TMS[index + 3]) +
(! ( r < *TMS[index + 2])) * (r < *TMS[index + 4]) * (*TMS[index + 5]) ;
leftsnap = h0;
///////////////////////////////////////////////////////////
///////////////////////////////////////////////////
r = curand_uniform(&s);
RUsnapshot = i0;
//index = ((((left[j] * dimen2 + right[j]) * dimen3 + RU[j +1 ]) * dimen4) * dimen5) ;
index = ((((leftsnap * 6 + j0) * 6 + i0) * 2) * 6) ;
i0+= ( r < *TMS[index]) * (*TMS[index + 1]) +
(! (r < *TMS[index])) * ( r < *TMS[index + 2]) * (*TMS[index + 3]) +
(! ( r < *TMS[index + 2])) * (r < *TMS[index + 4]) * (*TMS[index + 5]) ;
leftsnap = i0;
///////////////////////////////////////////////////////////
///////////////////////////////////////////////////
r = curand_uniform(&s);
RUsnapshot = j0;
//index = ((((left[j] * dimen2 + right[j]) * dimen3 + RU[j +1 ]) * dimen4) * dimen5) ;
index = ((((leftsnap * 6 + k0) * 6 + j0) * 2) * 6) ;
j0+= ( r < *TMS[index]) * (*TMS[index + 1]) +
(! (r < *TMS[index])) * ( r < *TMS[index + 2]) * (*TMS[index + 3]) +
(! ( r < *TMS[index + 2])) * (r < *TMS[index + 4]) * (*TMS[index + 5]) ;
leftsnap = j0;
///////////////////////////////////////////////////////////
///////////////////////////////////////////////////
r = curand_uniform(&s);
RUsnapshot = k0;
//index = ((((left[j] * dimen2 + right[j]) * dimen3 + RU[j +1 ]) * dimen4) * dimen5) ;
index = ((((leftsnap * 6 + l0) * 6 + k0) * 2) * 6) ;
k0+= ( r < *TMS[index]) * (*TMS[index + 1]) +
(! (r < *TMS[index])) * ( r < *TMS[index + 2]) * (*TMS[index + 3]) +
(! ( r < *TMS[index + 2])) * (r < *TMS[index + 4]) * (*TMS[index + 5]) ;
leftsnap = k0;
///////////////////////////////////////////////////////////
///////////////////////////////////////////////////
r = curand_uniform(&s);
RUsnapshot = l0;
//index = ((((left[j] * dimen2 + right[j]) * dimen3 + RU[j +1 ]) * dimen4) * dimen5) ;
index = ((((leftsnap * 6 + m0) * 6 + l0) * 2) * 6) ;
l0+= ( r < *TMS[index]) * (*TMS[index + 1]) +
(! (r < *TMS[index])) * ( r < *TMS[index + 2]) * (*TMS[index + 3]) +
(! ( r < *TMS[index + 2])) * (r < *TMS[index + 4]) * (*TMS[index + 5]) ;
leftsnap = l0;
///////////////////////////////////////////////////////////
///////////////////////////////////////////////////
r = curand_uniform(&s);
RUsnapshot = m0;
//index = ((((left[j] * dimen2 + right[j]) * dimen3 + RU[j +1 ]) * dimen4) * dimen5) ;
index = ((((leftsnap * 6 + n0) * 6 + m0) * 2) * 6) ;
m0+= ( r < *TMS[index]) * (*TMS[index + 1]) +
(! (r < *TMS[index])) * ( r < *TMS[index + 2]) * (*TMS[index + 3]) +
(! ( r < *TMS[index + 2])) * (r < *TMS[index + 4]) * (*TMS[index + 5]) ;
leftsnap = m0;
///////////////////////////////////////////////////////////
///////////////////////////////////////////////////
r = curand_uniform(&s);
RUsnapshot = n0;
//index = ((((left[j] * dimen2 + right[j]) * dimen3 + RU[j +1 ]) * dimen4) * dimen5) ;
index = ((((leftsnap * 6 + o0) * 6 + n0) * 2) * 6) ;
n0 += ( r < *TMS[index]) * (*TMS[index + 1]) +
(! (r < *TMS[index])) * ( r < *TMS[index + 2]) * (*TMS[index + 3]) +
(! ( r < *TMS[index + 2])) * (r < *TMS[index + 4]) * (*TMS[index + 5]) ;
leftsnap = n0;
///////////////////////////////////////////////////////////
///////////////////////////////////////////////////
r = curand_uniform(&s);
RUsnapshot = o0;
//index = ((((left[j] * dimen2 + right[j]) * dimen3 + RU[j +1 ]) * dimen4) * dimen5) ;
index = ((((leftsnap * 6 + p0) * 6 + o0) * 2) * 6) ;
o0+= ( r < *TMS[index]) * (*TMS[index + 1]) +
(! (r < *TMS[index])) * ( r < *TMS[index + 2]) * (*TMS[index + 3]) +
(! ( r < *TMS[index + 2])) * (r < *TMS[index + 4]) * (*TMS[index + 5]) ;
leftsnap = o0;
///////////////////////////////////////////////////////////
///////////////////////////////////////////////////
r = curand_uniform(&s);
RUsnapshot = p0;
//index = ((((left[j] * dimen2 + right[j]) * dimen3 + RU[j +1 ]) * dimen4) * dimen5) ;
index = ((((leftsnap * 6 + q0) * 6 + p0) * 2) * 6) ;
p0+= ( r < *TMS[index]) * (*TMS[index + 1]) +
(! (r < *TMS[index])) * ( r < *TMS[index + 2]) * (*TMS[index + 3]) +
(! ( r < *TMS[index + 2])) * (r < *TMS[index + 4]) * (*TMS[index + 5]) ;
leftsnap = p0;
///////////////////////////////////////////////////////////
///////////////////////////////////////////////////
r = curand_uniform(&s);
RUsnapshot = q0;
//index = ((((left[j] * dimen2 + right[j]) * dimen3 + RU[j +1 ]) * dimen4) * dimen5) ;
index = ((((leftsnap * 6 + r0) * 6 + q0) * 2) * 6) ;
q0+= ( r < *TMS[index]) * (*TMS[index + 1]) +
(! (r < *TMS[index])) * ( r < *TMS[index + 2]) * (*TMS[index + 3]) +
(! ( r < *TMS[index + 2])) * (r < *TMS[index + 4]) * (*TMS[index + 5]) ;
leftsnap = q0;
///////////////////////////////////////////////////////////
///////////////////////////////////////////////////
r = curand_uniform(&s);
RUsnapshot = r0;
//index = ((((left[j] * dimen2 + right[j]) * dimen3 + RU[j +1 ]) * dimen4) * dimen5) ;
index = ((((leftsnap * 6 + s0) * 6 + r0) * 2) * 6) ;
r0+= ( r < *TMS[index]) * (*TMS[index + 1]) +
(! (r < *TMS[index])) * ( r < *TMS[index + 2]) * (*TMS[index + 3]) +
(! ( r < *TMS[index + 2])) * (r < *TMS[index + 4]) * (*TMS[index + 5]) ;
leftsnap = r0;
///////////////////////////////////////////////////////////
///////////////////////////////////////////////////
r = curand_uniform(&s);
RUsnapshot = s0;
//index = ((((left[j] * dimen2 + right[j]) * dimen3 + RU[j +1 ]) * dimen4) * dimen5) ;
index = ((((leftsnap * 6 + t0) * 6 + s0) * 2) * 6) ;
s0+= ( r < *TMS[index]) * (*TMS[index + 1]) +
(! (r < *TMS[index])) * ( r < *TMS[index + 2]) * (*TMS[index + 3]) +
(! ( r < *TMS[index + 2])) * (r < *TMS[index + 4]) * (*TMS[index + 5]) ;
leftsnap = s0;
///////////////////////////////////////////////////////////
///////////////////////////////////////////////////
r = curand_uniform(&s);
RUsnapshot = t0;
//index = ((((left[j] * dimen2 + right[j]) * dimen3 + RU[j +1 ]) * dimen4) * dimen5) ;
index = ((((leftsnap * 6 + u0) * 6 + t0) * 2) * 6) ;
t0+= ( r < *TMS[index]) * (*TMS[index + 1]) +
(! (r < *TMS[index])) * ( r < *TMS[index + 2]) * (*TMS[index + 3]) +
(! ( r < *TMS[index + 2])) * (r < *TMS[index + 4]) * (*TMS[index + 5]) ;
leftsnap = t0;
///////////////////////////////////////////////////////////
///////////////////////////////////////////////////
r = curand_uniform(&s);
RUsnapshot =u0;
//index = ((((left[j] * dimen2 + right[j]) * dimen3 + RU[j +1 ]) * dimen4) * dimen5) ;
index = ((((leftsnap * 6 + v0) * 6 + u0) * 2) * 6) ;
u0+= ( r < *TMS[index]) * (*TMS[index + 1]) +
(! (r < *TMS[index])) * ( r < *TMS[index + 2]) * (*TMS[index + 3]) +
(! ( r < *TMS[index + 2])) * (r < *TMS[index + 4]) * (*TMS[index + 5]) ;
leftsnap =u0;
///////////////////////////////////////////////////////////
///////////////////////////////////////////////////
r = curand_uniform(&s);
RUsnapshot = v0;
//index = ((((left[j] * dimen2 + right[j]) * dimen3 + RU[j +1 ]) * dimen4) * dimen5) ;
index = ((((leftsnap * 6 + w0) * 6 + v0) * 2) * 6) ;
v0+= ( r < *TMS[index]) * (*TMS[index + 1]) +
(! (r < *TMS[index])) * ( r < *TMS[index + 2]) * (*TMS[index + 3]) +
(! ( r < *TMS[index + 2])) * (r < *TMS[index + 4]) * (*TMS[index + 5]) ;
leftsnap = v0;
///////////////////////////////////////////////////////////
///////////////////////////////////////////////////
r = curand_uniform(&s);
RUsnapshot = w0;
//index = ((((left[j] * dimen2 + right[j]) * dimen3 + RU[j +1 ]) * dimen4) * dimen5) ;
index = ((((leftsnap * 6 + x0) * 6 + w0) * 2) * 6) ;
w0+= ( r < *TMS[index]) * (*TMS[index + 1]) +
(! (r < *TMS[index])) * ( r < *TMS[index + 2]) * (*TMS[index + 3]) +
(! ( r < *TMS[index + 2])) * (r < *TMS[index + 4]) * (*TMS[index + 5]) ;
leftsnap = w0;
///////////////////////////////////////////////////////////
///////////////////////////////////////////////////
r = curand_uniform(&s);
RUsnapshot = x0;
//index = ((((left[j] * dimen2 + right[j]) * dimen3 + RU[j +1 ]) * dimen4) * dimen5) ;
index = ((((leftsnap * 6 + y0) * 6 + x0) * 2) * 6) ;
x0+= ( r < *TMS[index]) * (*TMS[index + 1]) +
(! (r < *TMS[index])) * ( r < *TMS[index + 2]) * (*TMS[index + 3]) +
(! ( r < *TMS[index + 2])) * (r < *TMS[index + 4]) * (*TMS[index + 5]) ;
leftsnap = x0;
///////////////////////////////////////////////////////////
///////////////////////////////////////////////////
r = curand_uniform(&s);
RUsnapshot = y0;
//index = ((((left[j] * dimen2 + right[j]) * dimen3 + RU[j +1 ]) * dimen4) * dimen5) ;
index = ((((leftsnap * 6 + 0) * 6 + y0) * 2) * 6) ;
y0+= ( r < *TMS[index]) * (*TMS[index + 1]) +
(! (r < *TMS[index])) * ( r < *TMS[index + 2]) * (*TMS[index + 3]) +
(! ( r < *TMS[index + 2])) * (r < *TMS[index + 4]) * (*TMS[index + 5]) ;
///////////////////////////////////////////////////////////
temp = (b0 ==4) + (b0 ==5) + (c0 ==4) + (c0 ==5) + (d0 ==4) + (d0 ==5) + (e0 ==4) + (e0 ==5) + (f0 ==4) + (f0 ==5) +
(g0 ==4) + (g0 ==5) + (h0 ==4) + (h0 ==5) + (i0 ==4) + (i0 ==5) + (j0 ==4) + (j0 ==5) + (k0 ==4) + (k0 ==5) +
(l0 ==4) + (l0 ==5) + (m0 ==4) + (m0 ==5) + (n0 ==4) + (n0 ==5) + (o0 ==4) + (o0 ==5) + (p0 ==4) + (p0 ==5) +
(q0 ==4) + (q0 ==5) + (r0 ==4) + (r0 ==5) + (s0 ==4) + (s0 ==5) + (t0 ==4) + (t0 ==5) + (u0 ==4) + (u0 ==5) +
(v0 ==4) + (v0 ==5) + (w0 ==4) + (w0 ==5) + (x0 ==4) + (x0 ==5) + (y0 ==4) + (y0 ==5);
masterForces[globalIdx + (r_max * i)] = *TMS[1];
temp = 0.0;
}
}
}
Your example code should not give any errors as was proved by Robert Crovella. I looked at your original code and found a misaligned parenthesis cause by that first "for" loop. Good luck...