Search code examples
c++pointersmemorycudashared

CUDA: Using a global thread index with shared memory won't work


Can someone explain why my kernel doesn't work when my shared memory array of pointers, TMS, is accessed at some index other than the 0th index (happens in the last line)? If TMS[0] is used in the last line, everything works as expected. When I change TMS[0] to any other index, I get a CUDA unexpected error. Assume 64 threads on one block.

#include <stdio.h>
#include <stdlib.h>
#include "cuda.h"
#include <cuda_runtime.h>
#include <curand_kernel.h>

__global__ void myKern(float *masterForces)
{
    int globalIdx = ...// set global thread id

    volatile __shared__ float uniques[64];

    {
        uniques[globalIDx] = 0;
    }

    __syncthreads();


    volatile __shared__ float *TMS[64]; 

    {
       TMS[globalIdx] = (&uniques[globalIdx]);
    }

    __syncthreads();

    masterForces[globalIdx] = *TMS[1];
}

Original context if you're curious: (You really don't need to look at this to address my problem)

#include <stdio.h>
#include <stdlib.h>
#include "cuda.h"
#include "curand.h"
#include <cuda_runtime.h>
#include "math.h"
#include <curand_kernel.h>
#include <time.h>


__global__ void myKern(const float *transMatrix, const int *pointerMatrix, float *masterForces, const double *rands, const int r_max)
{




int globalIdx = ((blockIdx.x + (blockIdx.y * gridDim.x)) * (blockDim.x * blockDim.y)) + (threadIdx.x + (threadIdx.y * blockDim.x));

volatile __shared__ float uniques[51];

uniques[0] = transMatrix[0]; uniques[1] = transMatrix[1]; uniques[2] = transMatrix[2]; // 1
uniques[3] = transMatrix[3]; uniques[4] = transMatrix[4]; uniques[5] = transMatrix[12]; // 2
uniques[6] = transMatrix[14]; uniques[7] = transMatrix[15]; uniques[8] = transMatrix[24]; // 3
uniques[9] = transMatrix[26]; uniques[10] = transMatrix[27]; uniques[11] = transMatrix[28]; // 4
uniques[12] = transMatrix[40]; uniques[13] = transMatrix[50]; uniques[14] = transMatrix[60]; // 5
uniques[15] = transMatrix[62]; uniques[16] = transMatrix[146]; uniques[17] = transMatrix[156]; // 6
uniques[18] = transMatrix[158]; uniques[19] = transMatrix[168]; uniques[20] = transMatrix[170]; // 7
uniques[21] = transMatrix[172]; uniques[22] = transMatrix[184]; uniques[23] = transMatrix[290]; // 8
uniques[24] = transMatrix[300]; uniques[25] = transMatrix[302]; uniques[26] = transMatrix[312]; // 9
uniques[27] = transMatrix[314]; uniques[28] = transMatrix[316]; uniques[29] = transMatrix[328]; // 10
uniques[30] = transMatrix[1010]; uniques[31] = transMatrix[1020]; uniques[32] = transMatrix[1022]; // 11
uniques[33] = transMatrix[1032]; uniques[34] = transMatrix[1034]; uniques[35] = transMatrix[1036]; // 12
uniques[36] = transMatrix[1048]; uniques[37] = transMatrix[1154]; uniques[38] = transMatrix[1164]; // 13
uniques[39] = transMatrix[1166]; uniques[40] = transMatrix[1176]; uniques[41] = transMatrix[1178]; // 14
uniques[42] = transMatrix[1180]; uniques[43] = transMatrix[1192]; uniques[44] = transMatrix[2018]; // 15
uniques[45] = transMatrix[2028]; uniques[46] = transMatrix[2030]; uniques[47] = transMatrix[2040]; // 16
uniques[48] = transMatrix[2042]; uniques[49] = transMatrix[2044]; uniques[50] = transMatrix[2056]; // 17

__syncthreads();


volatile __shared__ float *TMS[2592]; 

  for (int t=0; t<2592; t++)    
  {
    for (int m=0; m< 51; m++){
       if (pointerMatrix[t] == m)
       {
        TMS[t] = (&uniques[m]);
       }
  }
__syncthreads();


int b0 = 0;
int c0 = 0;
int d0 = 0;
int e0 = 0;
int f0 = 0;
int g0 = 0;
int h0 = 0;
int i0 = 0;
int j0 = 0;
int k0 = 0;
int l0 = 0;
int m0 = 0;
int n0 = 0;
int o0 = 0;
int p0 = 0;
int q0 = 0;
int r0 = 0;
int s0 = 0;
int t0 = 0;
int u0 = 0;
int v0 = 0;
int w0 = 0;
int x0 = 0;
int y0 = 0;




int index = 0;
float r = 0.0;
float temp = 0;

int RUsnapshot = 0; 
int leftsnap = 0;

curandState s;
curand_init (rands[globalIdx] , 0, 0, &s);



for (int i =0; i < 160000; i++) //@@@depends on iterations @@@@@
{
        ///////////////////////////////////////////////////
            r = curand_uniform(&s);

            RUsnapshot = b0;

            //index =  ((((left[j] * dimen2 + right[j]) * dimen3  + RU[j +1 ]) * dimen4) * dimen5) ;
            index =  ((((0 * 6 + c0) * 6  + b0) * 2) * 6) ;

            b0+= ( r < *TMS[index]) * (*TMS[index + 1]) +
                (! (r < *TMS[index])) * ( r < *TMS[index + 2]) * (*TMS[index + 3]) +
                (! ( r < *TMS[index + 2])) * (r < *TMS[index + 4]) * (*TMS[index + 5]) ;

            leftsnap = b0;
        ///////////////////////////////////////////////////////////    
        ///////////////////////////////////////////////////
            r = curand_uniform(&s);

            RUsnapshot = c0;

            //index =  ((((left[j] * dimen2 + right[j]) * dimen3  + RU[j +1 ]) * dimen4) * dimen5) ;
            index =  ((((leftsnap * 6 + d0) * 6  + c0) * 2) * 6) ;

            c0+= ( r < *TMS[index]) * (*TMS[index + 1]) +
                (! (r < *TMS[index])) * ( r < *TMS[index + 2]) * (*TMS[index + 3]) +
                (! ( r < *TMS[index + 2])) * (r < *TMS[index + 4]) * (*TMS[index + 5]) ;

            leftsnap = c0;
        ///////////////////////////////////////////////////////////   
        ///////////////////////////////////////////////////
            r = curand_uniform(&s);

            RUsnapshot = d0;

            //index =  ((((left[j] * dimen2 + right[j]) * dimen3  + RU[j +1 ]) * dimen4) * dimen5) ;
            index =  ((((leftsnap * 6 + e0) * 6  + d0) * 2) * 6) ;

            d0+= ( r < *TMS[index]) * (*TMS[index + 1]) +
                (! (r < *TMS[index])) * ( r < *TMS[index + 2]) * (*TMS[index + 3]) +
                (! ( r < *TMS[index + 2])) * (r < *TMS[index + 4]) * (*TMS[index + 5]) ;

            leftsnap = d0;
        ///////////////////////////////////////////////////////////   
        ///////////////////////////////////////////////////
            r = curand_uniform(&s);

            RUsnapshot = e0;

            //index =  ((((left[j] * dimen2 + right[j]) * dimen3  + RU[j +1 ]) * dimen4) * dimen5) ;
            index =  ((((leftsnap * 6 + f0) * 6  + e0) * 2) * 6) ;

            e0+= ( r < *TMS[index]) * (*TMS[index + 1]) +
                (! (r < *TMS[index])) * ( r < *TMS[index + 2]) * (*TMS[index + 3]) +
                (! ( r < *TMS[index + 2])) * (r < *TMS[index + 4]) * (*TMS[index + 5]) ;

            leftsnap = e0;
        ///////////////////////////////////////////////////////////   
        ///////////////////////////////////////////////////
            r = curand_uniform(&s);

            RUsnapshot = f0;

            //index =  ((((left[j] * dimen2 + right[j]) * dimen3  + RU[j +1 ]) * dimen4) * dimen5) ;
            index =  ((((leftsnap * 6 + g0) * 6  + f0) * 2) * 6) ;

            f0+= ( r < *TMS[index]) * (*TMS[index + 1]) +
                (! (r < *TMS[index])) * ( r < *TMS[index + 2]) * (*TMS[index + 3]) +
                (! ( r < *TMS[index + 2])) * (r < *TMS[index + 4]) * (*TMS[index + 5]) ;

            leftsnap = f0;
        ///////////////////////////////////////////////////////////   
        ///////////////////////////////////////////////////
            r = curand_uniform(&s);

            RUsnapshot = g0;

            //index =  ((((left[j] * dimen2 + right[j]) * dimen3  + RU[j +1 ]) * dimen4) * dimen5) ;
            index =  ((((leftsnap * 6 + h0) * 6  + g0) * 2) * 6) ;

            g0+= ( r < *TMS[index]) * (*TMS[index + 1]) +
                (! (r < *TMS[index])) * ( r < *TMS[index + 2]) * (*TMS[index + 3]) +
                (! ( r < *TMS[index + 2])) * (r < *TMS[index + 4]) * (*TMS[index + 5]) ;

            leftsnap = g0;
        ///////////////////////////////////////////////////////////   
        ///////////////////////////////////////////////////
            r = curand_uniform(&s);

            RUsnapshot = h0;

            //index =  ((((left[j] * dimen2 + right[j]) * dimen3  + RU[j +1 ]) * dimen4) * dimen5) ;
            index =  ((((leftsnap * 6 + i0) * 6  + h0) * 2) * 6) ;

            h0+= ( r < *TMS[index]) * (*TMS[index + 1]) +
                (! (r < *TMS[index])) * ( r < *TMS[index + 2]) * (*TMS[index + 3]) +
                (! ( r < *TMS[index + 2])) * (r < *TMS[index + 4]) * (*TMS[index + 5]) ;

            leftsnap = h0;
        ///////////////////////////////////////////////////////////   
        ///////////////////////////////////////////////////
            r = curand_uniform(&s);

            RUsnapshot = i0;

            //index =  ((((left[j] * dimen2 + right[j]) * dimen3  + RU[j +1 ]) * dimen4) * dimen5) ;
            index =  ((((leftsnap * 6 + j0) * 6  + i0) * 2) * 6) ;

            i0+= ( r < *TMS[index]) * (*TMS[index + 1]) +
                (! (r < *TMS[index])) * ( r < *TMS[index + 2]) * (*TMS[index + 3]) +
                (! ( r < *TMS[index + 2])) * (r < *TMS[index + 4]) * (*TMS[index + 5]) ;

            leftsnap = i0;
        ///////////////////////////////////////////////////////////   
        ///////////////////////////////////////////////////
            r = curand_uniform(&s);

            RUsnapshot = j0;

            //index =  ((((left[j] * dimen2 + right[j]) * dimen3  + RU[j +1 ]) * dimen4) * dimen5) ;
            index =  ((((leftsnap * 6 + k0) * 6  + j0) * 2) * 6) ;

            j0+= ( r < *TMS[index]) * (*TMS[index + 1]) +
                (! (r < *TMS[index])) * ( r < *TMS[index + 2]) * (*TMS[index + 3]) +
                (! ( r < *TMS[index + 2])) * (r < *TMS[index + 4]) * (*TMS[index + 5]) ;

            leftsnap = j0;
        ///////////////////////////////////////////////////////////    
        ///////////////////////////////////////////////////
            r = curand_uniform(&s);

            RUsnapshot = k0;

            //index =  ((((left[j] * dimen2 + right[j]) * dimen3  + RU[j +1 ]) * dimen4) * dimen5) ;
            index =  ((((leftsnap * 6 + l0) * 6  + k0) * 2) * 6) ;

            k0+= ( r < *TMS[index]) * (*TMS[index + 1]) +
                (! (r < *TMS[index])) * ( r < *TMS[index + 2]) * (*TMS[index + 3]) +
                (! ( r < *TMS[index + 2])) * (r < *TMS[index + 4]) * (*TMS[index + 5]) ;

            leftsnap = k0;
        ///////////////////////////////////////////////////////////   
        ///////////////////////////////////////////////////
            r = curand_uniform(&s);

            RUsnapshot = l0;

            //index =  ((((left[j] * dimen2 + right[j]) * dimen3  + RU[j +1 ]) * dimen4) * dimen5) ;
            index =  ((((leftsnap * 6 + m0) * 6  + l0) * 2) * 6) ;

            l0+= ( r < *TMS[index]) * (*TMS[index + 1]) +
                (! (r < *TMS[index])) * ( r < *TMS[index + 2]) * (*TMS[index + 3]) +
                (! ( r < *TMS[index + 2])) * (r < *TMS[index + 4]) * (*TMS[index + 5]) ;

            leftsnap = l0;
        ///////////////////////////////////////////////////////////   
        ///////////////////////////////////////////////////
            r = curand_uniform(&s);

            RUsnapshot = m0;

            //index =  ((((left[j] * dimen2 + right[j]) * dimen3  + RU[j +1 ]) * dimen4) * dimen5) ;
            index =  ((((leftsnap * 6 + n0) * 6  + m0) * 2) * 6) ;

            m0+= ( r < *TMS[index]) * (*TMS[index + 1]) +
                (! (r < *TMS[index])) * ( r < *TMS[index + 2]) * (*TMS[index + 3]) +
                (! ( r < *TMS[index + 2])) * (r < *TMS[index + 4]) * (*TMS[index + 5]) ;

            leftsnap = m0;
        ///////////////////////////////////////////////////////////   
        ///////////////////////////////////////////////////
            r = curand_uniform(&s);

            RUsnapshot = n0;

            //index =  ((((left[j] * dimen2 + right[j]) * dimen3  + RU[j +1 ]) * dimen4) * dimen5) ;
            index =  ((((leftsnap * 6 + o0) * 6  + n0) * 2) * 6) ;

            n0 += ( r < *TMS[index]) * (*TMS[index + 1]) +
                (! (r < *TMS[index])) * ( r < *TMS[index + 2]) * (*TMS[index + 3]) +
                (! ( r < *TMS[index + 2])) * (r < *TMS[index + 4]) * (*TMS[index + 5]) ;

            leftsnap = n0;
        ///////////////////////////////////////////////////////////   
        ///////////////////////////////////////////////////
            r = curand_uniform(&s);

            RUsnapshot = o0;

            //index =  ((((left[j] * dimen2 + right[j]) * dimen3  + RU[j +1 ]) * dimen4) * dimen5) ;
            index =  ((((leftsnap * 6 + p0) * 6  + o0) * 2) * 6) ;

            o0+= ( r < *TMS[index]) * (*TMS[index + 1]) +
                (! (r < *TMS[index])) * ( r < *TMS[index + 2]) * (*TMS[index + 3]) +
                (! ( r < *TMS[index + 2])) * (r < *TMS[index + 4]) * (*TMS[index + 5]) ;

            leftsnap = o0;
        ///////////////////////////////////////////////////////////   
        ///////////////////////////////////////////////////
            r = curand_uniform(&s);

            RUsnapshot = p0;

            //index =  ((((left[j] * dimen2 + right[j]) * dimen3  + RU[j +1 ]) * dimen4) * dimen5) ;
            index =  ((((leftsnap * 6 + q0) * 6  + p0) * 2) * 6) ;

            p0+= ( r < *TMS[index]) * (*TMS[index + 1]) +
                (! (r < *TMS[index])) * ( r < *TMS[index + 2]) * (*TMS[index + 3]) +
                (! ( r < *TMS[index + 2])) * (r < *TMS[index + 4]) * (*TMS[index + 5]) ;

            leftsnap = p0;
        ///////////////////////////////////////////////////////////   
        ///////////////////////////////////////////////////
            r = curand_uniform(&s);

            RUsnapshot = q0;

            //index =  ((((left[j] * dimen2 + right[j]) * dimen3  + RU[j +1 ]) * dimen4) * dimen5) ;
            index =  ((((leftsnap * 6 + r0) * 6  + q0) * 2) * 6) ;

            q0+= ( r < *TMS[index]) * (*TMS[index + 1]) +
                (! (r < *TMS[index])) * ( r < *TMS[index + 2]) * (*TMS[index + 3]) +
                (! ( r < *TMS[index + 2])) * (r < *TMS[index + 4]) * (*TMS[index + 5]) ;

            leftsnap = q0;
        ///////////////////////////////////////////////////////////   
        ///////////////////////////////////////////////////
            r = curand_uniform(&s);

            RUsnapshot = r0;

            //index =  ((((left[j] * dimen2 + right[j]) * dimen3  + RU[j +1 ]) * dimen4) * dimen5) ;
            index =  ((((leftsnap * 6 + s0) * 6  + r0) * 2) * 6) ;

            r0+= ( r < *TMS[index]) * (*TMS[index + 1]) +
                (! (r < *TMS[index])) * ( r < *TMS[index + 2]) * (*TMS[index + 3]) +
                (! ( r < *TMS[index + 2])) * (r < *TMS[index + 4]) * (*TMS[index + 5]) ;

            leftsnap = r0;
        ///////////////////////////////////////////////////////////    
        ///////////////////////////////////////////////////
            r = curand_uniform(&s);

            RUsnapshot = s0;

            //index =  ((((left[j] * dimen2 + right[j]) * dimen3  + RU[j +1 ]) * dimen4) * dimen5) ;
            index =  ((((leftsnap * 6 + t0) * 6  + s0) * 2) * 6) ;

            s0+= ( r < *TMS[index]) * (*TMS[index + 1]) +
                (! (r < *TMS[index])) * ( r < *TMS[index + 2]) * (*TMS[index + 3]) +
                (! ( r < *TMS[index + 2])) * (r < *TMS[index + 4]) * (*TMS[index + 5]) ;

            leftsnap = s0;
        ///////////////////////////////////////////////////////////   
        ///////////////////////////////////////////////////
            r = curand_uniform(&s);

            RUsnapshot = t0;

            //index =  ((((left[j] * dimen2 + right[j]) * dimen3  + RU[j +1 ]) * dimen4) * dimen5) ;
            index =  ((((leftsnap * 6 + u0) * 6  + t0) * 2) * 6) ;

            t0+= ( r < *TMS[index]) * (*TMS[index + 1]) +
                (! (r < *TMS[index])) * ( r < *TMS[index + 2]) * (*TMS[index + 3]) +
                (! ( r < *TMS[index + 2])) * (r < *TMS[index + 4]) * (*TMS[index + 5]) ;

            leftsnap = t0;
        ///////////////////////////////////////////////////////////   
        ///////////////////////////////////////////////////
            r = curand_uniform(&s);

            RUsnapshot =u0;

            //index =  ((((left[j] * dimen2 + right[j]) * dimen3  + RU[j +1 ]) * dimen4) * dimen5) ;
            index =  ((((leftsnap * 6 + v0) * 6  + u0) * 2) * 6) ;

            u0+= ( r < *TMS[index]) * (*TMS[index + 1]) +
                (! (r < *TMS[index])) * ( r < *TMS[index + 2]) * (*TMS[index + 3]) +
                (! ( r < *TMS[index + 2])) * (r < *TMS[index + 4]) * (*TMS[index + 5]) ;

            leftsnap =u0;
        ///////////////////////////////////////////////////////////   
        ///////////////////////////////////////////////////
            r = curand_uniform(&s);

            RUsnapshot = v0;

            //index =  ((((left[j] * dimen2 + right[j]) * dimen3  + RU[j +1 ]) * dimen4) * dimen5) ;
            index =  ((((leftsnap * 6 + w0) * 6  + v0) * 2) * 6) ;

            v0+= ( r < *TMS[index]) * (*TMS[index + 1]) +
                (! (r < *TMS[index])) * ( r < *TMS[index + 2]) * (*TMS[index + 3]) +
                (! ( r < *TMS[index + 2])) * (r < *TMS[index + 4]) * (*TMS[index + 5]) ;

            leftsnap = v0;
        ///////////////////////////////////////////////////////////   
        ///////////////////////////////////////////////////
            r = curand_uniform(&s);

            RUsnapshot = w0;

            //index =  ((((left[j] * dimen2 + right[j]) * dimen3  + RU[j +1 ]) * dimen4) * dimen5) ;
            index =  ((((leftsnap * 6 + x0) * 6  + w0) * 2) * 6) ;

            w0+= ( r < *TMS[index]) * (*TMS[index + 1]) +
                (! (r < *TMS[index])) * ( r < *TMS[index + 2]) * (*TMS[index + 3]) +
                (! ( r < *TMS[index + 2])) * (r < *TMS[index + 4]) * (*TMS[index + 5]) ;

            leftsnap = w0;
        ///////////////////////////////////////////////////////////   
        ///////////////////////////////////////////////////
            r = curand_uniform(&s);

            RUsnapshot = x0;

            //index =  ((((left[j] * dimen2 + right[j]) * dimen3  + RU[j +1 ]) * dimen4) * dimen5) ;
            index =  ((((leftsnap * 6 + y0) * 6  + x0) * 2) * 6) ;

            x0+= ( r < *TMS[index]) * (*TMS[index + 1]) +
                (! (r < *TMS[index])) * ( r < *TMS[index + 2]) * (*TMS[index + 3]) +
                (! ( r < *TMS[index + 2])) * (r < *TMS[index + 4]) * (*TMS[index + 5]) ;

            leftsnap = x0;
        ///////////////////////////////////////////////////////////   
        ///////////////////////////////////////////////////
            r = curand_uniform(&s);

            RUsnapshot = y0;

            //index =  ((((left[j] * dimen2 + right[j]) * dimen3  + RU[j +1 ]) * dimen4) * dimen5) ;
            index =  ((((leftsnap * 6 + 0) * 6  + y0) * 2) * 6) ;

            y0+= ( r < *TMS[index]) * (*TMS[index + 1]) +
                (! (r < *TMS[index])) * ( r < *TMS[index + 2]) * (*TMS[index + 3]) +
                (! ( r < *TMS[index + 2])) * (r < *TMS[index + 4]) * (*TMS[index + 5]) ;


        ///////////////////////////////////////////////////////////   






            temp = (b0 ==4) + (b0 ==5) + (c0 ==4) + (c0 ==5) + (d0 ==4) + (d0 ==5) + (e0 ==4) + (e0 ==5) + (f0 ==4) + (f0 ==5) + 
                   (g0 ==4) + (g0 ==5) + (h0 ==4) + (h0 ==5) + (i0 ==4) + (i0 ==5) + (j0 ==4) + (j0 ==5) + (k0 ==4) + (k0 ==5) + 
                   (l0 ==4) + (l0 ==5) + (m0 ==4) + (m0 ==5) + (n0 ==4) + (n0 ==5) + (o0 ==4) + (o0 ==5) + (p0 ==4) + (p0 ==5) + 
                   (q0 ==4) + (q0 ==5) + (r0 ==4) + (r0 ==5) + (s0 ==4) + (s0 ==5) + (t0 ==4) + (t0 ==5) + (u0 ==4) + (u0 ==5) + 
                   (v0 ==4) + (v0 ==5) + (w0 ==4) + (w0 ==5) + (x0 ==4) + (x0 ==5) + (y0 ==4) + (y0 ==5);


        masterForces[globalIdx + (r_max * i)] = *TMS[1]; 
        temp = 0.0;

}



}

}

Solution

  • Your example code should not give any errors as was proved by Robert Crovella. I looked at your original code and found a misaligned parenthesis cause by that first "for" loop. Good luck...