Search code examples
Tags: cuda, gpu, shared-memory

How to differentiate between pointers to shared and global memory?


In CUDA, given the value of a pointer, or the address of a variable, is there an intrinsic or another API which will introspect which address space the pointer refers to?


Solution

  • The CUDA header file sm_20_intrinsics.h defines the function

    // Classifies a generic address against the global memory space.
    //
    // ptr: generic address to test.
    // Returns 1 when the PTX `isspacep.global` predicate is set for ptr,
    // and 0 otherwise.
    // Note: the `isspacep` instruction requires sm_20 or newer.
    __device__ unsigned int __isGlobal(const void *ptr)
    {
      unsigned int in_global;

      // The address operand must live in a 64-bit register ("l") on
      // LP64 / Win64 builds and in a 32-bit register ("r") otherwise, so a
      // complete asm statement is selected per pointer width.
    #if (defined(_MSC_VER) && defined(_WIN64)) || defined(__LP64__)
      asm volatile ("{ \n\t"
                    "    .reg .pred p; \n\t"
                    "    isspacep.global p, %1; \n\t"
                    "    selp.u32 %0, 1, 0, p;  \n\t"
                    "} \n\t" : "=r"(in_global) : "l"(ptr));
    #else
      asm volatile ("{ \n\t"
                    "    .reg .pred p; \n\t"
                    "    isspacep.global p, %1; \n\t"
                    "    selp.u32 %0, 1, 0, p;  \n\t"
                    "} \n\t" : "=r"(in_global) : "r"(ptr));
    #endif

      // selp.u32 has already turned the predicate into 1 or 0.
      return in_global;
    }
    

    This function returns 1 if generic address ptr is in global memory space. It returns 0 if ptr is in shared, local or constant memory space.

    The PTX instruction isspacep does the heavy lifting. It seems like we should be able to build the analogous function this way:

    // Classifies a generic address against the shared memory space.
    //
    // ptr: generic address to test.
    // Returns 1 when the PTX `isspacep.shared` predicate is set for ptr,
    // and 0 otherwise.
    // Note: the `isspacep` instruction requires sm_20 or newer.
    __device__ unsigned int __isShared(const void *ptr)
    {
      unsigned int in_shared;

      // The address operand must live in a 64-bit register ("l") on
      // LP64 / Win64 builds and in a 32-bit register ("r") otherwise, so a
      // complete asm statement is selected per pointer width.
    #if (defined(_MSC_VER) && defined(_WIN64)) || defined(__LP64__)
      asm volatile ("{ \n\t"
                    "    .reg .pred p; \n\t"
                    "    isspacep.shared p, %1; \n\t"
                    "    selp.u32 %0, 1, 0, p;  \n\t"
                    "} \n\t" : "=r"(in_shared) : "l"(ptr));
    #else
      asm volatile ("{ \n\t"
                    "    .reg .pred p; \n\t"
                    "    isspacep.shared p, %1; \n\t"
                    "    selp.u32 %0, 1, 0, p;  \n\t"
                    "} \n\t" : "=r"(in_shared) : "r"(ptr));
    #endif

      // selp.u32 has already turned the predicate into 1 or 0.
      return in_shared;
    }
    

    Update:

    __isGlobal() and other address space predicate functions are described in the CUDA C++ Programming Guide.