utiltools/X86MAC64/cuda/samples/7_CUDALibraries/MC_EstimatePiInlineP/inc/cudasharedmem.h

   1 /*
   2  * Copyright 1993-2013 NVIDIA Corporation.  All rights reserved.
   3  *
   4  * Please refer to the NVIDIA end user license agreement (EULA) associated
   5  * with this source code for terms and conditions that govern your use of
   6  * this software. Any use, reproduction, disclosure, or distribution of
   7  * this software and related documentation outside the terms of the EULA
   8  * is strictly prohibited.
   9  *
  10  */
  11
  12 #ifndef CUDASHAREDMEM_H
  13 #define CUDASHAREDMEM_H
  14
  15 //****************************************************************************
  16 // Because dynamically sized shared memory arrays are declared "extern",
  17 // we can't templatize them directly.  To get around this, we declare a
  18 // simple wrapper struct that will declare the extern array with a different
  19 // name depending on the type.  This avoids compiler errors about duplicate
  20 // definitions.
  21 //
  22 // To use dynamically allocated shared memory in a templatized __global__ or
  23 // __device__ function, just replace code like this:
  24 //      template<class T>
  25 //      __global__ void
  26 //      foo( T* g_idata, T* g_odata)
  27 //      {
  28 //          // Shared mem size is determined by the host app at run time
  29 //          extern __shared__  T sdata[];
  30 //          ...
  31 //          x = sdata[i];
  32 //          sdata[i] = x;
  33 //          ...
  34 //      }
  35 //
  36 // With this:
  37 //      template<class T>
  38 //      __global__ void
  39 //      foo( T* g_idata, T* g_odata)
  40 //      {
  41 //          // Shared mem size is determined by the host app at run time
  42 //          SharedMemory<T> sdata;
  43 //          ...
  44 //          x = sdata[i];
  45 //          sdata[i] = x;
  46 //          ...
  47 //      }
  48 //****************************************************************************
  49
  50 // This is the un-specialized struct.  Note that we prevent instantiation of this
  51 // struct by making it abstract (i.e. with pure virtual methods).
  52 template <typename T>
  53 struct SharedMemory
  54 {
  55     // Ensure that we won't compile any un-specialized types
  56     virtual __device__ T &operator*() = 0;
  57     virtual __device__ T &operator[](int i) = 0;
  58 };
  59
  60 #define BUILD_SHAREDMEMORY_TYPE(t, n) \
  61     template <> \
  62     struct SharedMemory<t> \
  63     { \
  64         __device__ t &operator*() { extern __shared__ t n[]; return *n; } \
  65         __device__ t &operator[](int i) { extern __shared__ t n[]; return n[i]; } \
  66     }
  67
  68 BUILD_SHAREDMEMORY_TYPE(int,            s_int);
  69 BUILD_SHAREDMEMORY_TYPE(unsigned int,   s_uint);
  70 BUILD_SHAREDMEMORY_TYPE(char,           s_char);
  71 BUILD_SHAREDMEMORY_TYPE(unsigned char,  s_uchar);
  72 BUILD_SHAREDMEMORY_TYPE(short,          s_short);
  73 BUILD_SHAREDMEMORY_TYPE(unsigned short, s_ushort);
  74 BUILD_SHAREDMEMORY_TYPE(long,           s_long);
  75 BUILD_SHAREDMEMORY_TYPE(unsigned long,  s_ulong);
  76 BUILD_SHAREDMEMORY_TYPE(bool,           s_bool);
  77 BUILD_SHAREDMEMORY_TYPE(float,          s_float);
  78 BUILD_SHAREDMEMORY_TYPE(double,         s_double);
  79
  80 #endif