hostdepend/X86MAC64/util/X86MAC64/cuda/samples/2_Graphics/volumeRender/volumeRender_kernel.cu

   1 /*
   2  * Copyright 1993-2013 NVIDIA Corporation.  All rights reserved.
   3  *
   4  * Please refer to the NVIDIA end user license agreement (EULA) associated
   5  * with this source code for terms and conditions that govern your use of
   6  * this software. Any use, reproduction, disclosure, or distribution of
   7  * this software and related documentation outside the terms of the EULA
   8  * is strictly prohibited.
   9  *
  10  */
  11
  12 // Simple 3D volume renderer
  13
  14 #ifndef _VOLUMERENDER_KERNEL_CU_
  15 #define _VOLUMERENDER_KERNEL_CU_
  16
  17 #include <helper_cuda.h>
  18 #include <helper_math.h>
  19
  20 typedef unsigned int  uint;
  21 typedef unsigned char uchar;
  22
  23 cudaArray *d_volumeArray = 0;
  24 cudaArray *d_transferFuncArray;
  25
  26 typedef unsigned char VolumeType;
  27 //typedef unsigned short VolumeType;
  28
  29 texture<VolumeType, 3, cudaReadModeNormalizedFloat> tex;         // 3D texture
  30 texture<float4, 1, cudaReadModeElementType>         transferTex; // 1D transfer function texture
  31
  32 typedef struct
  33 {
  34     float4 m[3];
  35 } float3x4;
  36
  37 __constant__ float3x4 c_invViewMatrix;  // inverse view matrix
  38
  39 struct Ray
  40 {
  41     float3 o;   // origin
  42     float3 d;   // direction
  43 };
  44
  45 // intersect ray with a box
  46 // http://www.siggraph.org/education/materials/HyperGraph/raytrace/rtinter3.htm
  47
  48 __device__
  49 int intersectBox(Ray r, float3 boxmin, float3 boxmax, float *tnear, float *tfar)
  50 {
  51     // compute intersection of ray with all six bbox planes
  52     float3 invR = make_float3(1.0f) / r.d;
  53     float3 tbot = invR * (boxmin - r.o);
  54     float3 ttop = invR * (boxmax - r.o);
  55
  56     // re-order intersections to find smallest and largest on each axis
  57     float3 tmin = fminf(ttop, tbot);
  58     float3 tmax = fmaxf(ttop, tbot);
  59
  60     // find the largest tmin and the smallest tmax
  61     float largest_tmin = fmaxf(fmaxf(tmin.x, tmin.y), fmaxf(tmin.x, tmin.z));
  62     float smallest_tmax = fminf(fminf(tmax.x, tmax.y), fminf(tmax.x, tmax.z));
  63
  64     *tnear = largest_tmin;
  65     *tfar = smallest_tmax;
  66
  67     return smallest_tmax > largest_tmin;
  68 }
  69
  70 // transform vector by matrix (no translation)
  71 __device__
  72 float3 mul(const float3x4 &M, const float3 &v)
  73 {
  74     float3 r;
  75     r.x = dot(v, make_float3(M.m[0]));
  76     r.y = dot(v, make_float3(M.m[1]));
  77     r.z = dot(v, make_float3(M.m[2]));
  78     return r;
  79 }
  80
  81 // transform vector by matrix with translation
  82 __device__
  83 float4 mul(const float3x4 &M, const float4 &v)
  84 {
  85     float4 r;
  86     r.x = dot(v, M.m[0]);
  87     r.y = dot(v, M.m[1]);
  88     r.z = dot(v, M.m[2]);
  89     r.w = 1.0f;
  90     return r;
  91 }
  92
  93 __device__ uint rgbaFloatToInt(float4 rgba)
  94 {
  95     rgba.x = __saturatef(rgba.x);   // clamp to [0.0, 1.0]
  96     rgba.y = __saturatef(rgba.y);
  97     rgba.z = __saturatef(rgba.z);
  98     rgba.w = __saturatef(rgba.w);
  99     return (uint(rgba.w*255)<<24) | (uint(rgba.z*255)<<16) | (uint(rgba.y*255)<<8) | uint(rgba.x*255);
 100 }
 101
 102 __global__ void
 103 d_render(uint *d_output, uint imageW, uint imageH,
 104          float density, float brightness,
 105          float transferOffset, float transferScale)
 106 {
 107     const int maxSteps = 500;
 108     const float tstep = 0.01f;
 109     const float opacityThreshold = 0.95f;
 110     const float3 boxMin = make_float3(-1.0f, -1.0f, -1.0f);
 111     const float3 boxMax = make_float3(1.0f, 1.0f, 1.0f);
 112
 113     uint x = blockIdx.x*blockDim.x + threadIdx.x;
 114     uint y = blockIdx.y*blockDim.y + threadIdx.y;
 115
 116     if ((x >= imageW) || (y >= imageH)) return;
 117
 118     float u = (x / (float) imageW)*2.0f-1.0f;
 119     float v = (y / (float) imageH)*2.0f-1.0f;
 120
 121     // calculate eye ray in world space
 122     Ray eyeRay;
 123     eyeRay.o = make_float3(mul(c_invViewMatrix, make_float4(0.0f, 0.0f, 0.0f, 1.0f)));
 124     eyeRay.d = normalize(make_float3(u, v, -2.0f));
 125     eyeRay.d = mul(c_invViewMatrix, eyeRay.d);
 126
 127     // find intersection with box
 128     float tnear, tfar;
 129     int hit = intersectBox(eyeRay, boxMin, boxMax, &tnear, &tfar);
 130
 131     if (!hit) return;
 132
 133     if (tnear < 0.0f) tnear = 0.0f;     // clamp to near plane
 134
 135     // march along ray from front to back, accumulating color
 136     float4 sum = make_float4(0.0f);
 137     float t = tnear;
 138     float3 pos = eyeRay.o + eyeRay.d*tnear;
 139     float3 step = eyeRay.d*tstep;
 140
 141     for (int i=0; i<maxSteps; i++)
 142     {
 143         // read from 3D texture
 144         // remap position to [0, 1] coordinates
 145         float sample = tex3D(tex, pos.x*0.5f+0.5f, pos.y*0.5f+0.5f, pos.z*0.5f+0.5f);
 146         //sample *= 64.0f;    // scale for 10-bit data
 147
 148         // lookup in transfer function texture
 149         float4 col = tex1D(transferTex, (sample-transferOffset)*transferScale);
 150         col.w *= density;
 151
 152         // "under" operator for back-to-front blending
 153         //sum = lerp(sum, col, col.w);
 154
 155         // pre-multiply alpha
 156         col.x *= col.w;
 157         col.y *= col.w;
 158         col.z *= col.w;
 159         // "over" operator for front-to-back blending
 160         sum = sum + col*(1.0f - sum.w);
 161
 162         // exit early if opaque
 163         if (sum.w > opacityThreshold)
 164             break;
 165
 166         t += tstep;
 167
 168         if (t > tfar) break;
 169
 170         pos += step;
 171     }
 172
 173     sum *= brightness;
 174
 175     // write output color
 176     d_output[y*imageW + x] = rgbaFloatToInt(sum);
 177 }
 178
 179 extern "C"
 180 void setTextureFilterMode(bool bLinearFilter)
 181 {
 182     tex.filterMode = bLinearFilter ? cudaFilterModeLinear : cudaFilterModePoint;
 183 }
 184
 185 extern "C"
 186 void initCuda(void *h_volume, cudaExtent volumeSize)
 187 {
 188     // create 3D array
 189     cudaChannelFormatDesc channelDesc = cudaCreateChannelDesc<VolumeType>();
 190     checkCudaErrors(cudaMalloc3DArray(&d_volumeArray, &channelDesc, volumeSize));
 191
 192     // copy data to 3D array
 193     cudaMemcpy3DParms copyParams = {0};
 194     copyParams.srcPtr   = make_cudaPitchedPtr(h_volume, volumeSize.width*sizeof(VolumeType), volumeSize.width, volumeSize.height);
 195     copyParams.dstArray = d_volumeArray;
 196     copyParams.extent   = volumeSize;
 197     copyParams.kind     = cudaMemcpyHostToDevice;
 198     checkCudaErrors(cudaMemcpy3D(&copyParams));
 199
 200     // set texture parameters
 201     tex.normalized = true;                      // access with normalized texture coordinates
 202     tex.filterMode = cudaFilterModeLinear;      // linear interpolation
 203     tex.addressMode[0] = cudaAddressModeClamp;  // clamp texture coordinates
 204     tex.addressMode[1] = cudaAddressModeClamp;
 205
 206     // bind array to 3D texture
 207     checkCudaErrors(cudaBindTextureToArray(tex, d_volumeArray, channelDesc));
 208
 209     // create transfer function texture
 210     float4 transferFunc[] =
 211     {
 212         {  0.0, 0.0, 0.0, 0.0, },
 213         {  1.0, 0.0, 0.0, 1.0, },
 214         {  1.0, 0.5, 0.0, 1.0, },
 215         {  1.0, 1.0, 0.0, 1.0, },
 216         {  0.0, 1.0, 0.0, 1.0, },
 217         {  0.0, 1.0, 1.0, 1.0, },
 218         {  0.0, 0.0, 1.0, 1.0, },
 219         {  1.0, 0.0, 1.0, 1.0, },
 220         {  0.0, 0.0, 0.0, 0.0, },
 221     };
 222
 223     cudaChannelFormatDesc channelDesc2 = cudaCreateChannelDesc<float4>();
 224     cudaArray *d_transferFuncArray;
 225     checkCudaErrors(cudaMallocArray(&d_transferFuncArray, &channelDesc2, sizeof(transferFunc)/sizeof(float4), 1));
 226     checkCudaErrors(cudaMemcpyToArray(d_transferFuncArray, 0, 0, transferFunc, sizeof(transferFunc), cudaMemcpyHostToDevice));
 227
 228     transferTex.filterMode = cudaFilterModeLinear;
 229     transferTex.normalized = true;    // access with normalized texture coordinates
 230     transferTex.addressMode[0] = cudaAddressModeClamp;   // wrap texture coordinates
 231
 232     // Bind the array to the texture
 233     checkCudaErrors(cudaBindTextureToArray(transferTex, d_transferFuncArray, channelDesc2));
 234 }
 235
 236 extern "C"
 237 void freeCudaBuffers()
 238 {
 239     checkCudaErrors(cudaFreeArray(d_volumeArray));
 240     checkCudaErrors(cudaFreeArray(d_transferFuncArray));
 241 }
 242
 243
 244 extern "C"
 245 void render_kernel(dim3 gridSize, dim3 blockSize, uint *d_output, uint imageW, uint imageH,
 246                    float density, float brightness, float transferOffset, float transferScale)
 247 {
 248     d_render<<<gridSize, blockSize>>>(d_output, imageW, imageH, density,
 249                                       brightness, transferOffset, transferScale);
 250 }
 251
 252 extern "C"
 253 void copyInvViewMatrix(float *invViewMatrix, size_t sizeofMatrix)
 254 {
 255     checkCudaErrors(cudaMemcpyToSymbol(c_invViewMatrix, invViewMatrix, sizeofMatrix));
 256 }
 257
 258
 259 #endif // #ifndef _VOLUMERENDER_KERNEL_CU_