2 * Copyright 1993-2013 NVIDIA Corporation. All rights reserved.
4 * Please refer to the NVIDIA end user license agreement (EULA) associated
5 * with this source code for terms and conditions that govern your use of
6 * this software. Any use, reproduction, disclosure, or distribution of
7 * this software and related documentation outside the terms of the EULA
8 * is strictly prohibited.
12 // Simple 3D volume renderer
14 #ifndef _VOLUMERENDER_KERNEL_CU_
15 #define _VOLUMERENDER_KERNEL_CU_
17 #include <helper_cuda.h>
18 #include <helper_math.h>
20 typedef unsigned int uint;
21 typedef unsigned char uchar;
23 cudaArray *d_volumeArray = 0;
24 cudaArray *d_transferFuncArray;
26 typedef unsigned char VolumeType;
27 //typedef unsigned short VolumeType;
29 texture<VolumeType, 3, cudaReadModeNormalizedFloat> tex; // 3D texture
30 texture<float4, 1, cudaReadModeElementType> transferTex; // 1D transfer function texture
37 __constant__ float3x4 c_invViewMatrix; // inverse view matrix
42 float3 d; // direction
45 // intersect ray with a box
46 // http://www.siggraph.org/education/materials/HyperGraph/raytrace/rtinter3.htm
49 int intersectBox(Ray r, float3 boxmin, float3 boxmax, float *tnear, float *tfar)
51 // compute intersection of ray with all six bbox planes
52 float3 invR = make_float3(1.0f) / r.d;
53 float3 tbot = invR * (boxmin - r.o);
54 float3 ttop = invR * (boxmax - r.o);
56 // re-order intersections to find smallest and largest on each axis
57 float3 tmin = fminf(ttop, tbot);
58 float3 tmax = fmaxf(ttop, tbot);
60 // find the largest tmin and the smallest tmax
61 float largest_tmin = fmaxf(fmaxf(tmin.x, tmin.y), fmaxf(tmin.x, tmin.z));
62 float smallest_tmax = fminf(fminf(tmax.x, tmax.y), fminf(tmax.x, tmax.z));
64 *tnear = largest_tmin;
65 *tfar = smallest_tmax;
67 return smallest_tmax > largest_tmin;
70 // transform vector by matrix (no translation)
72 float3 mul(const float3x4 &M, const float3 &v)
75 r.x = dot(v, make_float3(M.m[0]));
76 r.y = dot(v, make_float3(M.m[1]));
77 r.z = dot(v, make_float3(M.m[2]));
81 // transform vector by matrix with translation
83 float4 mul(const float3x4 &M, const float4 &v)
93 __device__ uint rgbaFloatToInt(float4 rgba)
95 rgba.x = __saturatef(rgba.x); // clamp to [0.0, 1.0]
96 rgba.y = __saturatef(rgba.y);
97 rgba.z = __saturatef(rgba.z);
98 rgba.w = __saturatef(rgba.w);
99 return (uint(rgba.w*255)<<24) | (uint(rgba.z*255)<<16) | (uint(rgba.y*255)<<8) | uint(rgba.x*255);
103 d_render(uint *d_output, uint imageW, uint imageH,
104 float density, float brightness,
105 float transferOffset, float transferScale)
107 const int maxSteps = 500;
108 const float tstep = 0.01f;
109 const float opacityThreshold = 0.95f;
110 const float3 boxMin = make_float3(-1.0f, -1.0f, -1.0f);
111 const float3 boxMax = make_float3(1.0f, 1.0f, 1.0f);
113 uint x = blockIdx.x*blockDim.x + threadIdx.x;
114 uint y = blockIdx.y*blockDim.y + threadIdx.y;
116 if ((x >= imageW) || (y >= imageH)) return;
118 float u = (x / (float) imageW)*2.0f-1.0f;
119 float v = (y / (float) imageH)*2.0f-1.0f;
121 // calculate eye ray in world space
123 eyeRay.o = make_float3(mul(c_invViewMatrix, make_float4(0.0f, 0.0f, 0.0f, 1.0f)));
124 eyeRay.d = normalize(make_float3(u, v, -2.0f));
125 eyeRay.d = mul(c_invViewMatrix, eyeRay.d);
127 // find intersection with box
129 int hit = intersectBox(eyeRay, boxMin, boxMax, &tnear, &tfar);
133 if (tnear < 0.0f) tnear = 0.0f; // clamp to near plane
135 // march along ray from front to back, accumulating color
136 float4 sum = make_float4(0.0f);
138 float3 pos = eyeRay.o + eyeRay.d*tnear;
139 float3 step = eyeRay.d*tstep;
141 for (int i=0; i<maxSteps; i++)
143 // read from 3D texture
144 // remap position to [0, 1] coordinates
145 float sample = tex3D(tex, pos.x*0.5f+0.5f, pos.y*0.5f+0.5f, pos.z*0.5f+0.5f);
146 //sample *= 64.0f; // scale for 10-bit data
148 // lookup in transfer function texture
149 float4 col = tex1D(transferTex, (sample-transferOffset)*transferScale);
152 // "under" operator for back-to-front blending
153 //sum = lerp(sum, col, col.w);
155 // pre-multiply alpha
159 // "over" operator for front-to-back blending
160 sum = sum + col*(1.0f - sum.w);
162 // exit early if opaque
163 if (sum.w > opacityThreshold)
175 // write output color
176 d_output[y*imageW + x] = rgbaFloatToInt(sum);
180 void setTextureFilterMode(bool bLinearFilter)
182 tex.filterMode = bLinearFilter ? cudaFilterModeLinear : cudaFilterModePoint;
186 void initCuda(void *h_volume, cudaExtent volumeSize)
189 cudaChannelFormatDesc channelDesc = cudaCreateChannelDesc<VolumeType>();
190 checkCudaErrors(cudaMalloc3DArray(&d_volumeArray, &channelDesc, volumeSize));
192 // copy data to 3D array
193 cudaMemcpy3DParms copyParams = {0};
194 copyParams.srcPtr = make_cudaPitchedPtr(h_volume, volumeSize.width*sizeof(VolumeType), volumeSize.width, volumeSize.height);
195 copyParams.dstArray = d_volumeArray;
196 copyParams.extent = volumeSize;
197 copyParams.kind = cudaMemcpyHostToDevice;
198 checkCudaErrors(cudaMemcpy3D(©Params));
200 // set texture parameters
201 tex.normalized = true; // access with normalized texture coordinates
202 tex.filterMode = cudaFilterModeLinear; // linear interpolation
203 tex.addressMode[0] = cudaAddressModeClamp; // clamp texture coordinates
204 tex.addressMode[1] = cudaAddressModeClamp;
206 // bind array to 3D texture
207 checkCudaErrors(cudaBindTextureToArray(tex, d_volumeArray, channelDesc));
209 // create transfer function texture
210 float4 transferFunc[] =
212 { 0.0, 0.0, 0.0, 0.0, },
213 { 1.0, 0.0, 0.0, 1.0, },
214 { 1.0, 0.5, 0.0, 1.0, },
215 { 1.0, 1.0, 0.0, 1.0, },
216 { 0.0, 1.0, 0.0, 1.0, },
217 { 0.0, 1.0, 1.0, 1.0, },
218 { 0.0, 0.0, 1.0, 1.0, },
219 { 1.0, 0.0, 1.0, 1.0, },
220 { 0.0, 0.0, 0.0, 0.0, },
223 cudaChannelFormatDesc channelDesc2 = cudaCreateChannelDesc<float4>();
224 cudaArray *d_transferFuncArray;
225 checkCudaErrors(cudaMallocArray(&d_transferFuncArray, &channelDesc2, sizeof(transferFunc)/sizeof(float4), 1));
226 checkCudaErrors(cudaMemcpyToArray(d_transferFuncArray, 0, 0, transferFunc, sizeof(transferFunc), cudaMemcpyHostToDevice));
228 transferTex.filterMode = cudaFilterModeLinear;
229 transferTex.normalized = true; // access with normalized texture coordinates
230 transferTex.addressMode[0] = cudaAddressModeClamp; // wrap texture coordinates
232 // Bind the array to the texture
233 checkCudaErrors(cudaBindTextureToArray(transferTex, d_transferFuncArray, channelDesc2));
237 void freeCudaBuffers()
239 checkCudaErrors(cudaFreeArray(d_volumeArray));
240 checkCudaErrors(cudaFreeArray(d_transferFuncArray));
245 void render_kernel(dim3 gridSize, dim3 blockSize, uint *d_output, uint imageW, uint imageH,
246 float density, float brightness, float transferOffset, float transferScale)
248 d_render<<<gridSize, blockSize>>>(d_output, imageW, imageH, density,
249 brightness, transferOffset, transferScale);
253 void copyInvViewMatrix(float *invViewMatrix, size_t sizeofMatrix)
255 checkCudaErrors(cudaMemcpyToSymbol(c_invViewMatrix, invViewMatrix, sizeofMatrix));
259 #endif // #ifndef _VOLUMERENDER_KERNEL_CU_