/*
 * Copyright 1993-2013 NVIDIA Corporation. All rights reserved.
 *
 * Please refer to the NVIDIA end user license agreement (EULA) associated
 * with this source code for terms and conditions that govern your use of
 * this software. Any use, reproduction, disclosure, or distribution of
 * this software and related documentation outside the terms of the EULA
 * is strictly prohibited.
 */
12 #ifndef _MARCHING_CUBES_KERNEL_CU_
13 #define _MARCHING_CUBES_KERNEL_CU_
17 #include <helper_cuda.h> // includes for helper CUDA functions
18 #include <helper_math.h>
19 #include <cuda_runtime_api.h>
20 #include <thrust/device_vector.h>
21 #include <thrust/scan.h>
26 // textures containing look-up tables
27 texture<uint, 1, cudaReadModeElementType> edgeTex;
28 texture<uint, 1, cudaReadModeElementType> triTex;
29 texture<uint, 1, cudaReadModeElementType> numVertsTex;
32 texture<uchar, 1, cudaReadModeNormalizedFloat> volumeTex;
// Allocate device copies of the three marching-cubes look-up tables and bind
// each one to a 1D linear texture for cached, read-only access from kernels.
//   d_edgeTable     - out: device copy of the 256-entry edge table
//   d_triTable      - out: device copy of the 256x16 triangle connectivity table
//   d_numVertsTable - out: device copy of the 256-entry vertex-count table
// The host tables (edgeTable, triTable, numVertsTable) are defined elsewhere
// in this file's companion tables header.
extern "C" void allocateTextures(uint **d_edgeTable, uint **d_triTable, uint **d_numVertsTable)
{
    checkCudaErrors(cudaMalloc((void **) d_edgeTable, 256*sizeof(uint)));
    checkCudaErrors(cudaMemcpy((void *)*d_edgeTable, (void *)edgeTable, 256*sizeof(uint), cudaMemcpyHostToDevice));
    // 32-bit unsigned channel format shared by all three tables
    cudaChannelFormatDesc channelDesc = cudaCreateChannelDesc(32, 0, 0, 0, cudaChannelFormatKindUnsigned);
    checkCudaErrors(cudaBindTexture(0, edgeTex, *d_edgeTable, channelDesc));

    checkCudaErrors(cudaMalloc((void **) d_triTable, 256*16*sizeof(uint)));
    checkCudaErrors(cudaMemcpy((void *)*d_triTable, (void *)triTable, 256*16*sizeof(uint), cudaMemcpyHostToDevice));
    checkCudaErrors(cudaBindTexture(0, triTex, *d_triTable, channelDesc));

    checkCudaErrors(cudaMalloc((void **) d_numVertsTable, 256*sizeof(uint)));
    checkCudaErrors(cudaMemcpy((void *)*d_numVertsTable, (void *)numVertsTable, 256*sizeof(uint), cudaMemcpyHostToDevice));
    checkCudaErrors(cudaBindTexture(0, numVertsTex, *d_numVertsTable, channelDesc));
}
// Bind the 8-bit volume data (linear device buffer) to volumeTex.
// The texture uses cudaReadModeNormalizedFloat, so fetches return the
// uchar values normalized to [0,1].
extern "C" void bindVolumeTexture(uchar *d_volume)
{
    // bind to linear texture
    checkCudaErrors(cudaBindTexture(0, volumeTex, d_volume, cudaCreateChannelDesc(8, 0, 0, 0, cudaChannelFormatKindUnsigned)));
}
// an interesting field function
// "Tangle" implicit surface, evaluated at (x, y, z). Input coordinates are
// scaled from the [-1,1] grid domain into the function's natural domain,
// and the result is remapped so the isosurface of interest sits near 0.5.
__device__
float tangle(float x, float y, float z)
{
    x *= 3.0f;
    y *= 3.0f;
    z *= 3.0f;
    return (x*x*x*x - 5.0f*x*x +y*y*y*y - 5.0f*y*y +z*z*z*z - 5.0f*z*z + 11.8f) * 0.2f + 0.5f;
}
// evaluate field function at point
// Thin wrapper so the kernels can pass a float3 instead of three scalars.
__device__
float fieldFunc(float3 p)
{
    return tangle(p.x, p.y, p.z);
}
// evaluate field function at a point
// returns value and gradient in float4: (dx, dy, dz, value)
// The gradient is a one-sided forward difference with step d = 0.001
// (not divided by d — only the direction is used, for normals).
__device__
float4 fieldFunc4(float3 p)
{
    float v = tangle(p.x, p.y, p.z);
    const float d = 0.001f;
    float dx = tangle(p.x + d, p.y, p.z) - v;
    float dy = tangle(p.x, p.y + d, p.z) - v;
    float dz = tangle(p.x, p.y, p.z + d) - v;
    return make_float4(dx, dy, dz, v);
}
// sample volume data set at a point
// Clamps the coordinate to the grid bounds, flattens it to a linear index,
// and fetches through volumeTex (returns the uchar normalized to [0,1]).
// The `data` pointer is unused in the texture path but kept for the
// commented-out direct-read alternative below.
__device__
float sampleVolume(uchar *data, uint3 p, uint3 gridSize)
{
    // clamp so neighbour lookups at the +1 boundary stay in range
    p.x = min(p.x, gridSize.x - 1);
    p.y = min(p.y, gridSize.y - 1);
    p.z = min(p.z, gridSize.z - 1);
    uint i = (p.z*gridSize.x*gridSize.y) + (p.y*gridSize.x) + p.x;
    // return (float) data[i] / 255.0f;
    return tex1Dfetch(volumeTex, i);
}
// compute position in 3d grid from 1d index
// only works for power of 2 sizes
// gridSizeShift holds the bit shifts for y and z; gridSizeMask holds
// (size-1) masks per axis.
__device__
uint3 calcGridPos(uint i, uint3 gridSizeShift, uint3 gridSizeMask)
{
    uint3 gridPos;
    gridPos.x = i & gridSizeMask.x;
    gridPos.y = (i >> gridSizeShift.y) & gridSizeMask.y;
    gridPos.z = (i >> gridSizeShift.z) & gridSizeMask.z;
    return gridPos;
}
// classify voxel based on number of vertices it will generate
// one thread per voxel
// Launch: 2D grid of 1D blocks; flat thread index i addresses one voxel.
// Writes voxelVerts[i] (vertex count) and voxelOccupied[i] (0/1 flag).
__global__ void
classifyVoxel(uint *voxelVerts, uint *voxelOccupied, uchar *volume,
              uint3 gridSize, uint3 gridSizeShift, uint3 gridSizeMask, uint numVoxels,
              float3 voxelSize, float isoValue)
{
    uint blockId = __mul24(blockIdx.y, gridDim.x) + blockIdx.x;
    uint i = __mul24(blockId, blockDim.x) + threadIdx.x;

    uint3 gridPos = calcGridPos(i, gridSizeShift, gridSizeMask);

    // read field values at neighbouring grid vertices
#if SAMPLE_VOLUME
    float field[8];
    field[0] = sampleVolume(volume, gridPos, gridSize);
    field[1] = sampleVolume(volume, gridPos + make_uint3(1, 0, 0), gridSize);
    field[2] = sampleVolume(volume, gridPos + make_uint3(1, 1, 0), gridSize);
    field[3] = sampleVolume(volume, gridPos + make_uint3(0, 1, 0), gridSize);
    field[4] = sampleVolume(volume, gridPos + make_uint3(0, 0, 1), gridSize);
    field[5] = sampleVolume(volume, gridPos + make_uint3(1, 0, 1), gridSize);
    field[6] = sampleVolume(volume, gridPos + make_uint3(1, 1, 1), gridSize);
    field[7] = sampleVolume(volume, gridPos + make_uint3(0, 1, 1), gridSize);
#else
    // evaluate the analytic field function at the eight cell corners;
    // the grid maps to the [-1,1]^3 cube
    float3 p;
    p.x = -1.0f + (gridPos.x * voxelSize.x);
    p.y = -1.0f + (gridPos.y * voxelSize.y);
    p.z = -1.0f + (gridPos.z * voxelSize.z);

    float field[8];
    field[0] = fieldFunc(p);
    field[1] = fieldFunc(p + make_float3(voxelSize.x, 0, 0));
    field[2] = fieldFunc(p + make_float3(voxelSize.x, voxelSize.y, 0));
    field[3] = fieldFunc(p + make_float3(0, voxelSize.y, 0));
    field[4] = fieldFunc(p + make_float3(0, 0, voxelSize.z));
    field[5] = fieldFunc(p + make_float3(voxelSize.x, 0, voxelSize.z));
    field[6] = fieldFunc(p + make_float3(voxelSize.x, voxelSize.y, voxelSize.z));
    field[7] = fieldFunc(p + make_float3(0, voxelSize.y, voxelSize.z));
#endif

    // calculate flag indicating if each vertex is inside or outside isosurface
    uint cubeindex;
    cubeindex =  uint(field[0] < isoValue);
    cubeindex += uint(field[1] < isoValue)*2;
    cubeindex += uint(field[2] < isoValue)*4;
    cubeindex += uint(field[3] < isoValue)*8;
    cubeindex += uint(field[4] < isoValue)*16;
    cubeindex += uint(field[5] < isoValue)*32;
    cubeindex += uint(field[6] < isoValue)*64;
    cubeindex += uint(field[7] < isoValue)*128;

    // read number of vertices from texture
    uint numVerts = tex1Dfetch(numVertsTex, cubeindex);

    // grid may be rounded up past numVoxels; guard the global stores
    if (i < numVoxels)
    {
        voxelVerts[i] = numVerts;
        voxelOccupied[i] = (numVerts > 0);
    }
}
// Host wrapper: launch classifyVoxel with the caller-supplied grid/block
// configuration and check for launch errors.
extern "C" void
launch_classifyVoxel(dim3 grid, dim3 threads, uint *voxelVerts, uint *voxelOccupied, uchar *volume,
                     uint3 gridSize, uint3 gridSizeShift, uint3 gridSizeMask, uint numVoxels,
                     float3 voxelSize, float isoValue)
{
    // calculate number of vertices need per voxel
    classifyVoxel<<<grid, threads>>>(voxelVerts, voxelOccupied, volume,
                                     gridSize, gridSizeShift, gridSizeMask,
                                     numVoxels, voxelSize, isoValue);
    getLastCudaError("classifyVoxel failed");
}
// compact voxel array
// Scatters the indices of occupied voxels into a dense array using the
// exclusive-scan result as the destination position. One thread per voxel.
__global__ void
compactVoxels(uint *compactedVoxelArray, uint *voxelOccupied, uint *voxelOccupiedScan, uint numVoxels)
{
    uint blockId = __mul24(blockIdx.y, gridDim.x) + blockIdx.x;
    uint i = __mul24(blockId, blockDim.x) + threadIdx.x;

    // bounds test must come first: the original order evaluated
    // voxelOccupied[i] before checking i, reading past the end of the
    // array for tail threads when the grid is rounded up
    if ((i < numVoxels) && voxelOccupied[i])
    {
        compactedVoxelArray[ voxelOccupiedScan[i] ] = i;
    }
}
// Host wrapper: launch compactVoxels and check for launch errors.
extern "C" void
launch_compactVoxels(dim3 grid, dim3 threads, uint *compactedVoxelArray, uint *voxelOccupied, uint *voxelOccupiedScan, uint numVoxels)
{
    compactVoxels<<<grid, threads>>>(compactedVoxelArray, voxelOccupied,
                                     voxelOccupiedScan, numVoxels);
    getLastCudaError("compactVoxels failed");
}
// compute interpolated vertex along an edge
// Linear interpolation of the crossing point between p0 and p1 given the
// field values f0, f1 at the endpoints. Callers only invoke this on edges
// the triangle table says are crossed, but note t is unguarded when f0 == f1.
__device__
float3 vertexInterp(float isolevel, float3 p0, float3 p1, float f0, float f1)
{
    float t = (isolevel - f0) / (f1 - f0);
    return lerp(p0, p1, t);
}
// compute interpolated vertex position and normal along an edge
// f0/f1 pack (gradient.xyz, value.w) as produced by fieldFunc4; outputs the
// interpolated position in p and the interpolated (unnormalized) gradient in n.
__device__
void vertexInterp2(float isolevel, float3 p0, float3 p1, float4 f0, float4 f1, float3 &p, float3 &n)
{
    float t = (isolevel - f0.w) / (f1.w - f0.w);
    // restored: the output position was never assigned in the broken version
    p = lerp(p0, p1, t);
    n.x = lerp(f0.x, f1.x, t);
    n.y = lerp(f0.y, f1.y, t);
    n.z = lerp(f0.z, f1.z, t);
    // normalization is deferred to the vertex shader
    //    n = normalize(n);
}
// generate triangles for each voxel using marching cubes
// interpolates normals from field function
// Launch: one thread per active voxel; blockDim.x must equal NTHREADS when
// USE_SHARED is set (the shared arrays are partitioned by threadIdx.x and
// need 2 * 12 * NTHREADS * sizeof(float3) bytes of static shared memory).
__global__ void
generateTriangles(float4 *pos, float4 *norm, uint *compactedVoxelArray, uint *numVertsScanned,
                  uint3 gridSize, uint3 gridSizeShift, uint3 gridSizeMask,
                  float3 voxelSize, float isoValue, uint activeVoxels, uint maxVerts)
{
    uint blockId = __mul24(blockIdx.y, gridDim.x) + blockIdx.x;
    uint i = __mul24(blockId, blockDim.x) + threadIdx.x;

    if (i > activeVoxels - 1)
    {
        // can't return here because of syncthreads()
        i = activeVoxels - 1;
    }

#if SKIP_EMPTY_VOXELS
    uint voxel = compactedVoxelArray[i];
#else
    uint voxel = i;
#endif

    // compute position in 3d grid
    uint3 gridPos = calcGridPos(voxel, gridSizeShift, gridSizeMask);

    // world-space position of the voxel's origin corner (grid maps to [-1,1]^3)
    float3 p;
    p.x = -1.0f + (gridPos.x * voxelSize.x);
    p.y = -1.0f + (gridPos.y * voxelSize.y);
    p.z = -1.0f + (gridPos.z * voxelSize.z);

    // calculate cell vertex positions
    float3 v[8];
    v[0] = p;
    v[1] = p + make_float3(voxelSize.x, 0, 0);
    v[2] = p + make_float3(voxelSize.x, voxelSize.y, 0);
    v[3] = p + make_float3(0, voxelSize.y, 0);
    v[4] = p + make_float3(0, 0, voxelSize.z);
    v[5] = p + make_float3(voxelSize.x, 0, voxelSize.z);
    v[6] = p + make_float3(voxelSize.x, voxelSize.y, voxelSize.z);
    v[7] = p + make_float3(0, voxelSize.y, voxelSize.z);

    // evaluate field values: w = value, xyz = gradient (for normals)
    float4 field[8];
    field[0] = fieldFunc4(v[0]);
    field[1] = fieldFunc4(v[1]);
    field[2] = fieldFunc4(v[2]);
    field[3] = fieldFunc4(v[3]);
    field[4] = fieldFunc4(v[4]);
    field[5] = fieldFunc4(v[5]);
    field[6] = fieldFunc4(v[6]);
    field[7] = fieldFunc4(v[7]);

    // recalculate flag
    // (this is faster than storing it in global memory)
    uint cubeindex;
    cubeindex =  uint(field[0].w < isoValue);
    cubeindex += uint(field[1].w < isoValue)*2;
    cubeindex += uint(field[2].w < isoValue)*4;
    cubeindex += uint(field[3].w < isoValue)*8;
    cubeindex += uint(field[4].w < isoValue)*16;
    cubeindex += uint(field[5].w < isoValue)*32;
    cubeindex += uint(field[6].w < isoValue)*64;
    cubeindex += uint(field[7].w < isoValue)*128;

    // find the vertices where the surface intersects the cube

#if USE_SHARED
    // use partioned shared memory to avoid using local memory
    __shared__ float3 vertlist[12*NTHREADS];
    __shared__ float3 normlist[12*NTHREADS];

    vertexInterp2(isoValue, v[0], v[1], field[0], field[1], vertlist[threadIdx.x], normlist[threadIdx.x]);
    vertexInterp2(isoValue, v[1], v[2], field[1], field[2], vertlist[threadIdx.x+NTHREADS], normlist[threadIdx.x+NTHREADS]);
    vertexInterp2(isoValue, v[2], v[3], field[2], field[3], vertlist[threadIdx.x+(NTHREADS*2)], normlist[threadIdx.x+(NTHREADS*2)]);
    vertexInterp2(isoValue, v[3], v[0], field[3], field[0], vertlist[threadIdx.x+(NTHREADS*3)], normlist[threadIdx.x+(NTHREADS*3)]);
    vertexInterp2(isoValue, v[4], v[5], field[4], field[5], vertlist[threadIdx.x+(NTHREADS*4)], normlist[threadIdx.x+(NTHREADS*4)]);
    vertexInterp2(isoValue, v[5], v[6], field[5], field[6], vertlist[threadIdx.x+(NTHREADS*5)], normlist[threadIdx.x+(NTHREADS*5)]);
    vertexInterp2(isoValue, v[6], v[7], field[6], field[7], vertlist[threadIdx.x+(NTHREADS*6)], normlist[threadIdx.x+(NTHREADS*6)]);
    vertexInterp2(isoValue, v[7], v[4], field[7], field[4], vertlist[threadIdx.x+(NTHREADS*7)], normlist[threadIdx.x+(NTHREADS*7)]);
    vertexInterp2(isoValue, v[0], v[4], field[0], field[4], vertlist[threadIdx.x+(NTHREADS*8)], normlist[threadIdx.x+(NTHREADS*8)]);
    vertexInterp2(isoValue, v[1], v[5], field[1], field[5], vertlist[threadIdx.x+(NTHREADS*9)], normlist[threadIdx.x+(NTHREADS*9)]);
    vertexInterp2(isoValue, v[2], v[6], field[2], field[6], vertlist[threadIdx.x+(NTHREADS*10)], normlist[threadIdx.x+(NTHREADS*10)]);
    vertexInterp2(isoValue, v[3], v[7], field[3], field[7], vertlist[threadIdx.x+(NTHREADS*11)], normlist[threadIdx.x+(NTHREADS*11)]);
    __syncthreads();
#else
    // per-thread local storage fallback
    float3 vertlist[12];
    float3 normlist[12];

    vertexInterp2(isoValue, v[0], v[1], field[0], field[1], vertlist[0], normlist[0]);
    vertexInterp2(isoValue, v[1], v[2], field[1], field[2], vertlist[1], normlist[1]);
    vertexInterp2(isoValue, v[2], v[3], field[2], field[3], vertlist[2], normlist[2]);
    vertexInterp2(isoValue, v[3], v[0], field[3], field[0], vertlist[3], normlist[3]);

    vertexInterp2(isoValue, v[4], v[5], field[4], field[5], vertlist[4], normlist[4]);
    vertexInterp2(isoValue, v[5], v[6], field[5], field[6], vertlist[5], normlist[5]);
    vertexInterp2(isoValue, v[6], v[7], field[6], field[7], vertlist[6], normlist[6]);
    vertexInterp2(isoValue, v[7], v[4], field[7], field[4], vertlist[7], normlist[7]);

    vertexInterp2(isoValue, v[0], v[4], field[0], field[4], vertlist[8], normlist[8]);
    vertexInterp2(isoValue, v[1], v[5], field[1], field[5], vertlist[9], normlist[9]);
    vertexInterp2(isoValue, v[2], v[6], field[2], field[6], vertlist[10], normlist[10]);
    vertexInterp2(isoValue, v[3], v[7], field[3], field[7], vertlist[11], normlist[11]);
#endif

    // output triangle vertices
    uint numVerts = tex1Dfetch(numVertsTex, cubeindex);

    for (int i=0; i<numVerts; i++)
    {
        uint edge = tex1Dfetch(triTex, cubeindex*16 + i);

        uint index = numVertsScanned[voxel] + i;

        if (index < maxVerts)
        {
#if USE_SHARED
            pos[index] = make_float4(vertlist[(edge*NTHREADS)+threadIdx.x], 1.0f);
            norm[index] = make_float4(normlist[(edge*NTHREADS)+threadIdx.x], 0.0f);
#else
            pos[index] = make_float4(vertlist[edge], 1.0f);
            norm[index] = make_float4(normlist[edge], 0.0f);
#endif
        }
    }
}
// Host wrapper: launch generateTriangles with NTHREADS threads per block
// (required by the kernel's shared-memory partitioning) and check for errors.
// The `threads` parameter is kept for interface compatibility but unused.
extern "C" void
launch_generateTriangles(dim3 grid, dim3 threads,
                         float4 *pos, float4 *norm, uint *compactedVoxelArray, uint *numVertsScanned,
                         uint3 gridSize, uint3 gridSizeShift, uint3 gridSizeMask,
                         float3 voxelSize, float isoValue, uint activeVoxels, uint maxVerts)
{
    generateTriangles<<<grid, NTHREADS>>>(pos, norm,
                                          compactedVoxelArray,
                                          numVertsScanned,
                                          gridSize, gridSizeShift, gridSizeMask,
                                          voxelSize, isoValue, activeVoxels,
                                          maxVerts);
    getLastCudaError("generateTriangles failed");
}
// calculate triangle normal
// Cross product of two triangle edges; deliberately left unnormalized.
__device__
float3 calcNormal(float3 *v0, float3 *v1, float3 *v2)
{
    float3 edge0 = *v1 - *v0;
    float3 edge1 = *v2 - *v0;
    // note - it's faster to perform normalization in vertex shader rather than here
    return cross(edge0, edge1);
}
// version that calculates flat surface normal for each triangle
// Like generateTriangles, but samples the volume texture (or the field
// function) for values only, and computes one flat normal per triangle via
// calcNormal. Launch: one thread per active voxel; blockDim.x must equal
// NTHREADS when USE_SHARED is set (12 * NTHREADS * sizeof(float3) shared).
__global__ void
generateTriangles2(float4 *pos, float4 *norm, uint *compactedVoxelArray, uint *numVertsScanned, uchar *volume,
                   uint3 gridSize, uint3 gridSizeShift, uint3 gridSizeMask,
                   float3 voxelSize, float isoValue, uint activeVoxels, uint maxVerts)
{
    uint blockId = __mul24(blockIdx.y, gridDim.x) + blockIdx.x;
    uint i = __mul24(blockId, blockDim.x) + threadIdx.x;

    if (i > activeVoxels - 1)
    {
        // can't return here because of syncthreads()
        i = activeVoxels - 1;
    }

#if SKIP_EMPTY_VOXELS
    uint voxel = compactedVoxelArray[i];
#else
    uint voxel = i;
#endif

    // compute position in 3d grid
    uint3 gridPos = calcGridPos(voxel, gridSizeShift, gridSizeMask);

    // world-space position of the voxel's origin corner (grid maps to [-1,1]^3)
    float3 p;
    p.x = -1.0f + (gridPos.x * voxelSize.x);
    p.y = -1.0f + (gridPos.y * voxelSize.y);
    p.z = -1.0f + (gridPos.z * voxelSize.z);

    // calculate cell vertex positions
    float3 v[8];
    v[0] = p;
    v[1] = p + make_float3(voxelSize.x, 0, 0);
    v[2] = p + make_float3(voxelSize.x, voxelSize.y, 0);
    v[3] = p + make_float3(0, voxelSize.y, 0);
    v[4] = p + make_float3(0, 0, voxelSize.z);
    v[5] = p + make_float3(voxelSize.x, 0, voxelSize.z);
    v[6] = p + make_float3(voxelSize.x, voxelSize.y, voxelSize.z);
    v[7] = p + make_float3(0, voxelSize.y, voxelSize.z);

#if SAMPLE_VOLUME
    // read field values at the eight cell corners from the volume texture
    float field[8];
    field[0] = sampleVolume(volume, gridPos, gridSize);
    field[1] = sampleVolume(volume, gridPos + make_uint3(1, 0, 0), gridSize);
    field[2] = sampleVolume(volume, gridPos + make_uint3(1, 1, 0), gridSize);
    field[3] = sampleVolume(volume, gridPos + make_uint3(0, 1, 0), gridSize);
    field[4] = sampleVolume(volume, gridPos + make_uint3(0, 0, 1), gridSize);
    field[5] = sampleVolume(volume, gridPos + make_uint3(1, 0, 1), gridSize);
    field[6] = sampleVolume(volume, gridPos + make_uint3(1, 1, 1), gridSize);
    field[7] = sampleVolume(volume, gridPos + make_uint3(0, 1, 1), gridSize);
#else
    // evaluate field values
    float field[8];
    field[0] = fieldFunc(v[0]);
    field[1] = fieldFunc(v[1]);
    field[2] = fieldFunc(v[2]);
    field[3] = fieldFunc(v[3]);
    field[4] = fieldFunc(v[4]);
    field[5] = fieldFunc(v[5]);
    field[6] = fieldFunc(v[6]);
    field[7] = fieldFunc(v[7]);
#endif

    // recalculate flag indicating which vertices are inside the isosurface
    uint cubeindex;
    cubeindex =  uint(field[0] < isoValue);
    cubeindex += uint(field[1] < isoValue)*2;
    cubeindex += uint(field[2] < isoValue)*4;
    cubeindex += uint(field[3] < isoValue)*8;
    cubeindex += uint(field[4] < isoValue)*16;
    cubeindex += uint(field[5] < isoValue)*32;
    cubeindex += uint(field[6] < isoValue)*64;
    cubeindex += uint(field[7] < isoValue)*128;

    // find the vertices where the surface intersects the cube

#if USE_SHARED
    // use shared memory to avoid using local
    __shared__ float3 vertlist[12*NTHREADS];

    vertlist[threadIdx.x] = vertexInterp(isoValue, v[0], v[1], field[0], field[1]);
    vertlist[NTHREADS+threadIdx.x] = vertexInterp(isoValue, v[1], v[2], field[1], field[2]);
    vertlist[(NTHREADS*2)+threadIdx.x] = vertexInterp(isoValue, v[2], v[3], field[2], field[3]);
    vertlist[(NTHREADS*3)+threadIdx.x] = vertexInterp(isoValue, v[3], v[0], field[3], field[0]);
    vertlist[(NTHREADS*4)+threadIdx.x] = vertexInterp(isoValue, v[4], v[5], field[4], field[5]);
    vertlist[(NTHREADS*5)+threadIdx.x] = vertexInterp(isoValue, v[5], v[6], field[5], field[6]);
    vertlist[(NTHREADS*6)+threadIdx.x] = vertexInterp(isoValue, v[6], v[7], field[6], field[7]);
    vertlist[(NTHREADS*7)+threadIdx.x] = vertexInterp(isoValue, v[7], v[4], field[7], field[4]);
    vertlist[(NTHREADS*8)+threadIdx.x] = vertexInterp(isoValue, v[0], v[4], field[0], field[4]);
    vertlist[(NTHREADS*9)+threadIdx.x] = vertexInterp(isoValue, v[1], v[5], field[1], field[5]);
    vertlist[(NTHREADS*10)+threadIdx.x] = vertexInterp(isoValue, v[2], v[6], field[2], field[6]);
    vertlist[(NTHREADS*11)+threadIdx.x] = vertexInterp(isoValue, v[3], v[7], field[3], field[7]);
    __syncthreads();
#else
    // per-thread local storage fallback
    float3 vertlist[12];

    vertlist[0] = vertexInterp(isoValue, v[0], v[1], field[0], field[1]);
    vertlist[1] = vertexInterp(isoValue, v[1], v[2], field[1], field[2]);
    vertlist[2] = vertexInterp(isoValue, v[2], v[3], field[2], field[3]);
    vertlist[3] = vertexInterp(isoValue, v[3], v[0], field[3], field[0]);

    vertlist[4] = vertexInterp(isoValue, v[4], v[5], field[4], field[5]);
    vertlist[5] = vertexInterp(isoValue, v[5], v[6], field[5], field[6]);
    vertlist[6] = vertexInterp(isoValue, v[6], v[7], field[6], field[7]);
    vertlist[7] = vertexInterp(isoValue, v[7], v[4], field[7], field[4]);

    vertlist[8] = vertexInterp(isoValue, v[0], v[4], field[0], field[4]);
    vertlist[9] = vertexInterp(isoValue, v[1], v[5], field[1], field[5]);
    vertlist[10] = vertexInterp(isoValue, v[2], v[6], field[2], field[6]);
    vertlist[11] = vertexInterp(isoValue, v[3], v[7], field[3], field[7]);
#endif

    // output triangle vertices
    uint numVerts = tex1Dfetch(numVertsTex, cubeindex);

    for (int i=0; i<numVerts; i+=3)
    {
        uint index = numVertsScanned[voxel] + i;

        // three pointers into vertlist, one per triangle corner
        // (shadows the cell-corner array v[8] above, as in the original)
        float3 *v[3];
        uint edge;
        edge = tex1Dfetch(triTex, (cubeindex*16) + i);
#if USE_SHARED
        v[0] = &vertlist[(edge*NTHREADS)+threadIdx.x];
#else
        v[0] = &vertlist[edge];
#endif

        edge = tex1Dfetch(triTex, (cubeindex*16) + i + 1);
#if USE_SHARED
        v[1] = &vertlist[(edge*NTHREADS)+threadIdx.x];
#else
        v[1] = &vertlist[edge];
#endif

        edge = tex1Dfetch(triTex, (cubeindex*16) + i + 2);
#if USE_SHARED
        v[2] = &vertlist[(edge*NTHREADS)+threadIdx.x];
#else
        v[2] = &vertlist[edge];
#endif

        // calculate triangle surface normal
        float3 n = calcNormal(v[0], v[1], v[2]);

        // conservative guard: writes index..index+2, so requires index < maxVerts - 3
        if (index < (maxVerts - 3))
        {
            pos[index] = make_float4(*v[0], 1.0f);
            norm[index] = make_float4(n, 0.0f);

            pos[index+1] = make_float4(*v[1], 1.0f);
            norm[index+1] = make_float4(n, 0.0f);

            pos[index+2] = make_float4(*v[2], 1.0f);
            norm[index+2] = make_float4(n, 0.0f);
        }
    }
}
// Host wrapper: launch generateTriangles2 with NTHREADS threads per block
// (required by the kernel's shared-memory partitioning) and check for errors.
// The `threads` parameter is kept for interface compatibility but unused.
extern "C" void
launch_generateTriangles2(dim3 grid, dim3 threads,
                          float4 *pos, float4 *norm, uint *compactedVoxelArray, uint *numVertsScanned, uchar *volume,
                          uint3 gridSize, uint3 gridSizeShift, uint3 gridSizeMask,
                          float3 voxelSize, float isoValue, uint activeVoxels, uint maxVerts)
{
    generateTriangles2<<<grid, NTHREADS>>>(pos, norm,
                                           compactedVoxelArray,
                                           numVertsScanned, volume,
                                           gridSize, gridSizeShift, gridSizeMask,
                                           voxelSize, isoValue, activeVoxels,
                                           maxVerts);
    getLastCudaError("generateTriangles2 failed");
}
// Exclusive prefix-sum of numElements uints in device memory via Thrust.
// output[0] = 0; output[k] = sum of input[0..k-1]. Used to turn per-voxel
// counts/occupancy flags into output offsets.
extern "C" void ThrustScanWrapper(unsigned int *output, unsigned int *input, unsigned int numElements)
{
    thrust::exclusive_scan(thrust::device_ptr<unsigned int>(input),
                           thrust::device_ptr<unsigned int>(input + numElements),
                           thrust::device_ptr<unsigned int>(output));
}