2 * Copyright 1993-2013 NVIDIA Corporation. All rights reserved.
4 * Please refer to the NVIDIA end user license agreement (EULA) associated
5 * with this source code for terms and conditions that govern your use of
6 * this software. Any use, reproduction, disclosure, or distribution of
7 * this software and related documentation outside the terms of the EULA
8 * is strictly prohibited.
13 // USE_TEXSUBIMAGE2D uses glTexSubImage2D() to update the final result
14 // commenting it will make the sample use the other way :
15 // map a texture in CUDA and blit the result into it
16 //#define USE_TEXSUBIMAGE2D
19 # define WINDOWS_LEAN_AND_MEAN
22 #pragma warning(disable:4996)
25 // OpenGL Graphics includes
27 #if defined(__APPLE__) || defined(MACOSX)
28 #include <GLUT/glut.h>
29 // Sorry for Apple : unsigned int sampler is not available to you, yet...
30 // Let's switch to the use of PBO and glTexSubImage
31 #define USE_TEXSUBIMAGE2D
33 #include <GL/freeglut.h>
37 #include <cuda_runtime.h>
38 #include <cuda_gl_interop.h>
40 // CUDA utilities and system includes
41 #include <helper_cuda.h>
42 #include <helper_cuda_gl.h>
44 #include <helper_functions.h>
45 #include <rendercheck_gl.h>
47 // Shared Library Test Functions
48 #define MAX_EPSILON 10
49 #define REFRESH_DELAY 10 //ms
51 const char *sSDKname = "simpleCUDA2GL";
53 unsigned int g_TotalErrors = 0;
55 // CheckFBO/BackBuffer class objects
56 CheckRender *g_CheckRender = NULL;
58 ////////////////////////////////////////////////////////////////////////////////
59 // constants / global variables
60 unsigned int window_width = 512;
61 unsigned int window_height = 512;
62 unsigned int image_width = 512;
63 unsigned int image_height = 512;
64 int iGLUTWindowHandle = 0; // handle to the GLUT window
66 // pbo and fbo variables
67 #ifdef USE_TEXSUBIMAGE2D
69 struct cudaGraphicsResource *cuda_pbo_dest_resource;
71 unsigned int *cuda_dest_resource;
72 GLuint shDrawTex; // draws a texture
73 struct cudaGraphicsResource *cuda_tex_result_resource;
77 struct cudaGraphicsResource *cuda_tex_screen_resource;
79 unsigned int size_tex_data;
80 unsigned int num_texels;
81 unsigned int num_values;
83 // (offscreen) render target fbo variables
84 GLuint tex_screen; // where we render the image
85 GLuint tex_cudaResult; // where we will copy the CUDA result
87 char *ref_file = NULL;
88 bool enable_cuda = true;
95 static int fpsCount = 0;
96 static int fpsLimit = 1;
97 StopWatchInterface *timer = NULL;
99 #ifndef USE_TEXTURE_RGBA8UI
100 # pragma message("Note: Using Texture fmt GL_RGBA16F_ARB")
102 // NOTE: the current issue with regular RGBA8 internal format of textures
103 // is that HW stores them as BGRA8. Therefore CUDA will see BGRA where users
104 // expected RGBA8. To prevent this issue, the driver team decided to prevent this to happen
105 // instead, use RGBA8UI which required the additional work of scaling the fragment shader
106 // output from 0-1 to 0-255. This is why we have some GLSL code, in this case
107 # pragma message("Note: Using Texture RGBA8UI + GLSL for teapot rendering")
109 GLuint shDrawPot; // colors the teapot
112 bool IsOpenGLAvailable(const char *appName)
117 #if (defined(__APPLE__) || defined(MACOSX))
118 bool IsOpenGLAvailable(const char *appName)
123 // check if this is a linux machine
124 #include <X11/Xlib.h>
126 bool IsOpenGLAvailable(const char *appName)
128 Display *Xdisplay = XOpenDisplay(NULL);
130 if (Xdisplay == NULL)
136 XCloseDisplay(Xdisplay);
143 ////////////////////////////////////////////////////////////////////////////////
145 launch_cudaProcess(dim3 grid, dim3 block, int sbytes,
146 unsigned int *g_odata,
149 // Forward declarations
150 void runStdProgram(int argc, char **argv);
152 void Cleanup(int iExitCode);
155 bool initCUDA(int argc, char **argv, bool bUseGL);
156 bool initGL(int *argc, char **argv);
158 #ifdef USE_TEXSUBIMAGE2D
159 void createPBO(GLuint *pbo, struct cudaGraphicsResource **pbo_resource);
160 void deletePBO(GLuint *pbo);
163 void createTextureDst(GLuint *tex_cudaResult, unsigned int size_x, unsigned int size_y);
164 void deleteTexture(GLuint *tex);
166 // rendering callbacks
169 void keyboard(unsigned char key, int x, int y);
170 void reshape(int w, int h);
171 void mainMenu(int i);
173 #ifdef USE_TEXSUBIMAGE2D
174 ////////////////////////////////////////////////////////////////////////////////
176 ////////////////////////////////////////////////////////////////////////////////
178 createPBO(GLuint *pbo, struct cudaGraphicsResource **pbo_resource)
180 // set up vertex data parameter
181 num_texels = image_width * image_height;
182 num_values = num_texels * 4;
183 size_tex_data = sizeof(GLubyte) * num_values;
184 void *data = malloc(size_tex_data);
186 // create buffer object
187 glGenBuffers(1, pbo);
188 glBindBuffer(GL_ARRAY_BUFFER, *pbo);
189 glBufferData(GL_ARRAY_BUFFER, size_tex_data, data, GL_DYNAMIC_DRAW);
192 glBindBuffer(GL_ARRAY_BUFFER, 0);
194 // register this buffer object with CUDA
195 checkCudaErrors(cudaGraphicsGLRegisterBuffer(pbo_resource, *pbo, cudaGraphicsMapFlagsNone));
197 SDK_CHECK_ERROR_GL();
201 deletePBO(GLuint *pbo)
203 glDeleteBuffers(1, pbo);
204 SDK_CHECK_ERROR_GL();
209 const GLenum fbo_targets[] =
211 GL_COLOR_ATTACHMENT0_EXT, GL_COLOR_ATTACHMENT1_EXT,
212 GL_COLOR_ATTACHMENT2_EXT, GL_COLOR_ATTACHMENT3_EXT
215 #ifndef USE_TEXSUBIMAGE2D
216 static const char *glsl_drawtex_vertshader_src =
219 " gl_Position = gl_Vertex;\n"
220 " gl_TexCoord[0].xy = gl_MultiTexCoord0.xy;\n"
223 static const char *glsl_drawtex_fragshader_src =
225 "uniform usampler2D texImage;\n"
228 " vec4 c = texture(texImage, gl_TexCoord[0].xy);\n"
229 " gl_FragColor = c / 255.0;\n"
233 static const char *glsl_drawpot_fragshader_src =
234 //WARNING: seems like the gl_FragColor doesn't want to output >1 colors...
235 //you need version 1.3 so you can define a uvec4 output...
236 //but MacOSX complains about not supporting 1.3 !!
237 // for now, the mode where we use RGBA8UI may not work properly for Apple : only RGBA16F works (default)
238 #if defined(__APPLE__) || defined(MACOSX)
241 " gl_FragColor = vec4(gl_Color * 255.0);\n"
245 "out uvec4 FragColor;\n"
248 " FragColor = uvec4(gl_Color.xyz * 255.0, 255.0);\n"
252 // copy image and process using CUDA
253 void generateCUDAImage()
255 // run the Cuda kernel
256 unsigned int *out_data;
258 #ifdef USE_TEXSUBIMAGE2D
259 checkCudaErrors(cudaGraphicsMapResources(1, &cuda_pbo_dest_resource, 0));
261 checkCudaErrors(cudaGraphicsResourceGetMappedPointer((void **)&out_data, &num_bytes,
262 cuda_pbo_dest_resource));
263 //printf("CUDA mapped pointer of pbo_out: May access %ld bytes, expected %d\n", num_bytes, size_tex_data);
265 out_data = cuda_dest_resource;
267 // calculate grid size
268 dim3 block(16, 16, 1);
269 //dim3 block(16, 16, 1);
270 dim3 grid(image_width / block.x, image_height / block.y, 1);
271 // execute CUDA kernel
272 launch_cudaProcess(grid, block, 0, out_data, image_width);
275 // CUDA generated data in cuda memory or in a mapped PBO made of BGRA 8 bits
276 // 2 solutions, here :
277 // - use glTexSubImage2D(), there is the potential to loose performance in possible hidden conversion
278 // - map the texture and blit the result thanks to CUDA API
279 #ifdef USE_TEXSUBIMAGE2D
280 checkCudaErrors(cudaGraphicsUnmapResources(1, &cuda_pbo_dest_resource, 0));
281 glBindBuffer(GL_PIXEL_UNPACK_BUFFER_ARB, pbo_dest);
283 glBindTexture(GL_TEXTURE_2D, tex_cudaResult);
284 glTexSubImage2D(GL_TEXTURE_2D, 0, 0, 0,
285 image_width, image_height,
286 GL_RGBA, GL_UNSIGNED_BYTE, NULL);
287 SDK_CHECK_ERROR_GL();
288 glBindBuffer(GL_PIXEL_PACK_BUFFER_ARB, 0);
289 glBindBuffer(GL_PIXEL_UNPACK_BUFFER_ARB, 0);
291 // We want to copy cuda_dest_resource data to the texture
292 // map buffer objects to get CUDA device pointers
293 cudaArray *texture_ptr;
294 checkCudaErrors(cudaGraphicsMapResources(1, &cuda_tex_result_resource, 0));
295 checkCudaErrors(cudaGraphicsSubResourceGetMappedArray(&texture_ptr, cuda_tex_result_resource, 0, 0));
297 int num_texels = image_width * image_height;
298 int num_values = num_texels * 4;
299 int size_tex_data = sizeof(GLubyte) * num_values;
300 checkCudaErrors(cudaMemcpyToArray(texture_ptr, 0, 0, cuda_dest_resource, size_tex_data, cudaMemcpyDeviceToDevice));
302 checkCudaErrors(cudaGraphicsUnmapResources(1, &cuda_tex_result_resource, 0));
306 // display image to the screen as textured quad
307 void displayImage(GLuint texture)
309 glBindTexture(GL_TEXTURE_2D, texture);
310 glEnable(GL_TEXTURE_2D);
311 glDisable(GL_DEPTH_TEST);
312 glDisable(GL_LIGHTING);
313 glTexEnvf(GL_TEXTURE_ENV, GL_TEXTURE_ENV_MODE, GL_REPLACE);
315 glMatrixMode(GL_PROJECTION);
318 glOrtho(-1.0, 1.0, -1.0, 1.0, -1.0, 1.0);
320 glMatrixMode(GL_MODELVIEW);
323 glViewport(0, 0, window_width, window_height);
325 // if the texture is a 8 bits UI, scale the fetch with a GLSL shader
326 #ifndef USE_TEXSUBIMAGE2D
327 glUseProgram(shDrawTex);
328 GLint id = glGetUniformLocation(shDrawTex, "texImage");
329 glUniform1i(id, 0); // texture unit 0 to "texImage"
330 SDK_CHECK_ERROR_GL();
334 glTexCoord2f(0.0, 0.0);
335 glVertex3f(-1.0, -1.0, 0.5);
336 glTexCoord2f(1.0, 0.0);
337 glVertex3f(1.0, -1.0, 0.5);
338 glTexCoord2f(1.0, 1.0);
339 glVertex3f(1.0, 1.0, 0.5);
340 glTexCoord2f(0.0, 1.0);
341 glVertex3f(-1.0, 1.0, 0.5);
344 glMatrixMode(GL_PROJECTION);
347 glDisable(GL_TEXTURE_2D);
349 #ifndef USE_TEXSUBIMAGE2D
352 SDK_CHECK_ERROR_GL();
355 ////////////////////////////////////////////////////////////////////////////////
357 ////////////////////////////////////////////////////////////////////////////////
361 sdkStartTimer(&timer);
366 displayImage(tex_cudaResult);
369 // NOTE: I needed to add this call so the timing is consistent.
370 // Need to investigate why
371 cudaDeviceSynchronize();
372 sdkStopTimer(&timer);
377 // If specified, Check rendering against reference,
378 if (ref_file && g_CheckRender && g_CheckRender->IsQAReadback())
385 g_CheckRender->readback(window_width, window_height);
386 char currentOutputPPM[256];
387 sprintf(currentOutputPPM, "kilt.ppm");
388 g_CheckRender->savePPM(currentOutputPPM, true, NULL);
390 if (!g_CheckRender->PPMvsPPM(currentOutputPPM, sdkFindFilePath(ref_file, pArgv[0]), MAX_EPSILON, 0.30f))
395 Cleanup((g_TotalErrors==0) ? EXIT_SUCCESS : EXIT_FAILURE);
401 // Update fps counter, fps/title display and log
402 if (++fpsCount == fpsLimit)
405 float fps = 1000.0f / sdkGetAverageTimerValue(&timer);
406 sprintf(cTitle, "CUDA GL Post Processing (%d x %d): %.1f fps", window_width, window_height, fps);
407 glutSetWindowTitle(cTitle);
408 //printf("%s\n", cTitle);
410 fpsLimit = (int)((fps > 1.0f) ? fps : 1.0f);
411 sdkResetTimer(&timer);
415 void timerEvent(int value)
418 glutTimerFunc(REFRESH_DELAY, timerEvent, 0);
421 ////////////////////////////////////////////////////////////////////////////////
422 //! Keyboard events handler
423 ////////////////////////////////////////////////////////////////////////////////
425 keyboard(unsigned char key, int /*x*/, int /*y*/)
430 Cleanup(EXIT_SUCCESS);
435 #ifdef USE_TEXTURE_RGBA8UI
439 glClearColorIuiEXT(128,128,128,255);
443 glClearColor(0.5, 0.5, 0.5, 1.0);
452 void reshape(int w, int h)
460 keyboard((unsigned char) i, 0, 0);
463 ////////////////////////////////////////////////////////////////////////////////
465 ////////////////////////////////////////////////////////////////////////////////
467 createTextureDst(GLuint *tex_cudaResult, unsigned int size_x, unsigned int size_y)
470 glGenTextures(1, tex_cudaResult);
471 glBindTexture(GL_TEXTURE_2D, *tex_cudaResult);
473 // set basic parameters
474 glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE);
475 glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE);
476 glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST);
477 glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST);
479 #ifdef USE_TEXSUBIMAGE2D
480 glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA8, size_x, size_y, 0, GL_RGBA, GL_UNSIGNED_BYTE, NULL);
481 SDK_CHECK_ERROR_GL();
483 glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA8UI_EXT, size_x, size_y, 0, GL_RGBA_INTEGER_EXT, GL_UNSIGNED_BYTE, NULL);
484 SDK_CHECK_ERROR_GL();
485 // register this texture with CUDA
486 checkCudaErrors(cudaGraphicsGLRegisterImage(&cuda_tex_result_resource, *tex_cudaResult,
487 GL_TEXTURE_2D, cudaGraphicsMapFlagsWriteDiscard));
491 ////////////////////////////////////////////////////////////////////////////////
493 ////////////////////////////////////////////////////////////////////////////////
495 deleteTexture(GLuint *tex)
497 glDeleteTextures(1, tex);
498 SDK_CHECK_ERROR_GL();
503 ////////////////////////////////////////////////////////////////////////////////
505 ////////////////////////////////////////////////////////////////////////////////
507 main(int argc, char **argv)
509 printf("%s Starting...\n\n", argv[0]);
511 if (checkCmdLineFlag(argc, (const char **)argv, "file"))
514 getCmdLineArgumentString(argc, (const char **)argv, "file", &ref_file);
520 // use command-line specified CUDA device, otherwise use device with highest Gflops/s
521 if (checkCmdLineFlag(argc, (const char **)argv, "device"))
523 printf("[%s]\n", argv[0]);
524 printf(" Does not explicitly support -device=n\n");
525 printf(" This sample requires OpenGL. Only -file=<reference> are supported\n");
526 printf("exiting...\n");
532 printf("(Test with OpenGL verification)\n");
533 runStdProgram(argc, argv);
537 printf("(Interactive OpenGL Demo)\n");
538 runStdProgram(argc, argv);
544 ////////////////////////////////////////////////////////////////////////////////
546 ////////////////////////////////////////////////////////////////////////////////
549 sdkDeleteTimer(&timer);
551 // unregister this buffer object with CUDA
552 // checkCudaErrors(cudaGraphicsUnregisterResource(cuda_tex_screen_resource));
553 #ifdef USE_TEXSUBIMAGE2D
554 checkCudaErrors(cudaGraphicsUnregisterResource(cuda_pbo_dest_resource));
555 deletePBO(&pbo_dest);
557 cudaFree(cuda_dest_resource);
559 deleteTexture(&tex_screen);
560 deleteTexture(&tex_cudaResult);
564 if (iGLUTWindowHandle)
566 glutDestroyWindow(iGLUTWindowHandle);
569 // finalize logs and leave
570 printf("postProcessGL.exe Exiting...\n");
573 void Cleanup(int iExitCode)
576 printf("PPM Images are %s\n", (iExitCode == EXIT_SUCCESS) ? "Matching" : "Not Matching");
581 ////////////////////////////////////////////////////////////////////////////////
583 ////////////////////////////////////////////////////////////////////////////////
584 GLuint compileGLSLprogram(const char *vertex_shader_src, const char *fragment_shader_src)
588 p = glCreateProgram();
590 if (vertex_shader_src)
592 v = glCreateShader(GL_VERTEX_SHADER);
593 glShaderSource(v, 1, &vertex_shader_src, NULL);
596 // check if shader compiled
598 glGetShaderiv(v, GL_COMPILE_STATUS, &compiled);
602 //#ifdef NV_REPORT_COMPILE_ERRORS
604 glGetShaderInfoLog(v, 256, NULL, temp);
605 printf("Vtx Compile failed:\n%s\n", temp);
616 if (fragment_shader_src)
618 f = glCreateShader(GL_FRAGMENT_SHADER);
619 glShaderSource(f, 1, &fragment_shader_src, NULL);
622 // check if shader compiled
624 glGetShaderiv(f, GL_COMPILE_STATUS, &compiled);
628 //#ifdef NV_REPORT_COMPILE_ERRORS
630 glGetShaderInfoLog(f, 256, NULL, temp);
631 printf("frag Compile failed:\n%s\n", temp);
644 int infologLength = 0;
645 int charsWritten = 0;
647 glGetProgramiv(p, GL_INFO_LOG_LENGTH, (GLint *)&infologLength);
649 if (infologLength > 0)
651 char *infoLog = (char *)malloc(infologLength);
652 glGetProgramInfoLog(p, infologLength, (GLsizei *)&charsWritten, infoLog);
653 printf("Shader compilation error: %s\n", infoLog);
660 ////////////////////////////////////////////////////////////////////////////////
661 //! Allocate the "render target" of CUDA
662 ////////////////////////////////////////////////////////////////////////////////
663 #ifndef USE_TEXSUBIMAGE2D
664 void initCUDABuffers()
666 // set up vertex data parameter
667 num_texels = image_width * image_height;
668 num_values = num_texels * 4;
669 size_tex_data = sizeof(GLubyte) * num_values;
670 checkCudaErrors(cudaMalloc((void **)&cuda_dest_resource, size_tex_data));
671 //checkCudaErrors(cudaHostAlloc((void**)&cuda_dest_resource, size_tex_data, ));
675 ////////////////////////////////////////////////////////////////////////////////
677 ////////////////////////////////////////////////////////////////////////////////
681 #ifdef USE_TEXSUBIMAGE2D
682 createPBO(&pbo_dest, &cuda_pbo_dest_resource);
684 // create texture that will receive the result of CUDA
685 createTextureDst(&tex_cudaResult, image_width, image_height);
686 // load shader programs
687 shDrawPot = compileGLSLprogram(NULL, glsl_drawpot_fragshader_src);
689 #ifndef USE_TEXSUBIMAGE2D
690 shDrawTex = compileGLSLprogram(glsl_drawtex_vertshader_src, glsl_drawtex_fragshader_src);
692 SDK_CHECK_ERROR_GL();
695 ////////////////////////////////////////////////////////////////////////////////
696 //! Run standard demo loop with or without GL verification
697 ////////////////////////////////////////////////////////////////////////////////
699 runStdProgram(int argc, char **argv)
701 // First initialize OpenGL context, so we can properly set the GL for CUDA.
702 // This is necessary in order to achieve optimal performance with OpenGL/CUDA interop.
703 if (false == initGL(&argc, argv))
708 // Now initialize CUDA context (GL context has been created already)
709 initCUDA(argc, argv, true);
711 sdkCreateTimer(&timer);
712 sdkResetTimer(&timer);
714 // register callbacks
715 glutDisplayFunc(display);
716 glutKeyboardFunc(keyboard);
717 glutReshapeFunc(reshape);
718 glutTimerFunc(REFRESH_DELAY, timerEvent, 0);
721 glutCreateMenu(mainMenu);
722 glutAddMenuEntry("Quit (esc)", '\033');
723 glutAttachMenu(GLUT_RIGHT_BUTTON);
726 #ifndef USE_TEXSUBIMAGE2D
730 // Creating the Auto-Validation Code
733 g_CheckRender = new CheckBackBuffer(window_width, window_height, 4);
734 g_CheckRender->setPixelFormat(GL_RGBA);
735 g_CheckRender->setExecPath(argv[0]);
736 g_CheckRender->EnableQAReadback(true);
741 "\t(right click mouse button for Menu)\n"
745 // start rendering mainloop
748 // Normally unused return path
749 Cleanup(EXIT_SUCCESS);
752 ////////////////////////////////////////////////////////////////////////////////
753 //! Initialize CUDA context
754 ////////////////////////////////////////////////////////////////////////////////
756 initCUDA(int argc, char **argv, bool bUseGL)
760 findCudaGLDevice(argc, (const char **)argv);
764 findCudaDevice(argc, (const char **)argv);
770 ////////////////////////////////////////////////////////////////////////////////
772 ////////////////////////////////////////////////////////////////////////////////
774 initGL(int *argc, char **argv)
776 if (IsOpenGLAvailable(sSDKname))
778 fprintf(stderr, " OpenGL device is Available\n");
782 fprintf(stderr, " OpenGL device is NOT Available, [%s] exiting...\n", sSDKname);
787 glutInit(argc, argv);
788 glutInitDisplayMode(GLUT_RGBA | GLUT_ALPHA | GLUT_DOUBLE | GLUT_DEPTH);
789 glutInitWindowSize(window_width, window_height);
790 iGLUTWindowHandle = glutCreateWindow("CUDA OpenGL post-processing");
792 // initialize necessary OpenGL extensions
795 if (! glewIsSupported(
797 "GL_ARB_pixel_buffer_object "
798 "GL_EXT_framebuffer_object "
801 printf("ERROR: Support for necessary OpenGL extensions missing.");
806 // default initialization
807 #ifndef USE_TEXTURE_RGBA8UI
808 glClearColor(0.5, 0.5, 0.5, 1.0);
810 glClearColorIuiEXT(128,128,128,255);
812 glDisable(GL_DEPTH_TEST);
815 glViewport(0, 0, window_width, window_height);
818 glMatrixMode(GL_PROJECTION);
820 gluPerspective(60.0, (GLfloat)window_width / (GLfloat) window_height, 0.1f, 10.0f);
822 glPolygonMode(GL_FRONT_AND_BACK, GL_FILL);
825 float red[] = { 1.0f, 0.1f, 0.1f, 1.0f };
826 float white[] = { 1.0f, 1.0f, 1.0f, 1.0f };
827 glMaterialfv(GL_FRONT_AND_BACK, GL_DIFFUSE, red);
828 glMaterialfv(GL_FRONT_AND_BACK, GL_SPECULAR, white);
829 glMaterialf(GL_FRONT_AND_BACK, GL_SHININESS, 60.0f);
831 SDK_CHECK_ERROR_GL();