2 * Copyright 1993-2013 NVIDIA Corporation. All rights reserved.
4 * Please refer to the NVIDIA end user license agreement (EULA) associated
5 * with this source code for terms and conditions that govern your use of
6 * this software. Any use, reproduction, disclosure, or distribution of
7 * this software and related documentation outside the terms of the EULA
8 * is strictly prohibited.
14 This example demonstrates the use of CUDA/OpenGL interoperability
15 to post-process an image of a 3D scene generated in OpenGL.
18 1 - render the scene to the framebuffer
19 2 - map the color texture so that its memory is accessible from CUDA
20 4 - run CUDA to process the image, writing to memory
21 a- either mapped from a second PBO
22 b- or allocated through CUDA
24 a- from the PBO to a texture with glTexSubImage2D()
25 b- or map the target texture and do a cuda memory copy
26 7 - display the texture with a fullscreen quad
28 The example also provides two solutions for the format of the image:
29 - RGBA16F : more bytes involved but easier to handle because
30 compatible with regular fragment shader
31 - RGBA8UI : 32bytes, but the teapot color must be scaled by 255 (so we needed GLSL code)
32 How about RGBA8? The CUDA driver does not have consistent interoperability with this format.
33 Older GPUs may not store data the same way compared with newer GPUs, resulting in a swap of R and B components
34 On older HW, this will need workarounds.
36 Press space to toggle the CUDA processing on/off.
37 Press 'a' to toggle animation.
38 Press '+' and '-' to increment and decrement blur radius
41 // this mode is "old fashion" : use glTexSubImage2D() to update the final result
42 // commenting it will make the sample use the other way :
43 // map a texture in CUDA and blit the result into it
44 #define USE_TEXSUBIMAGE2D
47 # define WINDOWS_LEAN_AND_MEAN
50 #pragma warning(disable:4996)
53 // OpenGL Graphics includes
55 #if defined(__APPLE__) || defined(MACOSX)
56 #include <GLUT/glut.h>
57 // Sorry for Apple : unsigned int sampler is not available to you, yet...
58 // Let's switch to the use of PBO and glTexSubImage
59 #define USE_TEXSUBIMAGE2D
61 #include <GL/freeglut.h>
65 #include <cuda_runtime.h>
66 #include <cuda_gl_interop.h>
68 // CUDA utilities and system includes
69 #include <helper_cuda.h>
70 #include <helper_cuda_gl.h>
72 #include <helper_functions.h>
73 #include <rendercheck_gl.h>
75 // Shared Library Test Functions
76 #define MAX_EPSILON 10
77 #define REFRESH_DELAY 10 //ms
79 const char *sSDKname = "postProcessGL";
81 unsigned int g_TotalErrors = 0;
83 // CheckFBO/BackBuffer class objects
84 CheckRender *g_CheckRender = NULL;
86 ////////////////////////////////////////////////////////////////////////////////
87 // constants / global variables
88 unsigned int window_width = 512;
89 unsigned int window_height = 512;
90 unsigned int image_width = 512;
91 unsigned int image_height = 512;
92 int iGLUTWindowHandle = 0; // handle to the GLUT window
94 // pbo and fbo variables
95 #ifdef USE_TEXSUBIMAGE2D
97 struct cudaGraphicsResource *cuda_pbo_dest_resource;
99 unsigned int *cuda_dest_resource;
100 GLuint shDrawTex; // draws a texture
101 struct cudaGraphicsResource *cuda_tex_result_resource;
105 struct cudaGraphicsResource *cuda_tex_screen_resource;
107 unsigned int size_tex_data;
108 unsigned int num_texels;
109 unsigned int num_values;
111 // (offscreen) render target fbo variables
112 GLuint framebuffer; // to bind the proper targets
113 GLuint depth_buffer; // for proper depth test while rendering the scene
114 GLuint tex_screen; // where we render the image
115 GLuint tex_cudaResult; // where we will copy the CUDA result
119 char *ref_file = NULL;
120 bool enable_cuda = true;
123 int max_blur_radius = 16;
130 static int fpsCount = 0;
131 static int fpsLimit = 1;
132 StopWatchInterface *timer = NULL;
134 #ifndef USE_TEXTURE_RGBA8UI
135 # pragma message("Note: Using Texture fmt GL_RGBA16F_ARB")
137 // NOTE: the current issue with regular RGBA8 internal format of textures
138 // is that HW stores them as BGRA8. Therefore CUDA will see BGRA where users
139 // expected RGBA8. To prevent this issue, the driver team decided to prevent this to happen
140 // instead, use RGBA8UI which required the additional work of scaling the fragment shader
141 // output from 0-1 to 0-255. This is why we have some GLSL code, in this case
142 # pragma message("Note: Using Texture RGBA8UI + GLSL for teapot rendering")
144 GLuint shDrawPot; // colors the teapot
147 bool IsOpenGLAvailable(const char *appName)
152 #if (defined(__APPLE__) || defined(MACOSX))
153 bool IsOpenGLAvailable(const char *appName)
158 // check if this is a linux machine
159 #include <X11/Xlib.h>
161 bool IsOpenGLAvailable(const char *appName)
163 Display *Xdisplay = XOpenDisplay(NULL);
165 if (Xdisplay == NULL)
171 XCloseDisplay(Xdisplay);
178 ////////////////////////////////////////////////////////////////////////////////
180 launch_cudaProcess(dim3 grid, dim3 block, int sbytes,
181 cudaArray *g_data, unsigned int *g_odata,
182 int imgw, int imgh, int tilew,
183 int radius, float threshold, float highlight);
185 // Forward declarations
186 void runStdProgram(int argc, char **argv);
188 void Cleanup(int iExitCode);
191 bool initCUDA(int argc, char **argv, bool bUseGL);
192 bool initGL(int *argc, char **argv);
194 #ifdef USE_TEXSUBIMAGE2D
195 void createPBO(GLuint *pbo, struct cudaGraphicsResource **pbo_resource);
196 void deletePBO(GLuint *pbo);
199 void createTextureDst(GLuint *tex_cudaResult, unsigned int size_x, unsigned int size_y);
200 void createTextureSrc(GLuint *tex_screen, unsigned int size_x, unsigned int size_y);
201 void deleteTexture(GLuint *tex);
202 void createDepthBuffer(GLuint *depth, unsigned int size_x, unsigned int size_y);
203 void deleteDepthBuffer(GLuint *depth);
204 void createFramebuffer(GLuint *fbo, GLuint color, GLuint depth);
205 void deleteFramebuffer(GLuint *fbo);
207 // rendering callbacks
210 void keyboard(unsigned char key, int x, int y);
211 void reshape(int w, int h);
212 void mainMenu(int i);
214 ////////////////////////////////////////////////////////////////////////////////
215 //! Run the Cuda part of the computation
216 ////////////////////////////////////////////////////////////////////////////////
217 void process(int width, int height, int radius)
220 unsigned int *out_data;
222 #ifdef USE_TEXSUBIMAGE2D
223 checkCudaErrors(cudaGraphicsMapResources(1, &cuda_pbo_dest_resource, 0));
225 checkCudaErrors(cudaGraphicsResourceGetMappedPointer((void **)&out_data, &num_bytes,
226 cuda_pbo_dest_resource));
227 //printf("CUDA mapped pointer of pbo_out: May access %ld bytes, expected %d\n", num_bytes, size_tex_data);
229 out_data = cuda_dest_resource;
232 // map buffer objects to get CUDA device pointers
233 checkCudaErrors(cudaGraphicsMapResources(1, &cuda_tex_screen_resource, 0));
234 //printf("Mapping tex_in\n");
235 checkCudaErrors(cudaGraphicsSubResourceGetMappedArray(&in_array, cuda_tex_screen_resource, 0, 0));
237 // calculate grid size
238 dim3 block(16, 16, 1);
239 //dim3 block(16, 16, 1);
240 dim3 grid(width / block.x, height / block.y, 1);
241 int sbytes = (block.x+(2*radius))*(block.y+(2*radius))*sizeof(unsigned int);
243 // execute CUDA kernel
244 launch_cudaProcess(grid, block, sbytes,
245 in_array, out_data, width, height,
246 block.x+(2*radius), radius, 0.8f, 4.0f);
248 checkCudaErrors(cudaGraphicsUnmapResources(1, &cuda_tex_screen_resource, 0));
249 #ifdef USE_TEXSUBIMAGE2D
250 checkCudaErrors(cudaGraphicsUnmapResources(1, &cuda_pbo_dest_resource, 0));
254 #ifdef USE_TEXSUBIMAGE2D
255 ////////////////////////////////////////////////////////////////////////////////
257 ////////////////////////////////////////////////////////////////////////////////
259 createPBO(GLuint *pbo, struct cudaGraphicsResource **pbo_resource)
261 // set up vertex data parameter
262 num_texels = image_width * image_height;
263 num_values = num_texels * 4;
264 size_tex_data = sizeof(GLubyte) * num_values;
265 void *data = malloc(size_tex_data);
267 // create buffer object
268 glGenBuffers(1, pbo);
269 glBindBuffer(GL_ARRAY_BUFFER, *pbo);
270 glBufferData(GL_ARRAY_BUFFER, size_tex_data, data, GL_DYNAMIC_DRAW);
273 glBindBuffer(GL_ARRAY_BUFFER, 0);
275 // register this buffer object with CUDA
276 checkCudaErrors(cudaGraphicsGLRegisterBuffer(pbo_resource, *pbo, cudaGraphicsMapFlagsNone));
278 SDK_CHECK_ERROR_GL();
282 deletePBO(GLuint *pbo)
284 glDeleteBuffers(1, pbo);
285 SDK_CHECK_ERROR_GL();
290 const GLenum fbo_targets[] =
292 GL_COLOR_ATTACHMENT0_EXT, GL_COLOR_ATTACHMENT1_EXT,
293 GL_COLOR_ATTACHMENT2_EXT, GL_COLOR_ATTACHMENT3_EXT
296 #ifndef USE_TEXSUBIMAGE2D
297 static const char *glsl_drawtex_vertshader_src =
300 " gl_Position = gl_Vertex;\n"
301 " gl_TexCoord[0].xy = gl_MultiTexCoord0.xy;\n"
304 static const char *glsl_drawtex_fragshader_src =
306 "uniform usampler2D texImage;\n"
309 " vec4 c = texture(texImage, gl_TexCoord[0].xy);\n"
310 " gl_FragColor = c / 255.0;\n"
314 static const char *glsl_drawpot_fragshader_src =
315 //WARNING: seems like the gl_FragColor doesn't want to output >1 colors...
316 //you need version 1.3 so you can define a uvec4 output...
317 //but MacOSX complains about not supporting 1.3 !!
318 // for now, the mode where we use RGBA8UI may not work properly for Apple : only RGBA16F works (default)
319 #if defined(__APPLE__) || defined(MACOSX)
322 " gl_FragColor = vec4(gl_Color * 255.0);\n"
327 "out uvec4 FragColor;\n"
330 " FragColor = uvec4(inColor.xyz * 255.0, 255.0);\n"
334 ////////////////////////////////////////////////////////////////////////////////
335 //! render a simple 3D scene
336 ////////////////////////////////////////////////////////////////////////////////
337 void renderScene(bool colorScale)
339 glClear(GL_COLOR_BUFFER_BIT | GL_DEPTH_BUFFER_BIT);
343 glUseProgram(shDrawPot);
344 glBindFragDataLocationEXT(shDrawPot, 0, "FragColor");
345 SDK_CHECK_ERROR_GL();
348 glMatrixMode(GL_MODELVIEW);
350 glTranslatef(0.0, 0.0, -3.0);
351 glRotatef(rotate[0], 1.0, 0.0, 0.0);
352 glRotatef(rotate[1], 0.0, 1.0, 0.0);
353 glRotatef(rotate[2], 0.0, 0.0, 1.0);
355 glViewport(0, 0, 512, 512);
357 glEnable(GL_LIGHTING);
358 glEnable(GL_DEPTH_TEST);
360 glutSolidTeapot(1.0);
367 SDK_CHECK_ERROR_GL();
370 // copy image and process using CUDA
373 // run the Cuda kernel
374 process(image_width, image_height, blur_radius);
376 // CUDA generated data in cuda memory or in a mapped PBO made of BGRA 8 bits
377 // 2 solutions, here :
378 // - use glTexSubImage2D(), there is the potential to loose performance in possible hidden conversion
379 // - map the texture and blit the result thanks to CUDA API
380 #ifdef USE_TEXSUBIMAGE2D
381 glBindBuffer(GL_PIXEL_UNPACK_BUFFER_ARB, pbo_dest);
383 glBindTexture(GL_TEXTURE_2D, tex_cudaResult);
384 glTexSubImage2D(GL_TEXTURE_2D, 0, 0, 0,
385 image_width, image_height,
386 GL_RGBA, GL_UNSIGNED_BYTE, NULL);
387 SDK_CHECK_ERROR_GL();
388 glBindBuffer(GL_PIXEL_PACK_BUFFER_ARB, 0);
389 glBindBuffer(GL_PIXEL_UNPACK_BUFFER_ARB, 0);
391 // We want to copy cuda_dest_resource data to the texture
392 // map buffer objects to get CUDA device pointers
393 cudaArray *texture_ptr;
394 checkCudaErrors(cudaGraphicsMapResources(1, &cuda_tex_result_resource, 0));
395 checkCudaErrors(cudaGraphicsSubResourceGetMappedArray(&texture_ptr, cuda_tex_result_resource, 0, 0));
397 int num_texels = image_width * image_height;
398 int num_values = num_texels * 4;
399 int size_tex_data = sizeof(GLubyte) * num_values;
400 checkCudaErrors(cudaMemcpyToArray(texture_ptr, 0, 0, cuda_dest_resource, size_tex_data, cudaMemcpyDeviceToDevice));
402 checkCudaErrors(cudaGraphicsUnmapResources(1, &cuda_tex_result_resource, 0));
406 // display image to the screen as textured quad
407 void displayImage(GLuint texture)
409 glBindTexture(GL_TEXTURE_2D, texture);
410 glEnable(GL_TEXTURE_2D);
411 glDisable(GL_DEPTH_TEST);
412 glDisable(GL_LIGHTING);
413 glTexEnvf(GL_TEXTURE_ENV, GL_TEXTURE_ENV_MODE, GL_REPLACE);
415 glMatrixMode(GL_PROJECTION);
418 glOrtho(-1.0, 1.0, -1.0, 1.0, -1.0, 1.0);
420 glMatrixMode(GL_MODELVIEW);
423 glViewport(0, 0, window_width, window_height);
425 // if the texture is a 8 bits UI, scale the fetch with a GLSL shader
426 #ifndef USE_TEXSUBIMAGE2D
427 glUseProgram(shDrawTex);
428 GLint id = glGetUniformLocation(shDrawTex, "texImage");
429 glUniform1i(id, 0); // texture unit 0 to "texImage"
430 SDK_CHECK_ERROR_GL();
434 glTexCoord2f(0.0, 0.0);
435 glVertex3f(-1.0, -1.0, 0.5);
436 glTexCoord2f(1.0, 0.0);
437 glVertex3f(1.0, -1.0, 0.5);
438 glTexCoord2f(1.0, 1.0);
439 glVertex3f(1.0, 1.0, 0.5);
440 glTexCoord2f(0.0, 1.0);
441 glVertex3f(-1.0, 1.0, 0.5);
444 glMatrixMode(GL_PROJECTION);
447 glDisable(GL_TEXTURE_2D);
449 #ifndef USE_TEXSUBIMAGE2D
452 SDK_CHECK_ERROR_GL();
455 ////////////////////////////////////////////////////////////////////////////////
457 ////////////////////////////////////////////////////////////////////////////////
461 sdkStartTimer(&timer);
465 glBindFramebufferEXT(GL_FRAMEBUFFER_EXT, framebuffer);
466 #ifndef USE_TEXTURE_RGBA8UI
469 renderScene(true); // output of fragment * by 255 (for RGBA8UI texture)
472 glBindFramebufferEXT(GL_FRAMEBUFFER_EXT, 0);
473 displayImage(tex_cudaResult);
480 // NOTE: I needed to add this call so the timing is consistent.
481 // Need to investigate why
482 cudaDeviceSynchronize();
483 sdkStopTimer(&timer);
488 // If specified, Check rendering against reference,
489 if (ref_file && g_CheckRender && g_CheckRender->IsQAReadback())
496 g_CheckRender->readback(window_width, window_height);
497 char currentOutputPPM[256];
498 sprintf(currentOutputPPM, "teapot_%d.ppm", blur_radius);
499 g_CheckRender->savePPM(currentOutputPPM, true, NULL);
501 if (!g_CheckRender->PPMvsPPM(currentOutputPPM, sdkFindFilePath(ref_file, pArgv[0]), MAX_EPSILON, 0.30f))
506 Cleanup((g_TotalErrors==0) ? EXIT_SUCCESS : EXIT_FAILURE);
512 // Update fps counter, fps/title display and log
513 if (++fpsCount == fpsLimit)
516 float fps = 1000.0f / sdkGetAverageTimerValue(&timer);
517 sprintf(cTitle, "CUDA GL Post Processing (%d x %d): %.1f fps", window_width, window_height, fps);
518 glutSetWindowTitle(cTitle);
519 //printf("%s\n", cTitle);
521 fpsLimit = (int)((fps > 1.0f) ? fps : 1.0f);
522 sdkResetTimer(&timer);
526 void timerEvent(int value)
532 if (rotate[0] > 360.0f)
539 if (rotate[1] > 360.0f)
546 if (rotate[2] > 360.0f)
553 glutTimerFunc(REFRESH_DELAY, timerEvent, 0);
556 ////////////////////////////////////////////////////////////////////////////////
557 //! Keyboard events handler
558 ////////////////////////////////////////////////////////////////////////////////
560 keyboard(unsigned char key, int /*x*/, int /*y*/)
565 Cleanup(EXIT_SUCCESS);
570 #ifdef USE_TEXTURE_RGBA8UI
574 glClearColorIuiEXT(128,128,128,255);
578 glClearColor(0.5, 0.5, 0.5, 1.0);
590 if (blur_radius < 16)
595 printf("radius = %d\n", blur_radius);
604 printf("radius = %d\n", blur_radius);
609 void reshape(int w, int h)
617 keyboard((unsigned char) i, 0, 0);
620 ////////////////////////////////////////////////////////////////////////////////
622 ////////////////////////////////////////////////////////////////////////////////
624 createTextureSrc(GLuint *tex_screen, unsigned int size_x, unsigned int size_y)
627 glGenTextures(1, tex_screen);
628 glBindTexture(GL_TEXTURE_2D, *tex_screen);
630 // set basic parameters
631 glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE);
632 glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE);
633 glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST);
634 glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST);
637 #ifndef USE_TEXTURE_RGBA8UI
638 printf("Creating a Texture render target GL_RGBA16F_ARB\n");
639 glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA16F_ARB, size_x, size_y, 0, GL_RGBA, GL_UNSIGNED_BYTE, NULL);
641 printf("Creating a Texture render target GL_RGBA8UI_EXT\n");
642 glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA8UI_EXT, size_x, size_y, 0, GL_RGBA_INTEGER_EXT, GL_UNSIGNED_BYTE, NULL);
644 SDK_CHECK_ERROR_GL();
645 // register this texture with CUDA
646 checkCudaErrors(cudaGraphicsGLRegisterImage(&cuda_tex_screen_resource, *tex_screen,
647 GL_TEXTURE_2D, cudaGraphicsMapFlagsReadOnly));
650 ////////////////////////////////////////////////////////////////////////////////
652 ////////////////////////////////////////////////////////////////////////////////
654 createTextureDst(GLuint *tex_cudaResult, unsigned int size_x, unsigned int size_y)
657 glGenTextures(1, tex_cudaResult);
658 glBindTexture(GL_TEXTURE_2D, *tex_cudaResult);
660 // set basic parameters
661 glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE);
662 glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE);
663 glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST);
664 glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST);
666 #ifdef USE_TEXSUBIMAGE2D
667 glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA8, size_x, size_y, 0, GL_RGBA, GL_UNSIGNED_BYTE, NULL);
668 SDK_CHECK_ERROR_GL();
670 glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA8UI_EXT, size_x, size_y, 0, GL_RGBA_INTEGER_EXT, GL_UNSIGNED_BYTE, NULL);
671 SDK_CHECK_ERROR_GL();
672 // register this texture with CUDA
673 checkCudaErrors(cudaGraphicsGLRegisterImage(&cuda_tex_result_resource, *tex_cudaResult,
674 GL_TEXTURE_2D, cudaGraphicsMapFlagsWriteDiscard));
678 ////////////////////////////////////////////////////////////////////////////////
680 ////////////////////////////////////////////////////////////////////////////////
682 deleteTexture(GLuint *tex)
684 glDeleteTextures(1, tex);
685 SDK_CHECK_ERROR_GL();
690 ////////////////////////////////////////////////////////////////////////////////
692 ////////////////////////////////////////////////////////////////////////////////
694 createDepthBuffer(GLuint *depth, unsigned int size_x, unsigned int size_y)
696 // create a renderbuffer
697 glGenRenderbuffersEXT(1, depth);
698 glBindRenderbufferEXT(GL_RENDERBUFFER_EXT, *depth);
701 glRenderbufferStorageEXT(GL_RENDERBUFFER_EXT, GL_DEPTH_COMPONENT24, size_x, size_y);
704 glBindRenderbufferEXT(GL_RENDERBUFFER_EXT, 0);
706 SDK_CHECK_ERROR_GL();
709 ////////////////////////////////////////////////////////////////////////////////
711 ////////////////////////////////////////////////////////////////////////////////
713 //createRenderBuffer(GLuint* render, unsigned int size_x, unsigned int size_y)
715 // // create a renderbuffer
716 // glGenRenderbuffersEXT(1, render);
717 // glBindRenderbufferEXT(GL_RENDERBUFFER_EXT, *render);
719 // // allocate storage
720 // glRenderbufferStorageEXT(GL_RENDERBUFFER_EXT, GL_RGBA8, size_x, size_y);
723 // glBindRenderbufferEXT(GL_RENDERBUFFER_EXT, 0);
725 // SDK_CHECK_ERROR_GL();
727 // checkCudaErrors(cudaGraphicsGLRegisterImage(&cuda_tex_screen_resource, *render,
728 // GL_RENDERBUFFER_EXT, cudaGraphicsMapFlagsReadOnly));
731 ////////////////////////////////////////////////////////////////////////////////
733 ////////////////////////////////////////////////////////////////////////////////
735 deleteDepthBuffer(GLuint *depth)
737 glDeleteRenderbuffersEXT(1, depth);
738 SDK_CHECK_ERROR_GL();
743 ////////////////////////////////////////////////////////////////////////////////
745 ////////////////////////////////////////////////////////////////////////////////
747 createFramebuffer(GLuint *fbo, GLuint color, GLuint depth)
749 // create and bind a framebuffer
750 glGenFramebuffersEXT(1, fbo);
751 glBindFramebufferEXT(GL_FRAMEBUFFER_EXT, *fbo);
754 glFramebufferTexture2DEXT(GL_FRAMEBUFFER_EXT, GL_COLOR_ATTACHMENT0_EXT, GL_TEXTURE_2D, color, 0);
755 //glFramebufferRenderbufferEXT(GL_FRAMEBUFFER_EXT, GL_COLOR_ATTACHMENT0_EXT, GL_RENDERBUFFER_EXT, color);
756 glFramebufferRenderbufferEXT(GL_FRAMEBUFFER_EXT, GL_DEPTH_ATTACHMENT_EXT, GL_RENDERBUFFER_EXT, depth);
759 glBindFramebufferEXT(GL_FRAMEBUFFER_EXT, 0);
761 SDK_CHECK_ERROR_GL();
764 ////////////////////////////////////////////////////////////////////////////////
766 ////////////////////////////////////////////////////////////////////////////////
768 deleteFramebuffer(GLuint *fbo)
770 glDeleteFramebuffersEXT(1, fbo);
771 SDK_CHECK_ERROR_GL();
776 ////////////////////////////////////////////////////////////////////////////////
778 ////////////////////////////////////////////////////////////////////////////////
780 main(int argc, char **argv)
782 printf("%s Starting...\n\n", argv[0]);
784 if (checkCmdLineFlag(argc, (const char **)argv, "radius") &&
785 checkCmdLineFlag(argc, (const char **)argv, "file"))
788 getCmdLineArgumentString(argc, (const char **)argv, "file", &ref_file);
789 blur_radius = getCmdLineArgumentInt(argc, (const char **)argv, "radius");
795 // use command-line specified CUDA device, otherwise use device with highest Gflops/s
796 if (checkCmdLineFlag(argc, (const char **)argv, "device"))
798 printf("[%s]\n", argv[0]);
799 printf(" Does not explicitly support -device=n\n");
800 printf(" This sample requires OpenGL. Only -file=<reference> -radius=<n> are supported\n");
801 printf("exiting...\n");
807 printf("(Test with OpenGL verification)\n");
810 runStdProgram(argc, argv);
814 printf("(Interactive OpenGL Demo)\n");
817 runStdProgram(argc, argv);
823 ////////////////////////////////////////////////////////////////////////////////
825 ////////////////////////////////////////////////////////////////////////////////
828 sdkDeleteTimer(&timer);
830 // unregister this buffer object with CUDA
831 checkCudaErrors(cudaGraphicsUnregisterResource(cuda_tex_screen_resource));
832 #ifdef USE_TEXSUBIMAGE2D
833 checkCudaErrors(cudaGraphicsUnregisterResource(cuda_pbo_dest_resource));
834 deletePBO(&pbo_dest);
836 cudaFree(cuda_dest_resource);
838 deleteTexture(&tex_screen);
839 deleteTexture(&tex_cudaResult);
840 deleteDepthBuffer(&depth_buffer);
841 deleteFramebuffer(&framebuffer);
845 if (iGLUTWindowHandle)
847 glutDestroyWindow(iGLUTWindowHandle);
850 // finalize logs and leave
851 printf("postProcessGL.exe Exiting...\n");
854 void Cleanup(int iExitCode)
857 printf("Images are %s\n", (iExitCode == EXIT_SUCCESS) ? "Matching" : "Not Matching");
862 ////////////////////////////////////////////////////////////////////////////////
864 ////////////////////////////////////////////////////////////////////////////////
865 GLuint compileGLSLprogram(const char *vertex_shader_src, const char *fragment_shader_src)
869 p = glCreateProgram();
871 if (vertex_shader_src)
873 v = glCreateShader(GL_VERTEX_SHADER);
874 glShaderSource(v, 1, &vertex_shader_src, NULL);
877 // check if shader compiled
879 glGetShaderiv(v, GL_COMPILE_STATUS, &compiled);
883 //#ifdef NV_REPORT_COMPILE_ERRORS
885 glGetShaderInfoLog(v, 256, NULL, temp);
886 printf("Vtx Compile failed:\n%s\n", temp);
897 if (fragment_shader_src)
899 f = glCreateShader(GL_FRAGMENT_SHADER);
900 glShaderSource(f, 1, &fragment_shader_src, NULL);
903 // check if shader compiled
905 glGetShaderiv(f, GL_COMPILE_STATUS, &compiled);
909 //#ifdef NV_REPORT_COMPILE_ERRORS
911 glGetShaderInfoLog(f, 256, NULL, temp);
912 printf("frag Compile failed:\n%s\n", temp);
925 int infologLength = 0;
926 int charsWritten = 0;
929 glGetProgramiv(p, GL_LINK_STATUS, &linked);
932 glGetProgramiv(p, GL_INFO_LOG_LENGTH, (GLint *)&infologLength);
933 if (infologLength > 0)
935 char *infoLog = (char *)malloc(infologLength);
936 glGetProgramInfoLog(p, infologLength, (GLsizei *)&charsWritten, infoLog);
937 printf("Shader compilation error: %s\n", infoLog);
944 ////////////////////////////////////////////////////////////////////////////////
945 //! Allocate the "render target" of CUDA
946 ////////////////////////////////////////////////////////////////////////////////
947 #ifndef USE_TEXSUBIMAGE2D
948 void initCUDABuffers()
950 // set up vertex data parameter
951 num_texels = image_width * image_height;
952 num_values = num_texels * 4;
953 size_tex_data = sizeof(GLubyte) * num_values;
954 checkCudaErrors(cudaMalloc((void **)&cuda_dest_resource, size_tex_data));
955 //checkCudaErrors(cudaHostAlloc((void**)&cuda_dest_resource, size_tex_data, ));
959 ////////////////////////////////////////////////////////////////////////////////
961 ////////////////////////////////////////////////////////////////////////////////
965 #ifdef USE_TEXSUBIMAGE2D
966 createPBO(&pbo_dest, &cuda_pbo_dest_resource);
968 // create texture that will receive the result of CUDA
969 createTextureDst(&tex_cudaResult, image_width, image_height);
971 // create texture for blitting onto the screen
972 createTextureSrc(&tex_screen, image_width, image_height);
973 //createRenderBuffer(&tex_screen, image_width, image_height); // Doesn't work
975 // create a depth buffer for offscreen rendering
976 createDepthBuffer(&depth_buffer, image_width, image_height);
978 // create a framebuffer for offscreen rendering
979 createFramebuffer(&framebuffer, tex_screen, depth_buffer);
981 // load shader programs
982 shDrawPot = compileGLSLprogram(NULL, glsl_drawpot_fragshader_src);
984 #ifndef USE_TEXSUBIMAGE2D
985 shDrawTex = compileGLSLprogram(glsl_drawtex_vertshader_src, glsl_drawtex_fragshader_src);
987 SDK_CHECK_ERROR_GL();
990 ////////////////////////////////////////////////////////////////////////////////
991 //! Run standard demo loop with or without GL verification
992 ////////////////////////////////////////////////////////////////////////////////
994 runStdProgram(int argc, char **argv)
996 // First initialize OpenGL context, so we can properly set the GL for CUDA.
997 // This is necessary in order to achieve optimal performance with OpenGL/CUDA interop.
998 if (false == initGL(&argc, argv))
1003 // Now initialize CUDA context (GL context has been created already)
1004 initCUDA(argc, argv, true);
1006 sdkCreateTimer(&timer);
1007 sdkResetTimer(&timer);
1009 // register callbacks
1010 glutDisplayFunc(display);
1011 glutKeyboardFunc(keyboard);
1012 glutReshapeFunc(reshape);
1013 glutTimerFunc(REFRESH_DELAY, timerEvent, 0);
1016 glutCreateMenu(mainMenu);
1017 glutAddMenuEntry("Toggle CUDA Post Processing (on/off) [ ]", ' ');
1018 glutAddMenuEntry("Toggle Animation (on/off) [a]", 'a');
1019 glutAddMenuEntry("Increase Blur Radius [=]", '=');
1020 glutAddMenuEntry("Decrease Blur Radius [-]", '-');
1021 glutAddMenuEntry("Quit (esc)", '\033');
1022 glutAttachMenu(GLUT_RIGHT_BUTTON);
1025 #ifndef USE_TEXSUBIMAGE2D
1029 // Creating the Auto-Validation Code
1032 g_CheckRender = new CheckBackBuffer(window_width, window_height, 4);
1033 g_CheckRender->setPixelFormat(GL_RGBA);
1034 g_CheckRender->setExecPath(argv[0]);
1035 g_CheckRender->EnableQAReadback(true);
1040 "\t(right click mouse button for Menu)\n"
1041 "\t[ ] : Toggle CUDA Post Processing (on/off)\n"
1042 "\t[a] : Toggle Animation (on/off)\n"
1043 "\t[=] : Increase Blur Radius\n"
1044 "\t[-] : Decrease Blur Radius\n"
1045 "\t[esc] - Quit\n\n"
1048 // start rendering mainloop
1051 // Normally unused return path
1052 Cleanup(EXIT_SUCCESS);
1055 ////////////////////////////////////////////////////////////////////////////////
1056 //! Initialize CUDA context
1057 ////////////////////////////////////////////////////////////////////////////////
1059 initCUDA(int argc, char **argv, bool bUseGL)
1063 findCudaGLDevice(argc, (const char **)argv);
1067 findCudaDevice(argc, (const char **)argv);
1073 ////////////////////////////////////////////////////////////////////////////////
1075 ////////////////////////////////////////////////////////////////////////////////
1077 initGL(int *argc, char **argv)
1079 if (IsOpenGLAvailable(sSDKname))
1081 fprintf(stderr, " OpenGL device is Available\n");
1085 fprintf(stderr, " OpenGL device is NOT Available, [%s] exiting...\n", sSDKname);
1089 // Create GL context
1090 glutInit(argc, argv);
1091 glutInitDisplayMode(GLUT_RGBA | GLUT_ALPHA | GLUT_DOUBLE | GLUT_DEPTH);
1092 glutInitWindowSize(window_width, window_height);
1093 iGLUTWindowHandle = glutCreateWindow("CUDA OpenGL post-processing");
1095 // initialize necessary OpenGL extensions
1098 if (! glewIsSupported(
1100 "GL_ARB_pixel_buffer_object "
1101 "GL_EXT_framebuffer_object "
1104 printf("ERROR: Support for necessary OpenGL extensions missing.");
1109 // default initialization
1110 #ifndef USE_TEXTURE_RGBA8UI
1111 glClearColor(0.5, 0.5, 0.5, 1.0);
1113 glClearColorIuiEXT(128,128,128,255);
1115 glDisable(GL_DEPTH_TEST);
1118 glViewport(0, 0, window_width, window_height);
1121 glMatrixMode(GL_PROJECTION);
1123 gluPerspective(60.0, (GLfloat)window_width / (GLfloat) window_height, 0.1f, 10.0f);
1125 glPolygonMode(GL_FRONT_AND_BACK, GL_FILL);
1127 glEnable(GL_LIGHT0);
1128 float red[] = { 1.0f, 0.1f, 0.1f, 1.0f };
1129 float white[] = { 1.0f, 1.0f, 1.0f, 1.0f };
1130 glMaterialfv(GL_FRONT_AND_BACK, GL_DIFFUSE, red);
1131 glMaterialfv(GL_FRONT_AND_BACK, GL_SPECULAR, white);
1132 glMaterialf(GL_FRONT_AND_BACK, GL_SHININESS, 60.0f);
1134 SDK_CHECK_ERROR_GL();