/*
 * Copyright 1993-2012 NVIDIA Corporation. All rights reserved.
 *
 * Please refer to the NVIDIA end user license agreement (EULA) associated
 * with this source code for terms and conditions that govern your use of
 * this software. Any use, reproduction, disclosure, or distribution of
 * this software and related documentation outside the terms of the EULA
 * is strictly prohibited.
 */
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/stat.h>

#include <cuda.h>
#include <nvvm.h>

#include <builtin_types.h>
#include <drvapi_error_string.h>
21 // This will output the proper CUDA error strings in the event that a CUDA host call returns an error
22 #define checkCudaErrors(err) __checkCudaErrors (err, __FILE__, __LINE__)
24 // These are the inline versions for all of the SDK helper functions
25 void __checkCudaErrors( CUresult err, const char *file, const int line )
27 if( CUDA_SUCCESS != err) {
28 fprintf(stderr, "checkCudaErrors() Driver API error = %04d \"%s\" from file <%s>, line %i.\n",
29 err, getCudaDrvErrorString(err), file, line );
34 CUdevice cudaDeviceInit()
36 CUdevice cuDevice = 0;
38 CUresult err = cuInit(0);
42 if (CUDA_SUCCESS == err)
43 checkCudaErrors(cuDeviceGetCount(&deviceCount));
44 if (deviceCount == 0) {
45 fprintf(stderr, "cudaDeviceInit error: no devices supporting CUDA\n");
48 checkCudaErrors(cuDeviceGet(&cuDevice, 0));
49 cuDeviceGetName(name, 100, cuDevice);
50 printf("Using CUDA Device [0]: %s\n", name);
52 checkCudaErrors( cuDeviceComputeCapability(&major, &minor, cuDevice) );
54 fprintf(stderr, "Device 0 is not sm_20 or later\n");
61 CUresult initCUDA(CUcontext *phContext,
68 *phDevice = cudaDeviceInit();
70 // Create context on the device
71 checkCudaErrors(cuCtxCreate(phContext, 0, *phDevice));
74 checkCudaErrors(cuModuleLoadDataEx(phModule, ptx, 0, 0, 0));
76 // Locate the kernel entry poin
77 checkCudaErrors(cuModuleGetFunction(phKernel, *phModule, "simple"));
// Reads the whole of `filename` into a freshly malloc'd, NUL-terminated
// buffer. On success returns the buffer and stores the buffer size
// (file size + 1 for the terminator) in *size; the caller owns and must
// free the returned pointer. Exits the process if the file cannot be
// opened or memory cannot be allocated.
char *loadProgramSource(const char *filename, size_t *size)
{
    struct stat statbuf;
    FILE *fh;
    char *source = NULL;

    *size = 0;
    fh = fopen(filename, "rb");
    if (fh) {
        stat(filename, &statbuf);
        source = (char *) malloc(statbuf.st_size + 1);
        if (source == NULL) {
            fclose(fh);
            fprintf(stderr, "Out of memory reading file %s\n", filename);
            exit(EXIT_FAILURE);
        }
        fread(source, statbuf.st_size, 1, fh);
        source[statbuf.st_size] = 0;   // NUL-terminate so the IR can be used as a C string
        *size = statbuf.st_size + 1;
        fclose(fh);                    // was leaked in the original
    }
    else {
        fprintf(stderr, "Error reading file %s\n", filename);
        exit(EXIT_FAILURE);
    }
    return source;
}
105 char *generatePTX(const char *ll, size_t size, const char *filename)
112 result = nvvmCreateProgram(&program);
113 if (result != NVVM_SUCCESS) {
114 fprintf(stderr, "nvvmCreateProgram: Failed\n");
118 result = nvvmAddModuleToProgram(program, ll, size, filename);
119 if (result != NVVM_SUCCESS) {
120 fprintf(stderr, "nvvmAddModuleToProgram: Failed\n");
124 result = nvvmCompileProgram(program, 0, NULL);
125 if (result != NVVM_SUCCESS) {
128 fprintf(stderr, "nvvmCompileProgram: Failed\n");
129 nvvmGetProgramLogSize(program, &LogSize);
130 Msg = (char*)malloc(LogSize);
131 nvvmGetProgramLog(program, Msg);
132 fprintf(stderr, "%s\n", Msg);
137 result = nvvmGetCompiledResultSize(program, &PTXSize);
138 if (result != NVVM_SUCCESS) {
139 fprintf(stderr, "nvvmGetCompiledResultSize: Failed\n");
143 PTX = (char*)malloc(PTXSize);
144 result = nvvmGetCompiledResult(program, PTX);
145 if (result != NVVM_SUCCESS) {
146 fprintf(stderr, "nvvmGetCompiledResult: Failed\n");
151 result = nvvmDestroyProgram(&program);
152 if (result != NVVM_SUCCESS) {
153 fprintf(stderr, "nvvmDestroyProgram: Failed\n");
161 int main(int argc, char **argv)
163 const unsigned int nThreads = 32;
164 const unsigned int nBlocks = 1;
165 const size_t memSize = nThreads * nBlocks * sizeof(int);
167 CUcontext hContext = 0;
168 CUdevice hDevice = 0;
169 CUmodule hModule = 0;
170 CUfunction hKernel = 0;
171 CUdeviceptr d_data = 0;
176 // Get the ll from file
179 void *params[] = { &d_data };
181 const char *filename = "simple-gpu64.ll";
183 const char *filename = "simple-gpu.ll";
185 char *ll = loadProgramSource(filename, &size);
186 fprintf(stdout, "NVVM IR ll file loaded\n");
188 // Use libnvvm to generte PTX
189 ptx = generatePTX(ll, size, filename);
190 fprintf(stdout, "PTX generated:\n");
191 fprintf(stdout, "%s\n", ptx);
193 // Initialize the device and get a handle to the kernel
194 checkCudaErrors(initCUDA(&hContext, &hDevice, &hModule, &hKernel, ptx));
196 // Allocate memory on host and device
197 if ((h_data = (int *)malloc(memSize)) == NULL) {
198 fprintf(stderr, "Could not allocate host memory\n");
201 checkCudaErrors(cuMemAlloc(&d_data, memSize));
204 checkCudaErrors(cuLaunchKernel(hKernel, nBlocks, 1, 1, nThreads, 1, 1,
205 0, NULL, params, NULL));
206 fprintf(stdout, "CUDA kernel launched\n");
208 // Copy the result back to the host
209 checkCudaErrors(cuMemcpyDtoH(h_data, d_data, memSize));
212 for (i = 0 ; i < nBlocks * nThreads ; i++) {
213 fprintf(stdout, "%d ", h_data[i]);
216 fprintf(stdout, "\n");
220 checkCudaErrors(cuMemFree(d_data));
228 checkCudaErrors(cuModuleUnload(hModule));
232 checkCudaErrors(cuCtxDestroy(hContext));