/*
 * Copyright 1993-2012 NVIDIA Corporation. All rights reserved.
 *
 * Please refer to the NVIDIA end user license agreement (EULA) associated
 * with this source code for terms and conditions that govern your use of
 * this software. Any use, reproduction, disclosure, or distribution of
 * this software and related documentation outside the terms of the EULA
 * is strictly prohibited.
 */
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/stat.h>

#include <cuda.h>
#include <nvvm.h>

#include <builtin_types.h>
#include <drvapi_error_string.h>
21 // This will output the proper CUDA error strings in the event that a CUDA host call returns an error
22 #define checkCudaErrors(err) __checkCudaErrors (err, __FILE__, __LINE__)
24 // These are the inline versions for all of the SDK helper functions
25 void __checkCudaErrors( CUresult err, const char *file, const int line )
27 if( CUDA_SUCCESS != err) {
28 fprintf(stderr, "checkCudaErrors() Driver API error = %04d \"%s\" from file <%s>, line %i.\n",
29 err, getCudaDrvErrorString(err), file, line );
34 CUdevice cudaDeviceInit()
36 CUdevice cuDevice = 0;
38 CUresult err = cuInit(0);
42 if (CUDA_SUCCESS == err)
43 checkCudaErrors(cuDeviceGetCount(&deviceCount));
44 if (deviceCount == 0) {
45 fprintf(stderr, "cudaDeviceInit error: no devices supporting CUDA\n");
48 checkCudaErrors(cuDeviceGet(&cuDevice, 0));
49 cuDeviceGetName(name, 100, cuDevice);
50 printf("Using CUDA Device [0]: %s\n", name);
52 checkCudaErrors( cuDeviceComputeCapability(&major, &minor, cuDevice) );
54 fprintf(stderr, "Device 0 is not sm_20 or later\n");
61 CUresult initCUDA(CUcontext *phContext,
68 *phDevice = cudaDeviceInit();
70 // Create context on the device
71 checkCudaErrors(cuCtxCreate(phContext, 0, *phDevice));
74 checkCudaErrors(cuModuleLoadDataEx(phModule, ptx, 0, 0, 0));
76 // Locate the kernel entry poin
77 checkCudaErrors(cuModuleGetFunction(phKernel, *phModule, "simple"));
// Reads the whole of `filename` into a freshly malloc'd, NUL-terminated
// buffer. On success returns the buffer and stores the buffer size
// (file size + 1 for the terminator) in *size; the caller owns and must
// free the returned pointer. Exits the process if the file cannot be
// opened or memory cannot be allocated.
char *loadProgramSource(const char *filename, size_t *size)
{
    struct stat statbuf;
    FILE *fh;
    char *source = NULL;

    *size = 0;
    fh = fopen(filename, "rb");
    if (fh) {
        stat(filename, &statbuf);
        source = (char *) malloc(statbuf.st_size + 1);
        if (source == NULL) {
            fclose(fh);
            fprintf(stderr, "Out of memory reading file %s\n", filename);
            exit(EXIT_FAILURE);
        }
        fread(source, statbuf.st_size, 1, fh);
        source[statbuf.st_size] = 0;   // NUL-terminate so the IR can be used as a C string
        *size = statbuf.st_size + 1;
        fclose(fh);                    // was leaked in the original
    }
    else {
        fprintf(stderr, "Error reading file %s\n", filename);
        exit(EXIT_FAILURE);
    }
    return source;
}
105 char *generatePTX(const char *ll, size_t size, const char *filename)
112 result = nvvmCreateProgram(&program);
113 if (result != NVVM_SUCCESS) {
114 fprintf(stderr, "nvvmCreateProgram: Failed\n");
118 result = nvvmAddModuleToProgram(program, ll, size, filename);
119 if (result != NVVM_SUCCESS) {
120 fprintf(stderr, "nvvmAddModuleToProgram: Failed\n");
124 result = nvvmCompileProgram(program, 0, NULL);
125 if (result != NVVM_SUCCESS) {
128 fprintf(stderr, "nvvmCompileProgram: Failed\n");
129 nvvmGetProgramLogSize(program, &LogSize);
130 Msg = (char*)malloc(LogSize);
131 nvvmGetProgramLog(program, Msg);
132 fprintf(stderr, "%s\n", Msg);
137 result = nvvmGetCompiledResultSize(program, &PTXSize);
138 if (result != NVVM_SUCCESS) {
139 fprintf(stderr, "nvvmGetCompiledResultSize: Failed\n");
143 PTX = (char*)malloc(PTXSize);
144 result = nvvmGetCompiledResult(program, PTX);
145 if (result != NVVM_SUCCESS) {
146 fprintf(stderr, "nvvmGetCompiledResult: Failed\n");
151 result = nvvmDestroyProgram(&program);
152 if (result != NVVM_SUCCESS) {
153 fprintf(stderr, "nvvmDestroyProgram: Failed\n");
161 int main(int argc, char **argv)
163 const unsigned int nThreads = 32;
164 const unsigned int nBlocks = 1;
165 const size_t memSize = nThreads * nBlocks * sizeof(int);
167 CUcontext hContext = 0;
168 CUdevice hDevice = 0;
169 CUmodule hModule = 0;
170 CUfunction hKernel = 0;
171 CUdeviceptr d_data = 0;
176 // Get the ll from file
179 void *params[] = { &d_data };
181 const char *filename = "simple-gpu64.ll";
183 const char *filename = "simple-gpu.ll";
185 char *ll = loadProgramSource(filename, &size);
186 fprintf(stdout, "NVVM IR ll file loaded\n");
188 // Use libnvvm to generte PTX
189 ptx = generatePTX(ll, size, filename);
190 fprintf(stdout, "PTX generated:\n");
191 fprintf(stdout, "%s\n", ptx);
193 // Initialize the device and get a handle to the kernel
194 checkCudaErrors(initCUDA(&hContext, &hDevice, &hModule, &hKernel, ptx));
196 // Allocate memory on host and device
197 if ((h_data = (int *)malloc(memSize)) == NULL) {
198 fprintf(stderr, "Could not allocate host memory\n");
201 checkCudaErrors(cuMemAlloc(&d_data, memSize));
204 checkCudaErrors(cuLaunchKernel(hKernel, nBlocks, 1, 1, nThreads, 1, 1,
205 0, NULL, params, NULL));
206 fprintf(stdout, "CUDA kernel launched\n");
208 // Copy the result back to the host
209 checkCudaErrors(cuMemcpyDtoH(h_data, d_data, memSize));
212 for (i = 0 ; i < nBlocks * nThreads ; i++) {
213 fprintf(stdout, "%d ", h_data[i]);
216 fprintf(stdout, "\n");
220 checkCudaErrors(cuMemFree(d_data));
228 checkCudaErrors(cuModuleUnload(hModule));
232 checkCudaErrors(cuCtxDestroy(hContext));