utiltools/X86MAC64/cuda/samples/0_Simple/simpleSeparateCompilation/simpleSeparateCompilation.cu
/*
 * Copyright 1993-2013 NVIDIA Corporation.  All rights reserved.
 *
 * Please refer to the NVIDIA end user license agreement (EULA) associated
 * with this source code for terms and conditions that govern your use of
 * this software. Any use, reproduction, disclosure, or distribution of
 * this software and related documentation outside the terms of the EULA
 * is strictly prohibited.
 *
 */

// System includes.
#include <stdio.h>
#include <iostream>
#include <cmath>    // fabs
#include <cstdlib>  // rand, RAND_MAX, EXIT_SUCCESS

// STL.
#include <vector>

// CUDA runtime.
#include <cuda_runtime.h>

// Helper functions and utilities to work with CUDA.
#include <helper_functions.h>
#include <helper_cuda.h>

// Device library includes.
#include "simpleDeviceLibrary.cuh"
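// Note: this sample demonstrates CUDA separate compilation. The __device__
// functions multiplyByTwo and divideByTwo are declared in the header above
// and presumably defined in a separately compiled unit (simpleDeviceLibrary.cu),
// so the sample must be built with relocatable device code enabled
// (e.g. nvcc -rdc=true) and device-linked.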

using std::cout;
using std::endl;

using std::vector;

#define EPS 1e-5

typedef unsigned int uint;
typedef float (*deviceFunc)(float);

const char *sampleName = "simpleSeparateCompilation";

////////////////////////////////////////////////////////////////////////////////
// Auto-Verification Code
bool testResult = true;

////////////////////////////////////////////////////////////////////////////////
// Static device pointers to __device__ functions.
__device__ deviceFunc dMultiplyByTwoPtr = multiplyByTwo;
__device__ deviceFunc dDivideByTwoPtr = divideByTwo;
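// Host code cannot take the address of a __device__ function directly, so
// the pointers above are initialized on the device and later copied back to
// the host with cudaMemcpyFromSymbol before being passed to the kernel.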

////////////////////////////////////////////////////////////////////////////////
// Kernels
////////////////////////////////////////////////////////////////////////////////
//! Transforms vector.
//! Applies the __device__ function "f" to each element of the vector "v".
////////////////////////////////////////////////////////////////////////////////
__global__ void transformVector(float *v, deviceFunc f, uint size)
{
    uint tid = blockIdx.x * blockDim.x + threadIdx.x;

    if (tid < size)
    {
        v[tid] = (*f)(v[tid]);
    }
}

////////////////////////////////////////////////////////////////////////////////
// Declaration, forward
void runTest(int argc, const char **argv);

////////////////////////////////////////////////////////////////////////////////
// Program main
////////////////////////////////////////////////////////////////////////////////
int main(int argc, char **argv)
{
    cout << sampleName << " starting..." << endl;

    runTest(argc, (const char **)argv);

    cout << sampleName << " completed, returned "
         << (testResult ? "OK" : "ERROR") << endl;

    exit(testResult ? EXIT_SUCCESS : EXIT_FAILURE);
}


void runTest(int argc, const char **argv)
{
    try
    {
        int devID;
        cudaDeviceProp deviceProp;

        // This will pick the best possible CUDA capable device.
        devID = findCudaDevice(argc, (const char **) argv);

        checkCudaErrors(cudaGetDeviceProperties(&deviceProp, devID));

        if (deviceProp.major < 2)
        {
            cout << sampleName
                 << " requires a GPU with compute capability "
                 << "2.0 or later, exiting..." << endl;

            cudaDeviceReset();
            exit(EXIT_SUCCESS);
        }

        // Create host vector.
        const uint kVectorSize = 1000;

        vector<float> hVector(kVectorSize);

        for (uint i = 0; i < kVectorSize; ++i)
        {
            hVector[i] = rand() / static_cast<float>(RAND_MAX);
        }

        // Create and populate device vector.
        float *dVector;
        checkCudaErrors(cudaMalloc(&dVector, kVectorSize * sizeof(float)));

        checkCudaErrors(cudaMemcpy(dVector,
                                   &hVector[0],
                                   kVectorSize * sizeof(float),
                                   cudaMemcpyHostToDevice));

        // Kernel configuration, where a one-dimensional
        // grid and one-dimensional blocks are configured.
        const int nThreads = 1024;
        const int nBlocks = 1;

        dim3 dimGrid(nBlocks);
        dim3 dimBlock(nThreads);
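        // One block of 1024 threads covers all 1000 elements; the tid < size
        // guard in the kernel leaves the surplus threads idle.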

        // Test library functions.
        deviceFunc hFunctionPtr;

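        // cudaMemcpyFromSymbol copies the function-pointer value stored in a
        // __device__ variable into host memory; the host can then pass it
        // back to the device as an ordinary kernel argument.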
        checkCudaErrors(cudaMemcpyFromSymbol(&hFunctionPtr,
                                             dMultiplyByTwoPtr,
                                             sizeof(deviceFunc)));
        transformVector<<<dimGrid, dimBlock>>>(dVector, hFunctionPtr, kVectorSize);
        checkCudaErrors(cudaGetLastError());

        checkCudaErrors(cudaMemcpyFromSymbol(&hFunctionPtr,
                                             dDivideByTwoPtr,
                                             sizeof(deviceFunc)));
        transformVector<<<dimGrid, dimBlock>>>(dVector, hFunctionPtr, kVectorSize);
        checkCudaErrors(cudaGetLastError());

        // Download results.
        vector<float> hResultVector(kVectorSize);

        checkCudaErrors(cudaMemcpy(&hResultVector[0],
                                   dVector,
                                   kVectorSize * sizeof(float),
                                   cudaMemcpyDeviceToHost));

        // Check results.
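        // The two launches multiplied each element by two and then divided it
        // by two, so each result should match the original value to within EPS.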
        for (uint i = 0; i < kVectorSize; ++i)
        {
            if (fabs(hVector[i] - hResultVector[i]) > EPS)
            {
                cout << "Computations were incorrect..." << endl;
                testResult = false;
                break;
            }
        }

        // Free resources.
        if (dVector) checkCudaErrors(cudaFree(dVector));
        checkCudaErrors(cudaDeviceReset());
    }
    catch (...)
    {
        cout << "Error occurred, exiting..." << endl;
        cudaDeviceReset();
        exit(EXIT_FAILURE);
    }
}