2 * Copyright 1993-2013 NVIDIA Corporation. All rights reserved.
4 * Please refer to the NVIDIA end user license agreement (EULA) associated
5 * with this source code for terms and conditions that govern your use of
6 * this software. Any use, reproduction, disclosure, or distribution of
7 * this software and related documentation outside the terms of the EULA
8 * is strictly prohibited.
20 #include <cuda_runtime.h>
22 // Helper functions and utilities to work with CUDA.
23 #include <helper_functions.h>
24 #include <helper_cuda.h>
26 // Device library includes.
27 #include "simpleDeviceLibrary.cuh"
// Shorthand types: "uint" for unsigned counters and sizes, and "deviceFunc"
// for a pointer to a function that takes one float and returns one float.
typedef unsigned uint;
typedef float (*deviceFunc)(float);

// Name printed in the start and completion messages.
const char *sampleName = "simpleSeparateCompilation";

////////////////////////////////////////////////////////////////////////////////
// Auto-Verification Code
// Overall pass/fail flag. main() converts it into the "OK"/"ERROR" text and
// into the process exit status.
bool testResult = true;

////////////////////////////////////////////////////////////////////////////////
// Static device pointers to __device__ functions.
// Each __device__ variable is initialized with the address of one function.
// NOTE(review): multiplyByTwo and divideByTwo are presumably declared in
// "simpleDeviceLibrary.cuh" - confirm.
__device__ deviceFunc dMultiplyByTwoPtr = &multiplyByTwo;
__device__ deviceFunc dDivideByTwoPtr = &divideByTwo;
50 ////////////////////////////////////////////////////////////////////////////////
52 ////////////////////////////////////////////////////////////////////////////////
//! Transforms vector.
//! Applies the __device__ function "f" to each element of the vector "v",
//! in place: v[i] = f(v[i]) for every i in [0, size).
//!
//! Indexing is one-dimensional: a thread handles the single element at index
//! blockIdx.x * blockDim.x + threadIdx.x. The launch must therefore supply at
//! least "size" threads in total for every element to be transformed; threads
//! whose index is "size" or greater do nothing.
//!
//! @param v     vector to transform; each element is read and then overwritten
//! @param f     pointer to the function applied to each element
//! @param size  number of elements in "v"
////////////////////////////////////////////////////////////////////////////////
__global__ void transformVector(float *v, deviceFunc f, uint size)
{
    const uint tid = blockIdx.x * blockDim.x + threadIdx.x;

    // The launch may supply more threads than there are elements (runTest
    // starts 1024 threads for a 1000-element vector), so surplus threads must
    // not read or write past the end of "v".
    if (tid < size)
    {
        v[tid] = f(v[tid]);
    }
}
////////////////////////////////////////////////////////////////////////////////
// Forward declaration of the test driver that main() calls.
void runTest(int argc, const char **argv);

////////////////////////////////////////////////////////////////////////////////
// Program entry point.
// Prints a start message, runs the test with the command-line arguments, then
// prints the outcome held in the global "testResult" flag and terminates the
// process with the matching exit status.
////////////////////////////////////////////////////////////////////////////////
int main(int argc, char **argv)
{
    cout << sampleName << " starting..." << endl;

    // runTest takes the arguments as pointers to const characters.
    runTest(argc, const_cast<const char **>(argv));

    // Read the flag once; it drives both the message and the exit status.
    const bool passed = testResult;
    const char *verdict = passed ? "OK" : "ERROR";

    cout << sampleName << " completed, returned " << verdict << endl;

    exit(passed ? EXIT_SUCCESS : EXIT_FAILURE);
}
86 void runTest(int argc, const char **argv)
93 cudaDeviceProp deviceProp;
95 // This will pick the best possible CUDA capable device.
96 devID = findCudaDevice(argc, (const char **) argv);
98 checkCudaErrors(cudaGetDeviceProperties(&deviceProp, devID));
100 if (deviceProp.major < 2)
103 << " requires a GPU with compute capability "
104 << "2.0 or later, exiting..." << endl;
110 // Create host vector.
111 const uint kVectorSize = 1000;
113 vector<float> hVector(kVectorSize);
115 for (uint i = 0; i < kVectorSize; ++i)
117 hVector[i] = rand() / static_cast<float>(RAND_MAX);
120 // Create and populate device vector.
122 checkCudaErrors(cudaMalloc(&dVector, kVectorSize * sizeof(float)));
124 checkCudaErrors(cudaMemcpy(dVector,
126 kVectorSize * sizeof(float),
127 cudaMemcpyHostToDevice));
129 // Kernel configuration, where a one-dimensional
130 // grid and one-dimensional blocks are configured.
131 const int nThreads = 1024;
132 const int nBlocks = 1;
134 dim3 dimGrid(nBlocks);
135 dim3 dimBlock(nThreads);
137 // Test library functions.
138 deviceFunc hFunctionPtr;
140 cudaMemcpyFromSymbol(&hFunctionPtr,
143 transformVector<<<dimGrid, dimBlock>>>
144 (dVector, hFunctionPtr, kVectorSize);
145 checkCudaErrors(cudaGetLastError());
147 cudaMemcpyFromSymbol(&hFunctionPtr,
150 transformVector<<<dimGrid, dimBlock>>>
151 (dVector, hFunctionPtr, kVectorSize);
152 checkCudaErrors(cudaGetLastError());
155 vector<float> hResultVector(kVectorSize);
157 checkCudaErrors(cudaMemcpy(&hResultVector[0],
159 kVectorSize *sizeof(float),
160 cudaMemcpyDeviceToHost));
163 for (int i = 0; i < kVectorSize; ++i)
165 if (fabs(hVector[i] - hResultVector[i]) > EPS)
167 cout << "Computations were incorrect..." << endl;
174 if (dVector) checkCudaErrors(cudaFree(dVector));
175 checkCudaErrors(cudaDeviceReset());
179 cout << "Error occured, exiting..." << endl;