utiltools/X86MAC64/cuda/samples/0_Simple/simpleSeparateCompilation/simpleSeparateCompilation.cu
/*
 * Copyright 1993-2013 NVIDIA Corporation.  All rights reserved.
 *
 * Please refer to the NVIDIA end user license agreement (EULA) associated
 * with this source code for terms and conditions that govern your use of
 * this software. Any use, reproduction, disclosure, or distribution of
 * this software and related documentation outside the terms of the EULA
 * is strictly prohibited.
 *
 */

// System includes.
#include <stdio.h>
#include <iostream>
#include <cmath>    // fabs
#include <cstdlib>  // rand, RAND_MAX, EXIT_SUCCESS

// STL.
#include <vector>

// CUDA runtime.
#include <cuda_runtime.h>

// Helper functions and utilities to work with CUDA.
#include <helper_functions.h>
#include <helper_cuda.h>

// Device library includes.
#include "simpleDeviceLibrary.cuh"
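// Note: this sample demonstrates CUDA separate compilation. The __device__
// functions multiplyByTwo and divideByTwo are declared in the header above
// and presumably defined in a separately compiled unit (simpleDeviceLibrary.cu),
// so the sample must be built with relocatable device code enabled
// (e.g. nvcc -rdc=true) and device-linked.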

using std::cout;
using std::endl;

using std::vector;

#define EPS 1e-5

typedef unsigned int uint;
typedef float (*deviceFunc)(float);

const char *sampleName = "simpleSeparateCompilation";

////////////////////////////////////////////////////////////////////////////////
// Auto-Verification Code
bool testResult = true;

////////////////////////////////////////////////////////////////////////////////
// Static device pointers to __device__ functions.
__device__ deviceFunc dMultiplyByTwoPtr = multiplyByTwo;
__device__ deviceFunc dDivideByTwoPtr = divideByTwo;
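// Host code cannot take the address of a __device__ function directly, so
// the pointers above are initialized on the device and later copied back to
// the host with cudaMemcpyFromSymbol before being passed to the kernel.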

////////////////////////////////////////////////////////////////////////////////
// Kernels
////////////////////////////////////////////////////////////////////////////////
//! Transforms vector.
//! Applies the __device__ function "f" to each element of the vector "v".
////////////////////////////////////////////////////////////////////////////////
__global__ void transformVector(float *v, deviceFunc f, uint size)
{
    uint tid = blockIdx.x * blockDim.x + threadIdx.x;

    if (tid < size)
    {
        v[tid] = (*f)(v[tid]);
    }
}

////////////////////////////////////////////////////////////////////////////////
// Declaration, forward
void runTest(int argc, const char **argv);

////////////////////////////////////////////////////////////////////////////////
// Program main
////////////////////////////////////////////////////////////////////////////////
int main(int argc, char **argv)
{
    cout << sampleName << " starting..." << endl;

    runTest(argc, (const char **)argv);

    cout << sampleName << " completed, returned "
         << (testResult ? "OK" : "ERROR") << endl;

    exit(testResult ? EXIT_SUCCESS : EXIT_FAILURE);
}


void runTest(int argc, const char **argv)
{
    try
    {
        int devID;
        cudaDeviceProp deviceProp;

        // This will pick the best possible CUDA capable device.
        devID = findCudaDevice(argc, (const char **) argv);

        checkCudaErrors(cudaGetDeviceProperties(&deviceProp, devID));

        if (deviceProp.major < 2)
        {
            cout << sampleName
                 << " requires a GPU with compute capability "
                 << "2.0 or later, exiting..." << endl;

            cudaDeviceReset();
            exit(EXIT_SUCCESS);
        }

        // Create host vector.
        const uint kVectorSize = 1000;

        vector<float> hVector(kVectorSize);

        for (uint i = 0; i < kVectorSize; ++i)
        {
            hVector[i] = rand() / static_cast<float>(RAND_MAX);
        }

        // Create and populate device vector.
        float *dVector;
        checkCudaErrors(cudaMalloc(&dVector, kVectorSize * sizeof(float)));

        checkCudaErrors(cudaMemcpy(dVector,
                                   &hVector[0],
                                   kVectorSize * sizeof(float),
                                   cudaMemcpyHostToDevice));

        // Kernel configuration, where a one-dimensional
        // grid and one-dimensional blocks are configured.
        const int nThreads = 1024;
        const int nBlocks = 1;

        dim3 dimGrid(nBlocks);
        dim3 dimBlock(nThreads);
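        // One block of 1024 threads covers all 1000 elements; the tid < size
        // guard in the kernel leaves the surplus threads idle.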

        // Test library functions.
        deviceFunc hFunctionPtr;

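        // cudaMemcpyFromSymbol copies the function-pointer value stored in a
        // __device__ variable into host memory; the host can then pass it
        // back to the device as an ordinary kernel argument.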
        checkCudaErrors(cudaMemcpyFromSymbol(&hFunctionPtr,
                                             dMultiplyByTwoPtr,
                                             sizeof(deviceFunc)));
        transformVector<<<dimGrid, dimBlock>>>(dVector, hFunctionPtr, kVectorSize);
        checkCudaErrors(cudaGetLastError());

        checkCudaErrors(cudaMemcpyFromSymbol(&hFunctionPtr,
                                             dDivideByTwoPtr,
                                             sizeof(deviceFunc)));
        transformVector<<<dimGrid, dimBlock>>>(dVector, hFunctionPtr, kVectorSize);
        checkCudaErrors(cudaGetLastError());

        // Download results.
        vector<float> hResultVector(kVectorSize);

        checkCudaErrors(cudaMemcpy(&hResultVector[0],
                                   dVector,
                                   kVectorSize * sizeof(float),
                                   cudaMemcpyDeviceToHost));

        // Check results.
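        // The two launches multiplied each element by two and then divided it
        // by two, so each result should match the original value to within EPS.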
        for (uint i = 0; i < kVectorSize; ++i)
        {
            if (fabs(hVector[i] - hResultVector[i]) > EPS)
            {
                cout << "Computations were incorrect..." << endl;
                testResult = false;
                break;
            }
        }

        // Free resources.
        if (dVector) checkCudaErrors(cudaFree(dVector));
        checkCudaErrors(cudaDeviceReset());
    }
    catch (...)
    {
        cout << "Error occurred, exiting..." << endl;
        cudaDeviceReset();
        exit(EXIT_FAILURE);
    }
}