2 * Copyright 1993-2013 NVIDIA Corporation. All rights reserved.
4 * Please refer to the NVIDIA end user license agreement (EULA) associated
5 * with this source code for terms and conditions that govern your use of
6 * this software. Any use, reproduction, disclosure, or distribution of
7 * this software and related documentation outside the terms of the EULA
8 * is strictly prohibited.
11 /* This sample queries the properties of the CUDA devices present in the system via CUDA Runtime API. */
13 // Shared Utilities (QA Testing)
15 // std::system includes
21 #include <cuda_runtime.h>
23 #include <helper_cuda.h>
28 // This function wraps the CUDA Driver API into a template function
// Queries a single device attribute through cuDeviceGetAttribute and lets the
// driver write the result into *attribute.
//   attribute        - output location, forwarded unchanged to the driver call
//   device_attribute - CUdevice_attribute selector naming the value to query
//   device           - device argument forwarded unchanged to the driver call
// NOTE(review): the template header, the braces and anything following the
// fprintf are not visible in this listing; only the lines shown are described.
30 inline void getCudaAttribute(T *attribute, CUdevice_attribute device_attribute, int device)
32 CUresult error = cuDeviceGetAttribute(attribute, device_attribute, device);
// Any result other than CUDA_SUCCESS is reported on stderr with the numeric
// error code. __FILE__/__LINE__ expand here, so the message points at this
// wrapper rather than at the call site.
34 if (CUDA_SUCCESS != error)
36 fprintf(stderr, "cuSafeCallNoSync() Driver API error = %04d from file <%s>, line %i.\n",
37 error, __FILE__, __LINE__);
// Decides whether the device described by pProp is treated as P2P capable.
// Two return statements are shown: one yields true when pProp->tccDriver is
// non-zero, the other yields true when pProp->major is at least 2.
// NOTE(review): whatever selects between the two returns (presumably
// conditional compilation) is not visible in this listing - confirm against
// the complete file.
43 inline bool IsGPUCapableP2P(cudaDeviceProp *pProp)
46 return (bool)(pProp->tccDriver ? true : false);
48 return (bool)(pProp->major >= 2);
// Presumably reports whether the application was compiled as a 64-bit x86
// binary; the decision is made at compile time by testing the __x86_64, AMD64
// and _M_AMD64 macros.
// NOTE(review): the branches guarded by this #if (and its #else/#endif) are
// not visible in this listing, so the actual return values cannot be confirmed
// from here.
52 inline bool IsAppBuiltAs64()
54 #if defined(__x86_64) || defined(AMD64) || defined(_M_AMD64)
61 ////////////////////////////////////////////////////////////////////////////////
63 ////////////////////////////////////////////////////////////////////////////////
// Program entry point: prints the properties of every CUDA device reported by
// the runtime, then peer-access results between P2P-capable devices, and
// finally a one-line summary string followed by "Result = PASS".
// NOTE(review): this listing has gaps (braces, some declarations such as
// deviceCount/msg/cTemp, and several preprocessor lines are not visible), so
// the comments below describe only the statements that are shown.
65 main(int argc, char **argv)
70 printf("%s Starting...\n\n", argv[0]);
71 printf(" CUDA Device Query (Runtime API) version (CUDART static linking)\n\n");
// Ask the runtime for the device count; on failure print the numeric error
// code together with its cudaGetErrorString text and the FAIL marker.
74 cudaError_t error_id = cudaGetDeviceCount(&deviceCount);
76 if (error_id != cudaSuccess)
78 printf("cudaGetDeviceCount returned %d\n-> %s\n", (int)error_id, cudaGetErrorString(error_id));
79 printf("Result = FAIL\n");
83 // This function call returns 0 if there are no CUDA capable devices.
86 printf("There are no available device(s) that support CUDA\n");
90 printf("Detected %d CUDA Capable device(s)\n", deviceCount);
93 int dev, driverVersion = 0, runtimeVersion = 0;
// Per-device report: one pass per device ordinal in [0, deviceCount).
95 for (dev = 0; dev < deviceCount; ++dev)
98 cudaDeviceProp deviceProp;
// NOTE(review): the return value of this call is ignored here, whereas the
// same query in the P2P section below is wrapped in checkCudaErrors.
99 cudaGetDeviceProperties(&deviceProp, dev);
101 printf("\nDevice %d: \"%s\"\n", dev, deviceProp.name);
// Versions are decoded as value/1000 for the major part and (value%100)/10
// for the minor part.
104 cudaDriverGetVersion(&driverVersion);
105 cudaRuntimeGetVersion(&runtimeVersion);
106 printf(" CUDA Driver Version / Runtime Version %d.%d / %d.%d\n", driverVersion/1000, (driverVersion%100)/10, runtimeVersion/1000, (runtimeVersion%100)/10);
107 printf(" CUDA Capability Major/Minor version number: %d.%d\n", deviceProp.major, deviceProp.minor);
// Global memory is formatted into msg both in MBytes (bytes / 1048576) and as
// a raw byte count.
// NOTE(review): the declaration of msg and the statement that prints it are
// not visible in this listing.
110 SPRINTF(msg, " Total amount of global memory: %.0f MBytes (%llu bytes)\n",
111 (float)deviceProp.totalGlobalMem/1048576.0f, (unsigned long long) deviceProp.totalGlobalMem);
// Total core count = cores per multiprocessor (looked up from the compute
// capability by _ConvertSMVer2Cores) multiplied by the multiprocessor count.
114 printf(" (%2d) Multiprocessors, (%3d) CUDA Cores/MP: %d CUDA Cores\n",
115 deviceProp.multiProcessorCount,
116 _ConvertSMVer2Cores(deviceProp.major, deviceProp.minor),
117 _ConvertSMVer2Cores(deviceProp.major, deviceProp.minor) * deviceProp.multiProcessorCount);
// clockRate is scaled by 1e-3 for the MHz figure and by 1e-6 for the GHz figure.
118 printf(" GPU Clock rate: %.0f MHz (%0.2f GHz)\n", deviceProp.clockRate * 1e-3f, deviceProp.clockRate * 1e-6f);
// Memory clock, bus width and L2 size: taken from deviceProp when
// CUDART_VERSION >= 5000, otherwise fetched via getCudaAttribute.
// NOTE(review): the #else/#endif lines separating the two variants and the
// declarations of memoryClock/memBusWidth/L2CacheSize are not visible here.
121 #if CUDART_VERSION >= 5000
122 // This is supported in CUDA 5.0 (runtime API device properties)
123 printf(" Memory Clock rate: %.0f Mhz\n", deviceProp.memoryClockRate * 1e-3f);
124 printf(" Memory Bus Width: %d-bit\n", deviceProp.memoryBusWidth);
// The L2 line is only printed when the reported size is non-zero.
126 if (deviceProp.l2CacheSize)
128 printf(" L2 Cache Size: %d bytes\n", deviceProp.l2CacheSize);
131 // This is only available in CUDA 4.0-4.2 (these attributes were only exposed through the CUDA Driver API)
133 getCudaAttribute<int>(&memoryClock, CU_DEVICE_ATTRIBUTE_MEMORY_CLOCK_RATE, dev);
134 printf(" Memory Clock rate: %.0f Mhz\n", memoryClock * 1e-3f);
136 getCudaAttribute<int>(&memBusWidth, CU_DEVICE_ATTRIBUTE_GLOBAL_MEMORY_BUS_WIDTH, dev);
137 printf(" Memory Bus Width: %d-bit\n", memBusWidth);
139 getCudaAttribute<int>(&L2CacheSize, CU_DEVICE_ATTRIBUTE_L2_CACHE_SIZE, dev);
143 printf(" L2 Cache Size: %d bytes\n", L2CacheSize);
// Texture limits: 1D, 2D and 3D maxima, then the layered 1D/2D maxima with
// their layer counts (last array element in each case).
147 printf(" Maximum Texture Dimension Size (x,y,z) 1D=(%d), 2D=(%d, %d), 3D=(%d, %d, %d)\n",
148 deviceProp.maxTexture1D , deviceProp.maxTexture2D[0], deviceProp.maxTexture2D[1],
149 deviceProp.maxTexture3D[0], deviceProp.maxTexture3D[1], deviceProp.maxTexture3D[2]);
150 printf(" Maximum Layered 1D Texture Size, (num) layers 1D=(%d), %d layers\n",
151 deviceProp.maxTexture1DLayered[0], deviceProp.maxTexture1DLayered[1]);
152 printf(" Maximum Layered 2D Texture Size, (num) layers 2D=(%d, %d), %d layers\n",
153 deviceProp.maxTexture2DLayered[0], deviceProp.maxTexture2DLayered[1], deviceProp.maxTexture2DLayered[2]);
// NOTE(review): %lu is used for totalConstMem, sharedMemPerBlock, memPitch and
// textureAlignment. If these fields are size_t, the specifier does not match
// on platforms where unsigned long is narrower than size_t - confirm against
// the cudaDeviceProp definition.
156 printf(" Total amount of constant memory: %lu bytes\n", deviceProp.totalConstMem);
157 printf(" Total amount of shared memory per block: %lu bytes\n", deviceProp.sharedMemPerBlock);
158 printf(" Total number of registers available per block: %d\n", deviceProp.regsPerBlock);
159 printf(" Warp size: %d\n", deviceProp.warpSize);
160 printf(" Maximum number of threads per multiprocessor: %d\n", deviceProp.maxThreadsPerMultiProcessor);
161 printf(" Maximum number of threads per block: %d\n", deviceProp.maxThreadsPerBlock);
162 printf(" Max dimension size of a thread block (x,y,z): (%d, %d, %d)\n",
163 deviceProp.maxThreadsDim[0],
164 deviceProp.maxThreadsDim[1],
165 deviceProp.maxThreadsDim[2]);
166 printf(" Max dimension size of a grid size (x,y,z): (%d, %d, %d)\n",
167 deviceProp.maxGridSize[0],
168 deviceProp.maxGridSize[1],
169 deviceProp.maxGridSize[2]);
170 printf(" Maximum memory pitch: %lu bytes\n", deviceProp.memPitch);
171 printf(" Texture alignment: %lu bytes\n", deviceProp.textureAlignment);
// The remaining capability flags are printed as Yes/No depending on whether
// the corresponding field is non-zero.
172 printf(" Concurrent copy and kernel execution: %s with %d copy engine(s)\n", (deviceProp.deviceOverlap ? "Yes" : "No"), deviceProp.asyncEngineCount);
173 printf(" Run time limit on kernels: %s\n", deviceProp.kernelExecTimeoutEnabled ? "Yes" : "No");
174 printf(" Integrated GPU sharing Host Memory: %s\n", deviceProp.integrated ? "Yes" : "No");
175 printf(" Support host page-locked memory mapping: %s\n", deviceProp.canMapHostMemory ? "Yes" : "No");
// NOTE(review): this prints Yes/No for a non-zero surfaceAlignment rather than
// the alignment value itself - confirm that is intended.
176 printf(" Alignment requirement for Surfaces: %s\n", deviceProp.surfaceAlignment ? "Yes" : "No");
// NOTE(review): the label says "ECC support" but the value printed is whether
// ECCEnabled is set (Enabled/Disabled).
177 printf(" Device has ECC support: %s\n", deviceProp.ECCEnabled ? "Enabled" : "Disabled");
// NOTE(review): any platform guard around the driver-mode line is not visible
// in this listing.
179 printf(" CUDA Device Driver Mode (TCC or WDDM): %s\n", deviceProp.tccDriver ? "TCC (Tesla Compute Cluster Driver)" : "WDDM (Windows Display Driver Model)");
181 printf(" Device supports Unified Addressing (UVA): %s\n", deviceProp.unifiedAddressing ? "Yes" : "No");
182 printf(" Device PCI Bus ID / PCI location ID: %d / %d\n", deviceProp.pciBusID, deviceProp.pciDeviceID);
// Human-readable descriptions, indexed directly by deviceProp.computeMode.
// NOTE(review): only four entries are visible here and the index is used
// without a range check - confirm the table covers every possible value.
184 const char *sComputeMode[] =
186 "Default (multiple host threads can use ::cudaSetDevice() with device simultaneously)",
187 "Exclusive (only one host thread in one process is able to use ::cudaSetDevice() with this device)",
188 "Prohibited (no host thread can use ::cudaSetDevice() with this device)",
189 "Exclusive Process (many threads in one process is able to use ::cudaSetDevice() with this device)",
193 printf(" Compute Mode:\n");
194 printf(" < %s >\n", sComputeMode[deviceProp.computeMode]);
197 // If there are 2 or more GPUs, query to determine whether RDMA is supported
198 if (deviceCount >= 2)
// NOTE(review): prop and gpuid hold 64 entries while the loop below runs up to
// deviceCount; nothing visible here caps deviceCount at 64.
200 cudaDeviceProp prop[64];
201 int gpuid[64]; // we want to find the first two GPUs that can support P2P
202 int gpu_p2p_count = 0;
// Collect the ordinals of all devices that pass the capability test.
204 for (int i=0; i < deviceCount; i++)
206 checkCudaErrors(cudaGetDeviceProperties(&prop[i], i));
208 // Only boards based on Fermi or later can support P2P
// NOTE(review): the remainder of this condition is not visible in this listing.
209 if ((prop[i].major >= 2)
211 // on Windows (64-bit), the Tesla Compute Cluster driver for windows must be enabled to support this
216 // This is an array of P2P capable GPUs
217 gpuid[gpu_p2p_count++] = i;
221 // Show all the combinations of supported P2P GPUs
222 int can_access_peer_0_1, can_access_peer_1_0;
224 if (gpu_p2p_count >= 2)
// First pass queries access from gpuid[i] to gpuid[j]; the second pass below
// queries the reverse direction.
// NOTE(review): i ranges over [0, n-1) and j over [1, n), so with three or
// more capable GPUs the pairs include i == j (a device queried against
// itself), and that self-pair is visited in both passes - confirm whether
// self-pairs should be skipped.
226 for (int i = 0; i < gpu_p2p_count-1; i++)
228 for (int j = 1; j < gpu_p2p_count; j++)
230 checkCudaErrors(cudaDeviceCanAccessPeer(&can_access_peer_0_1, gpuid[i], gpuid[j]));
231 printf("> Peer access from %s (GPU%d) -> %s (GPU%d) : %s\n", prop[gpuid[i]].name, gpuid[i],
232 prop[gpuid[j]].name, gpuid[j] ,
233 can_access_peer_0_1 ? "Yes" : "No");
237 for (int j = 1; j < gpu_p2p_count; j++)
239 for (int i = 0; i < gpu_p2p_count-1; i++)
241 checkCudaErrors(cudaDeviceCanAccessPeer(&can_access_peer_1_0, gpuid[j], gpuid[i]));
242 printf("> Peer access from %s (GPU%d) -> %s (GPU%d) : %s\n", prop[gpuid[j]].name, gpuid[j],
243 prop[gpuid[i]].name, gpuid[i] ,
244 can_access_peer_1_0 ? "Yes" : "No");
250 // csv masterlog info
251 // *****************************
252 // exe and CUDA driver name
// Build a single comma-separated summary line in sProfileString.
254 std::string sProfileString = "deviceQuery, CUDA Driver = CUDART";
// NOTE(review): each field below is formatted into cTemp by a sprintf_s line
// immediately followed by an equivalent sprintf line; the preprocessor lines
// that presumably select one of the two, and the declaration of cTemp, are not
// visible in this listing.
258 sProfileString += ", CUDA Driver Version = ";
260 sprintf_s(cTemp, 10, "%d.%d", driverVersion/1000, (driverVersion%100)/10);
262 sprintf(cTemp, "%d.%d", driverVersion/1000, (driverVersion%100)/10);
264 sProfileString += cTemp;
267 sProfileString += ", CUDA Runtime Version = ";
269 sprintf_s(cTemp, 10, "%d.%d", runtimeVersion/1000, (runtimeVersion%100)/10);
271 sprintf(cTemp, "%d.%d", runtimeVersion/1000, (runtimeVersion%100)/10);
273 sProfileString += cTemp;
276 sProfileString += ", NumDevs = ";
278 sprintf_s(cTemp, 10, "%d", deviceCount);
280 sprintf(cTemp, "%d", deviceCount);
282 sProfileString += cTemp;
284 // Print out all device names
285 for (dev = 0; dev < deviceCount; ++dev)
// NOTE(review): ", Device%d = " needs 12 characters plus the terminator for a
// single-digit dev, which exactly fills the 13 passed to sprintf_s; from
// dev >= 10 the text no longer fits that size. The unbounded sprintf variant
// relies on cTemp being large enough - confirm cTemp's declared size.
288 sprintf_s(cTemp, 13, ", Device%d = ", dev);
290 sprintf(cTemp, ", Device%d = ", dev);
// Properties are queried again just to obtain the device name; the return
// value of the query is not checked.
292 cudaDeviceProp deviceProp;
293 cudaGetDeviceProperties(&deviceProp, dev);
294 sProfileString += cTemp;
295 sProfileString += deviceProp.name;
298 sProfileString += "\n";
299 printf("%s", sProfileString.c_str());
301 printf("Result = PASS\n");