util/X86MAC64/cuda/samples/1_Utilities/deviceQuery/deviceQuery.cpp

   1 /*
   2  * Copyright 1993-2013 NVIDIA Corporation.  All rights reserved.
   3  *
   4  * Please refer to the NVIDIA end user license agreement (EULA) associated
   5  * with this source code for terms and conditions that govern your use of
   6  * this software. Any use, reproduction, disclosure, or distribution of
   7  * this software and related documentation outside the terms of the EULA
   8  * is strictly prohibited.
   9  *
  10  */
/* This sample queries the properties of the CUDA devices present in the system via the CUDA Runtime API. */
  12
  13 // Shared Utilities (QA Testing)
  14
  15 // std::system includes
#include <cstdio>
#include <cstdlib>
#include <iostream>
#include <memory>
#include <sstream>
#include <string>
#include <vector>

// CUDA-C includes
#include <cuda.h>
#include <cuda_runtime.h>

#include <helper_cuda.h>
  24
  25 int *pArgc = NULL;
  26 char **pArgv = NULL;
  27
  28 // This function wraps the CUDA Driver API into a template function
  29 template <class T>
  30 inline void getCudaAttribute(T *attribute, CUdevice_attribute device_attribute, int device)
  31 {
  32     CUresult error =    cuDeviceGetAttribute(attribute, device_attribute, device);
  33
  34     if (CUDA_SUCCESS != error)
  35     {
  36         fprintf(stderr, "cuSafeCallNoSync() Driver API error = %04d from file <%s>, line %i.\n",
  37                 error, __FILE__, __LINE__);
  38         exit(EXIT_FAILURE);
  39     }
  40 }
  41
  42
  43 inline bool IsGPUCapableP2P(cudaDeviceProp *pProp)
  44 {
  45 #ifdef _WIN32
  46     return (bool)(pProp->tccDriver ? true : false);
  47 #else
  48     return (bool)(pProp->major >= 2);
  49 #endif
  50 }
  51
  52 inline bool IsAppBuiltAs64()
  53 {
  54 #if defined(__x86_64) || defined(AMD64) || defined(_M_AMD64)
  55     return 1;
  56 #else
  57     return 0;
  58 #endif
  59 }
  60
  61 ////////////////////////////////////////////////////////////////////////////////
  62 // Program main
  63 ////////////////////////////////////////////////////////////////////////////////
  64 int
  65 main(int argc, char **argv)
  66 {
  67     pArgc = &argc;
  68     pArgv = argv;
  69
  70     printf("%s Starting...\n\n", argv[0]);
  71     printf(" CUDA Device Query (Runtime API) version (CUDART static linking)\n\n");
  72
  73     int deviceCount = 0;
  74     cudaError_t error_id = cudaGetDeviceCount(&deviceCount);
  75
  76     if (error_id != cudaSuccess)
  77     {
  78         printf("cudaGetDeviceCount returned %d\n-> %s\n", (int)error_id, cudaGetErrorString(error_id));
  79                 printf("Result = FAIL\n");
  80         exit(EXIT_FAILURE);
  81     }
  82
  83     // This function call returns 0 if there are no CUDA capable devices.
  84     if (deviceCount == 0)
  85     {
  86         printf("There are no available device(s) that support CUDA\n");
  87     }
  88     else
  89     {
  90         printf("Detected %d CUDA Capable device(s)\n", deviceCount);
  91     }
  92
  93     int dev, driverVersion = 0, runtimeVersion = 0;
  94
  95     for (dev = 0; dev < deviceCount; ++dev)
  96     {
  97         cudaSetDevice(dev);
  98         cudaDeviceProp deviceProp;
  99         cudaGetDeviceProperties(&deviceProp, dev);
 100
 101         printf("\nDevice %d: \"%s\"\n", dev, deviceProp.name);
 102
 103         // Console log
 104         cudaDriverGetVersion(&driverVersion);
 105         cudaRuntimeGetVersion(&runtimeVersion);
 106         printf("  CUDA Driver Version / Runtime Version          %d.%d / %d.%d\n", driverVersion/1000, (driverVersion%100)/10, runtimeVersion/1000, (runtimeVersion%100)/10);
 107         printf("  CUDA Capability Major/Minor version number:    %d.%d\n", deviceProp.major, deviceProp.minor);
 108
 109         char msg[256];
 110         SPRINTF(msg, "  Total amount of global memory:                 %.0f MBytes (%llu bytes)\n",
 111                 (float)deviceProp.totalGlobalMem/1048576.0f, (unsigned long long) deviceProp.totalGlobalMem);
 112         printf("%s", msg);
 113
 114         printf("  (%2d) Multiprocessors, (%3d) CUDA Cores/MP:     %d CUDA Cores\n",
 115                deviceProp.multiProcessorCount,
 116                _ConvertSMVer2Cores(deviceProp.major, deviceProp.minor),
 117                _ConvertSMVer2Cores(deviceProp.major, deviceProp.minor) * deviceProp.multiProcessorCount);
 118         printf("  GPU Clock rate:                                %.0f MHz (%0.2f GHz)\n", deviceProp.clockRate * 1e-3f, deviceProp.clockRate * 1e-6f);
 119
 120
 121 #if CUDART_VERSION >= 5000
 122         // This is supported in CUDA 5.0 (runtime API device properties)
 123         printf("  Memory Clock rate:                             %.0f Mhz\n", deviceProp.memoryClockRate * 1e-3f);
 124         printf("  Memory Bus Width:                              %d-bit\n",   deviceProp.memoryBusWidth);
 125
 126         if (deviceProp.l2CacheSize)
 127         {
 128             printf("  L2 Cache Size:                                 %d bytes\n", deviceProp.l2CacheSize);
 129         }
 130 #else
 131         // This only available in CUDA 4.0-4.2 (but these were only exposed in the CUDA Driver API)
 132         int memoryClock;
 133         getCudaAttribute<int>(&memoryClock, CU_DEVICE_ATTRIBUTE_MEMORY_CLOCK_RATE, dev);
 134         printf("  Memory Clock rate:                             %.0f Mhz\n", memoryClock * 1e-3f);
 135         int memBusWidth;
 136         getCudaAttribute<int>(&memBusWidth, CU_DEVICE_ATTRIBUTE_GLOBAL_MEMORY_BUS_WIDTH, dev);
 137         printf("  Memory Bus Width:                              %d-bit\n", memBusWidth);
 138         int L2CacheSize;
 139         getCudaAttribute<int>(&L2CacheSize, CU_DEVICE_ATTRIBUTE_L2_CACHE_SIZE, dev);
 140
 141         if (L2CacheSize)
 142         {
 143             printf("  L2 Cache Size:                                 %d bytes\n", L2CacheSize);
 144         }
 145 #endif
 146
 147         printf("  Maximum Texture Dimension Size (x,y,z)         1D=(%d), 2D=(%d, %d), 3D=(%d, %d, %d)\n",
 148                deviceProp.maxTexture1D   , deviceProp.maxTexture2D[0], deviceProp.maxTexture2D[1],
 149                deviceProp.maxTexture3D[0], deviceProp.maxTexture3D[1], deviceProp.maxTexture3D[2]);
 150         printf("  Maximum Layered 1D Texture Size, (num) layers  1D=(%d), %d layers\n",
 151                deviceProp.maxTexture1DLayered[0], deviceProp.maxTexture1DLayered[1]);
 152                 printf("  Maximum Layered 2D Texture Size, (num) layers  2D=(%d, %d), %d layers\n",
 153                            deviceProp.maxTexture2DLayered[0], deviceProp.maxTexture2DLayered[1], deviceProp.maxTexture2DLayered[2]);
 154
 155
 156         printf("  Total amount of constant memory:               %lu bytes\n", deviceProp.totalConstMem);
 157         printf("  Total amount of shared memory per block:       %lu bytes\n", deviceProp.sharedMemPerBlock);
 158         printf("  Total number of registers available per block: %d\n", deviceProp.regsPerBlock);
 159         printf("  Warp size:                                     %d\n", deviceProp.warpSize);
 160         printf("  Maximum number of threads per multiprocessor:  %d\n", deviceProp.maxThreadsPerMultiProcessor);
 161         printf("  Maximum number of threads per block:           %d\n", deviceProp.maxThreadsPerBlock);
 162         printf("  Max dimension size of a thread block (x,y,z): (%d, %d, %d)\n",
 163                deviceProp.maxThreadsDim[0],
 164                deviceProp.maxThreadsDim[1],
 165                deviceProp.maxThreadsDim[2]);
 166         printf("  Max dimension size of a grid size    (x,y,z): (%d, %d, %d)\n",
 167                deviceProp.maxGridSize[0],
 168                deviceProp.maxGridSize[1],
 169                deviceProp.maxGridSize[2]);
 170         printf("  Maximum memory pitch:                          %lu bytes\n", deviceProp.memPitch);
 171         printf("  Texture alignment:                             %lu bytes\n", deviceProp.textureAlignment);
 172         printf("  Concurrent copy and kernel execution:          %s with %d copy engine(s)\n", (deviceProp.deviceOverlap ? "Yes" : "No"), deviceProp.asyncEngineCount);
 173         printf("  Run time limit on kernels:                     %s\n", deviceProp.kernelExecTimeoutEnabled ? "Yes" : "No");
 174         printf("  Integrated GPU sharing Host Memory:            %s\n", deviceProp.integrated ? "Yes" : "No");
 175         printf("  Support host page-locked memory mapping:       %s\n", deviceProp.canMapHostMemory ? "Yes" : "No");
 176         printf("  Alignment requirement for Surfaces:            %s\n", deviceProp.surfaceAlignment ? "Yes" : "No");
 177         printf("  Device has ECC support:                        %s\n", deviceProp.ECCEnabled ? "Enabled" : "Disabled");
 178 #ifdef WIN32
 179         printf("  CUDA Device Driver Mode (TCC or WDDM):         %s\n", deviceProp.tccDriver ? "TCC (Tesla Compute Cluster Driver)" : "WDDM (Windows Display Driver Model)");
 180 #endif
 181         printf("  Device supports Unified Addressing (UVA):      %s\n", deviceProp.unifiedAddressing ? "Yes" : "No");
 182         printf("  Device PCI Bus ID / PCI location ID:           %d / %d\n", deviceProp.pciBusID, deviceProp.pciDeviceID);
 183
 184         const char *sComputeMode[] =
 185         {
 186             "Default (multiple host threads can use ::cudaSetDevice() with device simultaneously)",
 187             "Exclusive (only one host thread in one process is able to use ::cudaSetDevice() with this device)",
 188             "Prohibited (no host thread can use ::cudaSetDevice() with this device)",
 189             "Exclusive Process (many threads in one process is able to use ::cudaSetDevice() with this device)",
 190             "Unknown",
 191             NULL
 192         };
 193         printf("  Compute Mode:\n");
 194         printf("     < %s >\n", sComputeMode[deviceProp.computeMode]);
 195     }
 196
 197     // If there are 2 or more GPUs, query to determine whether RDMA is supported
 198     if (deviceCount >= 2)
 199     {
 200         cudaDeviceProp prop[64];
 201         int gpuid[64]; // we want to find the first two GPU's that can support P2P
 202         int gpu_p2p_count = 0;
 203
 204         for (int i=0; i < deviceCount; i++)
 205         {
 206             checkCudaErrors(cudaGetDeviceProperties(&prop[i], i));
 207
 208             // Only boards based on Fermi or later can support P2P
 209             if ((prop[i].major >= 2)
 210 #ifdef _WIN32
 211                 // on Windows (64-bit), the Tesla Compute Cluster driver for windows must be enabled to supprot this
 212                 && prop[i].tccDriver
 213 #endif
 214                )
 215             {
 216                 // This is an array of P2P capable GPUs
 217                 gpuid[gpu_p2p_count++] = i;
 218             }
 219         }
 220
 221         // Show all the combinations of support P2P GPUs
 222         int can_access_peer_0_1, can_access_peer_1_0;
 223
 224         if (gpu_p2p_count >= 2)
 225         {
 226             for (int i = 0; i < gpu_p2p_count-1; i++)
 227             {
 228                 for (int j = 1; j < gpu_p2p_count; j++)
 229                 {
 230                     checkCudaErrors(cudaDeviceCanAccessPeer(&can_access_peer_0_1, gpuid[i], gpuid[j]));
 231                     printf("> Peer access from %s (GPU%d) -> %s (GPU%d) : %s\n", prop[gpuid[i]].name, gpuid[i],
 232                            prop[gpuid[j]].name, gpuid[j] ,
 233                            can_access_peer_0_1 ? "Yes" : "No");
 234                 }
 235             }
 236
 237             for (int j = 1; j < gpu_p2p_count; j++)
 238             {
 239                 for (int i = 0; i < gpu_p2p_count-1; i++)
 240                 {
 241                     checkCudaErrors(cudaDeviceCanAccessPeer(&can_access_peer_1_0, gpuid[j], gpuid[i]));
 242                     printf("> Peer access from %s (GPU%d) -> %s (GPU%d) : %s\n", prop[gpuid[j]].name, gpuid[j],
 243                            prop[gpuid[i]].name, gpuid[i] ,
 244                            can_access_peer_1_0 ? "Yes" : "No");
 245                 }
 246             }
 247         }
 248     }
 249
 250     // csv masterlog info
 251     // *****************************
 252     // exe and CUDA driver name
 253     printf("\n");
 254     std::string sProfileString = "deviceQuery, CUDA Driver = CUDART";
 255     char cTemp[16];
 256
 257     // driver version
 258     sProfileString += ", CUDA Driver Version = ";
 259 #ifdef WIN32
 260     sprintf_s(cTemp, 10, "%d.%d", driverVersion/1000, (driverVersion%100)/10);
 261 #else
 262     sprintf(cTemp, "%d.%d", driverVersion/1000, (driverVersion%100)/10);
 263 #endif
 264     sProfileString +=  cTemp;
 265
 266     // Runtime version
 267     sProfileString += ", CUDA Runtime Version = ";
 268 #ifdef WIN32
 269     sprintf_s(cTemp, 10, "%d.%d", runtimeVersion/1000, (runtimeVersion%100)/10);
 270 #else
 271     sprintf(cTemp, "%d.%d", runtimeVersion/1000, (runtimeVersion%100)/10);
 272 #endif
 273     sProfileString +=  cTemp;
 274
 275     // Device count
 276     sProfileString += ", NumDevs = ";
 277 #ifdef WIN32
 278     sprintf_s(cTemp, 10, "%d", deviceCount);
 279 #else
 280     sprintf(cTemp, "%d", deviceCount);
 281 #endif
 282     sProfileString += cTemp;
 283
 284     // Print Out all device Names
 285     for (dev = 0; dev < deviceCount; ++dev)
 286     {
 287 #ifdef _WIN32
 288         sprintf_s(cTemp, 13, ", Device%d = ", dev);
 289 #else
 290         sprintf(cTemp, ", Device%d = ", dev);
 291 #endif
 292         cudaDeviceProp deviceProp;
 293         cudaGetDeviceProperties(&deviceProp, dev);
 294         sProfileString += cTemp;
 295         sProfileString += deviceProp.name;
 296     }
 297
 298     sProfileString += "\n";
 299     printf("%s", sProfileString.c_str());
 300
 301         printf("Result = PASS\n");
 302
 303     // finish
 304     exit(EXIT_SUCCESS);
 305 }