utiltools/X86MAC64/cuda/samples/0_Simple/simpleAtomicIntrinsics/simpleAtomicIntrinsics.cu

   1 /*
   2  * Copyright 1993-2013 NVIDIA Corporation.  All rights reserved.
   3  *
   4  * Please refer to the NVIDIA end user license agreement (EULA) associated
   5  * with this source code for terms and conditions that govern your use of
   6  * this software. Any use, reproduction, disclosure, or distribution of
   7  * this software and related documentation outside the terms of the EULA
   8  * is strictly prohibited.
   9  *
  10  */
  11
  12 /* A simple program demonstrating trivial use of global memory atomic
  13  * device functions (atomic*() functions).
  14  */
  15
  16 // includes, system
  17 #include <stdlib.h>
  18 #include <stdio.h>
  19 #include <string.h>
  20 #include <math.h>
  21
  22 #ifdef _WIN32
  23 #  define WINDOWS_LEAN_AND_MEAN
  24 #  define NOMINMAX
  25 #  include <windows.h>
  26 #endif
  27
  28 // Includes CUDA
  29 #include <cuda_runtime.h>
  30
  31 // Utilities and timing functions
  32 #include <helper_functions.h>    // includes cuda.h and cuda_runtime_api.h
  33
  34 // CUDA helper functions
  35 #include <helper_cuda.h>         // helper functions for CUDA error check
  36
  37 // Includes, kernels
  38 #include "simpleAtomicIntrinsics_kernel.cuh"
  39
  // Name of this sample; used as the prefix of the status messages printed by
  // main() and runTest().
  const char *sampleName = "simpleAtomicIntrinsics";

  ////////////////////////////////////////////////////////////////////////////////
  // Auto-Verification Code
  // Overall pass/fail verdict. runTest() overwrites it with the value returned
  // by computeGold(); main() turns it into the process exit status.
  bool testResult = true;

  ////////////////////////////////////////////////////////////////////////////////
  // Declaration, forward
  // Runs the whole test; defined further down in this file.
  void runTest(int argc, char **argv);

  // Host-side reference check, declared with C linkage; its definition is not
  // part of the code shown here. runTest() calls it with the host copy of the
  // device results and the total number of launched threads, and stores the
  // returned bool in testResult.
  extern "C" bool computeGold(int *gpuData, const int len);
  51
  ////////////////////////////////////////////////////////////////////////////////
  // Program main
  //
  // Announces the sample, runs the test, resets the device and reports the
  // verdict held in the global testResult.
  //
  // argc, argv  Command-line arguments, forwarded unchanged to runTest().
  //
  // Does not return normally: the process exits with EXIT_SUCCESS when
  // testResult is true and with EXIT_FAILURE otherwise.
  ////////////////////////////////////////////////////////////////////////////////
  int main(int argc, char **argv)
  {
      printf("%s starting...\n", sampleName);

      // runTest() records its verdict in the global testResult.
      runTest(argc, argv);

      // The return value is not checked; the verdict below is reported
      // regardless of whether the reset succeeds.
      cudaDeviceReset();

      // End the final report with a newline so the last line of output is
      // properly terminated, like every other message this sample prints.
      printf("%s completed, returned %s\n",
             sampleName,
             testResult ? "OK" : "ERROR!");
      exit(testResult ? EXIT_SUCCESS : EXIT_FAILURE);
  }
  67
  ////////////////////////////////////////////////////////////////////////////////
  //! Run a simple test for CUDA
  //!
  //! Selects a device, launches testKernel on a small integer buffer, copies
  //! the buffer back to the host and stores the result of computeGold() in the
  //! global testResult.
  //!
  //! @param argc  number of command-line arguments, passed to findCudaDevice()
  //! @param argv  command-line arguments, passed to findCudaDevice()
  //!
  //! Exits the process with EXIT_WAIVED when the selected device reports a
  //! compute capability below 1.1, and with EXIT_FAILURE when the host result
  //! buffer cannot be allocated.
  ////////////////////////////////////////////////////////////////////////////////
  void runTest(int argc, char **argv)
  {
      cudaDeviceProp deviceProp;
      deviceProp.major = 0;
      deviceProp.minor = 0;

      // This will pick the best possible CUDA capable device
      int dev = findCudaDevice(argc, (const char **)argv);

      checkCudaErrors(cudaGetDeviceProperties(&deviceProp, dev));

      // Statistics about the GPU device
      printf("> GPU device has %d Multi-Processors, "
             "SM %d.%d compute capabilities\n\n",
             deviceProp.multiProcessorCount, deviceProp.major, deviceProp.minor);

      // Pack major.minor into one value (major in the high nibble) so it can be
      // compared against 0x11, i.e. compute capability 1.1.
      int version = (deviceProp.major * 0x10 + deviceProp.minor);

      if (version < 0x11)
      {
          printf("%s: requires a minimum CUDA compute 1.1 capability, waiving testing.\n",
                 sampleName);
          exit(EXIT_WAIVED);
      }

      // The timer spans host allocation, both copies and the kernel launch.
      StopWatchInterface *timer;
      sdkCreateTimer(&timer);
      sdkStartTimer(&timer);

      unsigned int numThreads = 256;
      unsigned int numBlocks = 64;
      // Number of int result slots shared by all threads.
      // NOTE(review): presumably one slot per atomic operation exercised by
      // testKernel (defined in simpleAtomicIntrinsics_kernel.cuh) - confirm.
      unsigned int numData = 11;
      unsigned int memSize = sizeof(int) * numData;

      // allocate mem for the result on host side
      int *hOData = (int *) malloc(memSize);

      // Fail loudly instead of dereferencing a null pointer below.
      if (hOData == NULL)
      {
          fprintf(stderr, "%s: failed to allocate %u bytes of host memory\n",
                  sampleName, memSize);
          exit(EXIT_FAILURE);
      }

      // initialize the memory
      for (unsigned int i = 0; i < numData; i++)
          hOData[i] = 0;

      // To make the AND and XOR tests generate something other than 0...
      hOData[8] = hOData[10] = 0xff;

      // allocate device memory for result
      int *dOData;
      checkCudaErrors(cudaMalloc((void **) &dOData, memSize));
      // copy the initial values (zeros, except 0xff in entries 8 and 10) to the
      // device
      checkCudaErrors(cudaMemcpy(dOData,
                                 hOData,
                                 memSize,
                                 cudaMemcpyHostToDevice));

      // execute the kernel
      testKernel<<<numBlocks, numThreads>>>(dOData);
      getLastCudaError("Kernel execution failed");

      // Copy result from device to host. This is a blocking cudaMemcpy, so the
      // results are ready on the host once it returns.
      checkCudaErrors(cudaMemcpy(hOData,
                                 dOData,
                                 memSize,
                                 cudaMemcpyDeviceToHost));

      sdkStopTimer(&timer);
      printf("Processing time: %f (ms)\n", sdkGetTimerValue(&timer));
      sdkDeleteTimer(&timer);

      // Check the results against the reference; the second argument is the
      // total number of threads that were launched.
      testResult = computeGold(hOData, numThreads * numBlocks);

      // Cleanup memory
      free(hOData);
      checkCudaErrors(cudaFree(dOData));
  }