2 * Copyright 1993-2013 NVIDIA Corporation. All rights reserved.
4 * Please refer to the NVIDIA end user license agreement (EULA) associated
5 * with this source code for terms and conditions that govern your use of
6 * this software. Any use, reproduction, disclosure, or distribution of
7 * this software and related documentation outside the terms of the EULA
8 * is strictly prohibited.
12 /* A simple program demonstrating trivial use of global memory atomic
13 * device functions (atomic*() functions).
23 # define WINDOWS_LEAN_AND_MEAN
29 #include <cuda_runtime.h>
31 // Utilities and timing functions
32 #include <helper_functions.h> // includes cuda.h and cuda_runtime_api.h
34 // CUDA helper functions
35 #include <helper_cuda.h> // helper functions for CUDA error check
38 #include "simpleAtomicIntrinsics_kernel.cuh"
// Name of this sample; passed as the %s argument of the "starting..." banner
// printed by main().
40 const char *sampleName = "simpleAtomicIntrinsics";
42 ////////////////////////////////////////////////////////////////////////////////
43 // Auto-Verification Code
// Overall pass/fail flag. Starts out true, is overwritten in runTest() with the
// value returned by computeGold(), and selects both the "OK"/"ERROR!" text and
// the process exit status in main().
44 bool testResult = true;
46 ////////////////////////////////////////////////////////////////////////////////
47 // Declaration, forward
// Host-side test driver; defined further down in this file.
48 void runTest(int argc, char **argv);
// Reference check with C linkage; its definition is not part of this file view.
// runTest() calls it with the host result buffer and numThreads * numBlocks.
50 extern "C" bool computeGold(int *gpuData, const int len);
52 ////////////////////////////////////////////////////////////////////////////////
54 ////////////////////////////////////////////////////////////////////////////////
// Program entry point.
//
// Prints a start banner containing sampleName, later prints a completion line
// that reports "OK" or "ERROR!" depending on the global testResult, and ends
// the process with EXIT_SUCCESS or EXIT_FAILURE accordingly.
//
// Parameters:
//   argc, argv - command-line arguments as received from the environment.
//
// NOTE(review): the statements between the banner and the completion message
// are not visible in this view; presumably runTest(argc, argv) is invoked
// there, since testResult is only reassigned inside runTest() - confirm.
55 int main(int argc, char **argv)
57 printf("%s starting...\n", sampleName);
// NOTE(review): this format string has two %s conversions but only the
// "OK"/"ERROR!" argument is visible below; presumably sampleName is supplied on
// a line not shown here - confirm. The message also has no trailing newline.
62 printf("%s completed, returned %s",
64 testResult ? "OK" : "ERROR!");
// exit() does not return; the status mirrors testResult.
65 exit(testResult ? EXIT_SUCCESS : EXIT_FAILURE);
68 ////////////////////////////////////////////////////////////////////////////////
69 //! Run a simple test for CUDA
70 ////////////////////////////////////////////////////////////////////////////////
// Host-side driver for the sample: selects a device, launches testKernel on a
// small integer buffer, copies the buffer back and stores the verdict of
// computeGold() in the global testResult.
//
// Parameters:
//   argc, argv - command-line arguments; forwarded to findCudaDevice().
//
// Returns nothing; the outcome is communicated through testResult.
//
// NOTE(review): several statements of this function are not visible in this
// view (declarations of dev and dOData, the capability guard, the loop body,
// the middle arguments of both cudaMemcpy calls). The notes below mark each gap.
71 void runTest(int argc, char **argv)
73 cudaDeviceProp deviceProp;
78 // This will pick the best possible CUDA capable device
// NOTE(review): the declaration of dev is not visible here.
79 dev = findCudaDevice(argc, (const char **)argv);
// Query the properties of the selected device; they are printed and used for
// the capability value computed below.
81 checkCudaErrors(cudaGetDeviceProperties(&deviceProp, dev));
83 // Statistics about the GPU device
84 printf("> GPU device has %d Multi-Processors, "
85 "SM %d.%d compute capabilities\n\n",
86 deviceProp.multiProcessorCount, deviceProp.major, deviceProp.minor);
// Pack major/minor into one integer, 16 * major + minor (e.g. 1.1 -> 0x11).
88 int version = (deviceProp.major * 0x10 + deviceProp.minor);
// NOTE(review): the condition guarding this message, its remaining argument and
// whatever follows it are not visible here; presumably version is compared
// against 0x11 and the test is skipped when the device is older - confirm.
92 printf("%s: requires a minimum CUDA compute 1.1 capability, waiving testing.\n",
// The timer is started before any allocation or copy and stopped after the
// device-to-host copy, so the reported time covers host/device allocation, both
// transfers and the kernel, not the kernel alone.
97 StopWatchInterface *timer;
98 sdkCreateTimer(&timer);
99 sdkStartTimer(&timer);
// Launch configuration: 64 blocks of 256 threads (16384 threads in total).
101 unsigned int numThreads = 256;
102 unsigned int numBlocks = 64;
// The result buffer holds 11 ints; presumably one slot per atomic operation
// exercised by testKernel - verify against the kernel source.
103 unsigned int numData = 11;
104 unsigned int memSize = sizeof(int) * numData;
106 //allocate mem for the result on host side
// NOTE(review): the malloc result is not checked for NULL in the visible code.
107 int *hOData = (int *) malloc(memSize);
109 // initialize the memory
// NOTE(review): the loop body is not visible here; presumably it sets
// hOData[i] to 0 - confirm.
110 for (unsigned int i = 0; i < numData; i++)
113 //To make the AND and XOR tests generate something other than 0...
// Slots 8 and 10 start at 0xff instead.
114 hOData[8] = hOData[10] = 0xff;
116 // allocate device memory for result
// NOTE(review): the declaration of dOData is not visible here.
118 checkCudaErrors(cudaMalloc((void **) &dOData, memSize));
119 // copy the initialized host buffer to the device (slots 8 and 10 hold 0xff)
// NOTE(review): the source pointer and size arguments are not visible here.
120 checkCudaErrors(cudaMemcpy(dOData,
123 cudaMemcpyHostToDevice));
125 // execute the kernel
// Launched without a stream argument, i.e. on the default stream.
126 testKernel<<<numBlocks, numThreads>>>(dOData);
// Presumably reports any error recorded by the launch above, using the given
// message - helper behavior is defined in helper_cuda.h, confirm there.
127 getLastCudaError("Kernel execution failed");
129 //Copy result from device to host
// NOTE(review): no explicit synchronization call is visible between the launch
// and this copy; the code relies on cudaMemcpy completing only after the kernel
// has finished. The source pointer and size arguments are not visible here.
130 checkCudaErrors(cudaMemcpy(hOData,
133 cudaMemcpyDeviceToHost));
135 sdkStopTimer(&timer);
136 printf("Processing time: %f (ms)\n", sdkGetTimerValue(&timer));
137 sdkDeleteTimer(&timer);
139 // Compute reference solution
// The reference check receives the host copy of the results and the total
// number of launched threads; its return value becomes the sample's verdict.
140 testResult = computeGold(hOData, numThreads * numBlocks);
// Release the device buffer.
// NOTE(review): no free(hOData) is visible in this view - confirm the host
// buffer is released on a line not shown here.
144 checkCudaErrors(cudaFree(dOData));