2 * Copyright 1993-2013 NVIDIA Corporation. All rights reserved.
4 * Please refer to the NVIDIA end user license agreement (EULA) associated
5 * with this source code for terms and conditions that govern your use of
6 * this software. Any use, reproduction, disclosure, or distribution of
7 * this software and related documentation outside the terms of the EULA
8 * is strictly prohibited.
13 * This sample demonstrates the use of CURAND to generate
14 * random numbers on GPU and CPU.
17 // Utilities and system includes
25 // Utilities and system includes
26 #include <helper_functions.h> // helper for shared functions common to CUDA SDK samples
27 #include <helper_cuda.h> // helper for CUDA Error handling
29 /* CUDA runtime API; the random numbers themselves are produced through CURAND */
30 #include <cuda_runtime.h>
// Forward declaration: takes the sample count and the two host buffers
// (GPU-generated and CPU-generated values). main() stores the returned float
// as the L1 norm that decides the process exit status.
33 float compareResults(int rand_n, float *h_RandGPU, float *h_RandCPU);
// Number of samples generated when the "count" command-line flag is absent.
35 const int DEFAULT_RAND_N = 2400000;
// Generator seed used when the "seed" command-line flag is absent.
36 const unsigned int DEFAULT_SEED = 777;
38 ///////////////////////////////////////////////////////////////////////////////
40 ///////////////////////////////////////////////////////////////////////////////
41 int main(int argc, char **argv)
44 printf("%s Starting...\n\n", argv[0]);
46 // initialize the GPU, either identified by --device
47 // or by picking the device with highest flop rate.
48 int devID = findCudaDevice(argc, (const char **)argv);
50 // parsing the number of random numbers to generate
51 int rand_n = DEFAULT_RAND_N;
53 if (checkCmdLineFlag(argc, (const char **) argv, "count"))
55 rand_n = getCmdLineArgumentInt(argc, (const char **) argv, "count");
58 printf("Allocating data for %i samples...\n", rand_n);
61 int seed = DEFAULT_SEED;
63 if (checkCmdLineFlag(argc, (const char **) argv, "seed"))
65 seed = getCmdLineArgumentInt(argc, (const char **) argv, "seed");
68 printf("Seeding with %i ...\n", seed);
72 checkCudaErrors(cudaMalloc((void **)&d_Rand, rand_n * sizeof(float)));
74 curandGenerator_t prngGPU;
75 checkCudaErrors(curandCreateGenerator(&prngGPU, CURAND_RNG_PSEUDO_MTGP32));
76 checkCudaErrors(curandSetPseudoRandomGeneratorSeed(prngGPU, seed));
78 curandGenerator_t prngCPU;
79 checkCudaErrors(curandCreateGeneratorHost(&prngCPU, CURAND_RNG_PSEUDO_MTGP32));
80 checkCudaErrors(curandSetPseudoRandomGeneratorSeed(prngCPU, seed));
83 // Example 1: Compare random numbers generated on GPU and CPU
84 float *h_RandGPU = (float *)malloc(rand_n * sizeof(float));
86 printf("Generating random numbers on GPU...\n\n");
87 checkCudaErrors(curandGenerateUniform(prngGPU, (float *) d_Rand, rand_n));
89 printf("\nReading back the results...\n");
90 checkCudaErrors(cudaMemcpy(h_RandGPU, d_Rand, rand_n * sizeof(float), cudaMemcpyDeviceToHost));
93 float *h_RandCPU = (float *)malloc(rand_n * sizeof(float));
95 printf("Generating random numbers on CPU...\n\n");
96 checkCudaErrors(curandGenerateUniform(prngCPU, (float *) h_RandCPU, rand_n));
98 printf("Comparing CPU/GPU random numbers...\n\n");
99 float L1norm = compareResults(rand_n, h_RandGPU, h_RandCPU);
102 // Example 2: Timing of random number generation on GPU
103 const int numIterations = 10;
105 StopWatchInterface *hTimer;
107 checkCudaErrors(cudaDeviceSynchronize());
108 sdkCreateTimer(&hTimer);
109 sdkResetTimer(&hTimer);
110 sdkStartTimer(&hTimer);
112 for (i = 0; i < numIterations; i++)
114 checkCudaErrors(curandGenerateUniform(prngGPU, (float *) d_Rand, rand_n));
117 checkCudaErrors(cudaDeviceSynchronize());
118 sdkStopTimer(&hTimer);
120 double gpuTime = 1.0e-3 * sdkGetTimerValue(&hTimer)/(double)numIterations;
122 printf("MersenneTwister, Throughput = %.4f GNumbers/s, Time = %.5f s, Size = %u Numbers\n",
123 1.0e-9 * rand_n / gpuTime, gpuTime, rand_n);
125 printf("Shutting down...\n");
127 checkCudaErrors(curandDestroyGenerator(prngGPU));
128 checkCudaErrors(curandDestroyGenerator(prngCPU));
129 checkCudaErrors(cudaFree(d_Rand));
130 sdkDeleteTimer(&hTimer);
135 exit(L1norm < 1e-6 ? EXIT_SUCCESS : EXIT_FAILURE);
139 float compareResults(int rand_n, float *h_RandGPU, float *h_RandCPU)
142 float rCPU, rGPU, delta;
143 float max_delta = 0.;
144 float sum_delta = 0.;
147 for (i = 0; i < rand_n; i++)
151 delta = fabs(rCPU - rGPU);
153 sum_ref += fabs(rCPU);
155 if (delta >= max_delta)
161 float L1norm = (float)(sum_delta / sum_ref);
162 printf("Max absolute error: %E\n", max_delta);
163 printf("L1 norm: %E\n\n", L1norm);