Generating the Same Sequence with NVPL RAND and cuRAND: Pseudorandom
The following code shows that, with NVPL_RAND_ORDERING_CURAND_LEGACY
ordering, NVPL RAND multi-threaded pseudorandom generators create exactly the same sequence of random numbers as the one generated with CURAND_ORDERING_PSEUDO_LEGACY
ordering using cuRAND on GPUs.
#include <iostream>
#include <vector>
#include <chrono>
#include "common.hpp"
// Use NVPL RAND Philox and cuRAND Philox generators to generate random 32-bit unsigned integers and compare results.
//
// The NVPL RAND multi-threaded Philox4_32_10 generator is configured with NVPL_RAND_ORDERING_CURAND_LEGACY and
// the same seed/offset as a cuRAND Philox4_32_10 generator using CURAND_ORDERING_PSEUDO_LEGACY.
//
// When NVPL_RAND_EXAMPLE_CURAND_AVAILABLE is defined, both sequences are generated and compared element by
// element. Otherwise only the NVPL RAND sequence is generated and its XOR checksum is compared against a
// hard-coded reference value (presumably recorded from a cuRAND run with the same settings -- confirm before
// changing length, seed or offset).
//
// Returns 0 when the sequences (or checksums) match, -1 otherwise.
int main([[maybe_unused]] int argc, [[maybe_unused]] char* argv[]) {
    const size_t             length      = 100000;
    const unsigned int       num_threads = 64;
    const unsigned long long seed        = 123456ULL;
    const unsigned long long offset      = 123456ULL;

    // Elapsed wall-clock time in milliseconds, measured with microsecond resolution
    const auto elapsed_ms = [](const auto& begin, const auto& finish) {
        const auto elapsed_us = std::chrono::duration_cast<std::chrono::microseconds>(finish - begin);
        return float(elapsed_us.count()) / 1000.f;
    };

    std::vector<unsigned int> array_rand(length);
    nvplRandGenerator_t       gen_rand;
    const auto                gen_type = NVPL_RAND_RNG_PSEUDO_PHILOX4_32_10;

    // Create multi-threaded generator and set seed, offset and ordering
    NVPL_RAND_CHECK(nvplRandMTCreateGenerator(&gen_rand, gen_type, num_threads));
    NVPL_RAND_CHECK(nvplRandSetPseudoRandomGeneratorSeed(gen_rand, seed));
    NVPL_RAND_CHECK(nvplRandSetGeneratorOffset(gen_rand, offset));
    NVPL_RAND_CHECK(nvplRandMTSetGeneratorOrdering(gen_rand, NVPL_RAND_ORDERING_CURAND_LEGACY));

    // Generate 32-bit random bits
    const auto rand_start = std::chrono::high_resolution_clock::now();
    NVPL_RAND_CHECK(nvplRandGenerate(gen_rand, array_rand.data(), length));
    const auto rand_end = std::chrono::high_resolution_clock::now();
    std::cout << "Multi thread nvplRandGenerate with " << num_threads << " nthreads takes "
              << elapsed_ms(rand_start, rand_end) << " ms \n";

    // Cleanup
    NVPL_RAND_CHECK(nvplRandDestroyGenerator(gen_rand));

#ifdef NVPL_RAND_EXAMPLE_CURAND_AVAILABLE
    // CURAND
    curandGenerator_t         gen_curand;
    const auto                curand_gen_type = CURAND_RNG_PSEUDO_PHILOX4_32_10;
    unsigned int*             devData         = nullptr;
    std::vector<unsigned int> array_curand(length);

    /* Allocate `length` 32-bit unsigned integers on device */
    CUDA_CHECK(cudaMalloc(reinterpret_cast<void**>(&devData), length * sizeof(unsigned int)));

    /* Create pseudo-random number generator with the same seed and offset as the NVPL RAND generator */
    CURAND_CHECK(curandCreateGenerator(&gen_curand, curand_gen_type));
    CURAND_CHECK(curandSetPseudoRandomGeneratorSeed(gen_curand, seed));
    CURAND_CHECK(curandSetGeneratorOffset(gen_curand, offset));
    CURAND_CHECK(curandSetGeneratorOrdering(gen_curand, CURAND_ORDERING_PSEUDO_LEGACY));

    // Generate 32-bit random bits on device
    const auto curand_start = std::chrono::high_resolution_clock::now();
    CURAND_CHECK(curandGenerate(gen_curand, devData, length));
    // Device-side generation may still be in flight when curandGenerate returns; wait for it so that
    // the reported time covers the generation itself and not just the launch.
    CUDA_CHECK(cudaDeviceSynchronize());
    const auto curand_end = std::chrono::high_resolution_clock::now();
    std::cout << "cuRandGenerate with CURAND_ORDERING_PSEUDO_LEGACY takes "
              << elapsed_ms(curand_start, curand_end) << " ms \n";

    /* Copy device memory to host */
    CUDA_CHECK(cudaMemcpy(array_curand.data(), devData, length * sizeof(unsigned int), cudaMemcpyDeviceToHost));
    CURAND_CHECK(curandDestroyGenerator(gen_curand));
    CUDA_CHECK(cudaFree(devData));

    // Compute hash (XOR of all generated values)
    unsigned int xor_curand = 0x0;
    for (const unsigned int value : array_curand) {
        xor_curand ^= value;
    }
    // std::hex is sticky, so switch back to decimal for the output that follows
    std::cout << "CURAND xor output is " << std::hex << xor_curand << std::dec << std::endl;

    // Print a few evenly spaced samples from both sequences
    for (size_t i = 0; i < length; i += length / 5) {
        std::cout << "array_rand[" << i << "] = " << array_rand[i] << ", array_curand[" << i
                  << "] = " << array_curand[i] << " \n";
    }

    // Compare results element by element
    const bool same_sequence = (array_rand == array_curand);
#else
    // Compare hash (XOR of all generated values) against the hard-coded reference value
    unsigned int xor_rand = 0x0;
    for (const unsigned int value : array_rand) {
        xor_rand ^= value;
    }
    const bool same_sequence = (xor_rand == 0xc7042bf2);
#endif

    if (!same_sequence) {
        std::cout << "FAILED: different sequences are generated with NVPL RAND and cuRAND generator using LEGACY "
                     "ordering.\n";
        return -1;
    }

    std::cout << "Same sequence is generated with NVPL RAND and cuRAND generator using LEGACY ordering.\n";
    std::cout << "Success\n";
    return 0;
}