Generating the Same Sequence with NVPL RAND and cuRAND: Quasirandom

The following code shows that the NVPL RAND multi-threaded SOBOL-variant quasirandom generators produce the same sequence of numbers as the corresponding cuRAND generators.

#include <iostream>
#include <vector>
#include <chrono>
#include "common.hpp"

// Generates 64-bit scrambled SOBOL quasirandom integers with the NVPL RAND multi-threaded generator and,
// when NVPL_RAND_EXAMPLE_CURAND_AVAILABLE is defined, with the matching cuRAND generator, then checks that
// both sequences are identical. Without cuRAND, the XOR of the NVPL RAND output is compared against a
// hard-coded reference value instead (presumably recorded from a cuRAND run - confirm).
//
// Both generators are configured with the same offset and number of dimensions.
// NOTE(review): no ordering is set explicitly on either generator, yet the result messages mention
// "LEGACY ordering"; the wording is kept as-is - confirm it matches the default quasirandom ordering.
//
// Returns 0 when the sequences (or the checksum) match, -1 otherwise.
int main([[maybe_unused]] int argc, [[maybe_unused]] char* argv[]) {
    const size_t             length   = 10000;
    const unsigned long long offset   = 0x123456ULL;
    const unsigned int       ndims    = 2000;
    const unsigned int       nthreads = 64;

    std::vector<unsigned long long> array_rand(length);

    nvplRandGenerator_t gen_rand;
    const auto          gen_type = NVPL_RAND_RNG_QUASI_SCRAMBLED_SOBOL64;

    // NVPL RAND: multi-threaded generator with the shared offset / dimension settings
    NVPL_RAND_CHECK(nvplRandMTCreateGenerator(&gen_rand, gen_type, nthreads));
    NVPL_RAND_CHECK(nvplRandSetGeneratorOffset(gen_rand, offset));
    NVPL_RAND_CHECK(nvplRandSetQuasiRandomGeneratorDimensions(gen_rand, ndims));

    // Generate on CPU and report the wall-clock time of the generation call
    auto start = std::chrono::high_resolution_clock::now();
    NVPL_RAND_CHECK(nvplRandGenerateLongLong(gen_rand, array_rand.data(), length));
    auto end        = std::chrono::high_resolution_clock::now();
    auto elapsed_us = std::chrono::duration_cast<std::chrono::microseconds>(end - start);
    std::cout << "Multi thread nvplRandGenerate with SCRAMBLED_SOBOL64 using " << nthreads << " nthreads takes "
              << static_cast<float>(elapsed_us.count()) / 1000.f << " ms \n";

    NVPL_RAND_CHECK(nvplRandDestroyGenerator(gen_rand));

#ifdef NVPL_RAND_EXAMPLE_CURAND_AVAILABLE
    // CURAND
    curandGenerator_t gen_curand;
    const auto        curand_gen_type = CURAND_RNG_QUASI_SCRAMBLED_SOBOL64;

    unsigned long long*             devData;
    std::vector<unsigned long long> array_curand(length);

    /* Allocate `length` 64-bit unsigned integers on device */
    CUDA_CHECK(cudaMalloc(reinterpret_cast<void**>(&devData), length * sizeof(unsigned long long)));

    CURAND_CHECK(curandCreateGenerator(&gen_curand, curand_gen_type));
    CURAND_CHECK(curandSetGeneratorOffset(gen_curand, offset));
    CURAND_CHECK(curandSetQuasiRandomGeneratorDimensions(gen_curand, ndims));

    // Generate 64-bit random bits on GPU
    start = std::chrono::high_resolution_clock::now();
    CURAND_CHECK(curandGenerateLongLong(gen_curand, devData, length));
    // The generation work is launched asynchronously; wait for the device so that the
    // measured interval covers the actual generation and not only the launch.
    CUDA_CHECK(cudaDeviceSynchronize());
    end        = std::chrono::high_resolution_clock::now();
    elapsed_us = std::chrono::duration_cast<std::chrono::microseconds>(end - start);
    std::cout << "cuRandGenerate with SCRAMBLED_SOBOL64 takes " << static_cast<float>(elapsed_us.count()) / 1000.f
              << " ms \n";

    /* Copy device memory to host */
    CUDA_CHECK(cudaMemcpy(array_curand.data(), devData, length * sizeof(unsigned long long), cudaMemcpyDeviceToHost));

    CURAND_CHECK(curandDestroyGenerator(gen_curand));
    CUDA_CHECK(cudaFree(devData));

    // compute hash: XOR of all generated values
    unsigned long long xor_curand = 0x0;
    for (const unsigned long long value : array_curand) {
        xor_curand ^= value;
    }
    std::cout << "CURAND xor output is " << std::hex << xor_curand << std::endl;

    // Compare results: print five evenly spaced samples first ...
    for (auto i = 0U; i < length; i += length / 5) {
        printf("array_rand[%u] = %llu, array_curand[%u] = %llu \n", i, array_rand[i], i, array_curand[i]);
    }
    // ... then every position where the two sequences differ
    for (auto i = 0U; i < length; i++) {
        if (array_rand[i] != array_curand[i]) {
            printf("array_rand[%u] = %llu, array_curand[%u] = %llu \n", i, array_rand[i], i, array_curand[i]);
        }
    }
    if (array_rand == array_curand) {
        std::cout << "Same sequence is generated with NVPL RAND and cuRAND SOBOL64 generator using LEGACY ordering.\n";
        std::cout << "Success\n";
        return 0;
    } else {
        std::cout
            << "FAILED: different sequence is generated with NVPL RAND and cuRAND SOBOL64 generator using LEGACY ordering.\n";
        return -1;
    }
#else
    // Compare hash: XOR of all NVPL RAND values against the hard-coded reference value
    unsigned long long xor_rand = 0x0;
    for (const unsigned long long value : array_rand) {
        xor_rand ^= value;
    }
    std::cout << "NVPL RAND xor output is " << std::hex << xor_rand << std::endl;
    if (xor_rand == 0xa75c795246aa956c) {
        std::cout << "Same sequence is generated with NVPL RAND and cuRAND SOBOL64 generator using LEGACY ordering.\n";
        std::cout << "Success\n";
        return 0;
    } else {
        std::cout
            << "FAILED: Different sequence is generated with NVPL RAND and cuRAND SOBOL64 generator using LEGACY ordering.\n";
        return -1;
    }
#endif
}