Quasirandom Scrambled Sobol Generators
The following example uses the cuRANDDx API to generate uniformly distributed data with the 64-bit scrambled Sobol generator and compares the results with those generated by the cuRAND host API.
// Copyright (c) 2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
//
// NVIDIA CORPORATION and its licensors retain all intellectual property
// and proprietary rights in and to this software, related documentation
// and any modifications thereto. Any use, reproduction, disclosure or
// distribution of this software and related documentation without an express
// license agreement from NVIDIA CORPORATION is strictly prohibited.
#include <iostream>
#include <vector>
#include <cassert>
#include <curanddx.hpp>
#include "common.hpp"
// This example demonstrates how to use the scrambled sobol64 quasi-random generator with the cuRANDDx thread-level
// operator to generate a sequence of random numbers and compare it with the results generated using the cuRAND host API
template<class RNG, typename DataType>
__global__ void generate_kernel(DataType* d_out,
typename RNG::direction_vector_type* direction_vectors,
const typename RNG::scrambled_const_type* scrambled_consts,
const typename RNG::offset_type offset,
const unsigned int num_dims,
const size_t size,
DataType input1,
DataType input2) {
int tid = blockDim.x * blockIdx.x + threadIdx.x;
const int threads = blockDim.x * gridDim.x;
curanddx::uniform<DataType> my_uniform(input1, input2);
// When generating n results in m dimensions, the output consists of n/m results from dimension 0, followed by
// another n/m results from dimension 1, and so on.
const int size_per_dim = size / num_dims;
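// For the values this example passes (size = 5000, num_dims = 200), size_per_dim is 25:
// indices 0..24 belong to dimension 0, 25..49 to dimension 1, and so on.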
for (auto idx = tid; idx < size; idx += threads) {
// for each dimension, tid0 generates the first number, tid1 the second number etc.
const int idx_per_dim = idx % size_per_dim;
const int dim = idx / size_per_dim;
RNG rng(dim, direction_vectors, offset + idx_per_dim, scrambled_consts);
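// Each output element constructs its own generator state from the element's Sobol dimension,
// the direction-vector table, its absolute sequence position (the user offset plus the index
// within the dimension), and the per-dimension scramble constants.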
d_out[idx] = my_uniform.generate(rng);
}
}
template<unsigned int Arch>
int sobol_thread_api() {
using RNG =
decltype(curanddx::Generator<curanddx::scrambled_sobol64>() + curanddx::SM<Arch>() + curanddx::Thread());
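// The RNG description combines operators: the generator selection (scrambled_sobol64),
// the target architecture (SM<Arch>), and the execution scope (Thread for per-thread generation)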
using DataType = float;
// Allocate output memory
DataType* d_out;
const size_t size = 5000;
CUDA_CHECK_AND_EXIT(cudaMalloc((void**)&d_out, size * sizeof(DataType)));
// size has to be evenly divisible by the number of dimensions
const unsigned int sobol_dims = 200;
assert(size % sobol_dims == 0);
const typename RNG::offset_type offset = 10ULL;
const DataType min_v = -1;
const DataType max_v = 1;
// Step 1: Get pointers to the host direction vectors using cuRAND API
using direction_vector_type = typename RNG::direction_vector_type;
direction_vector_type* h_direction_vectors;
CURAND_CHECK_AND_EXIT(
curandGetDirectionVectors64(&h_direction_vectors, CURAND_SCRAMBLED_DIRECTION_VECTORS_64_JOEKUO6));
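// cuRAND returns a pointer into a precomputed host-side table; each entry holds the 64
// 64-bit direction values for a single dimension, so only sobol_dims entries are copied below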
// Step 2: Allocate memory and copy one direction vector per dimension to the device
direction_vector_type* direction_vectors_gmem_ptr;
CUDA_CHECK_AND_EXIT(cudaMalloc((void**)&(direction_vectors_gmem_ptr), sobol_dims * sizeof(direction_vector_type)));
CUDA_CHECK_AND_EXIT(cudaMemcpy(direction_vectors_gmem_ptr,
h_direction_vectors,
sobol_dims * sizeof(direction_vector_type),
cudaMemcpyHostToDevice));
// Step 3: Get pointers to the host scrambled constants
using scrambled_const_type = typename RNG::scrambled_const_type;
scrambled_const_type* h_scrambled_consts;
scrambled_const_type* scrambled_consts_gmem_ptr;
CURAND_CHECK_AND_EXIT(curandGetScrambleConstants64(&h_scrambled_consts));
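// As with the direction vectors, cuRAND returns a host-side table holding one 64-bit
// scramble constant per dimension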
// Step 4: Allocate memory and copy one scrambled const per dimension to the device
CUDA_CHECK_AND_EXIT(cudaMalloc((void**)&(scrambled_consts_gmem_ptr), sobol_dims * sizeof(scrambled_const_type)));
CUDA_CHECK_AND_EXIT(cudaMemcpy(scrambled_consts_gmem_ptr,
h_scrambled_consts,
sobol_dims * sizeof(scrambled_const_type),
cudaMemcpyHostToDevice));
// Invoke a kernel that uses cuRANDDx functions
const unsigned int block_dim = 256;
const unsigned int grid_size = 16;
generate_kernel<RNG, DataType><<<grid_size, block_dim, 0>>>(
d_out, direction_vectors_gmem_ptr, scrambled_consts_gmem_ptr, offset, sobol_dims, size, min_v, max_v);
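// 16 blocks x 256 threads give 4096 threads; the grid-stride loop in the kernel covers all 5000 outputs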
CUDA_CHECK_AND_EXIT(cudaPeekAtLastError());
CUDA_CHECK_AND_EXIT(cudaDeviceSynchronize());
std::vector<DataType> h_out(size);
CUDA_CHECK_AND_EXIT(cudaMemcpy(h_out.data(), d_out, size * sizeof(DataType), cudaMemcpyDeviceToHost));
// cuRAND host API
curandGenerator_t gen_curand;
DataType* d_ref;
CUDA_CHECK_AND_EXIT(cudaMalloc((void**)&d_ref, size * sizeof(DataType)));
CURAND_CHECK_AND_EXIT(curandCreateGenerator(&gen_curand, CURAND_RNG_QUASI_SCRAMBLED_SOBOL64));
CURAND_CHECK_AND_EXIT(curandSetGeneratorOffset(gen_curand, offset));
CURAND_CHECK_AND_EXIT(curandSetQuasiRandomGeneratorDimensions(gen_curand, sobol_dims));
CURAND_CHECK_AND_EXIT(curandSetGeneratorOrdering(gen_curand, CURAND_ORDERING_QUASI_DEFAULT));
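// CURAND_ORDERING_QUASI_DEFAULT emits n/m consecutive results per dimension, matching the
// layout produced by the kernel above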
CURAND_CHECK_AND_EXIT(curandGenerateUniform(gen_curand, d_ref, size));
std::vector<DataType> h_ref(size);
CUDA_CHECK_AND_EXIT(cudaMemcpy(h_ref.data(), d_ref, size * sizeof(DataType), cudaMemcpyDeviceToHost));
// curandGenerateUniform produces values in (0, 1]; scale the reference data to the range (min_v, max_v]
for (DataType& v : h_ref) { v = min_v + v * (max_v - min_v); }
CURAND_CHECK_AND_EXIT(curandDestroyGenerator(gen_curand));
CUDA_CHECK_AND_EXIT(cudaFree(scrambled_consts_gmem_ptr));
CUDA_CHECK_AND_EXIT(cudaFree(direction_vectors_gmem_ptr));
CUDA_CHECK_AND_EXIT(cudaFree(d_out));
CUDA_CHECK_AND_EXIT(cudaFree(d_ref));
// Compare results: both paths consume the same direction vectors, scramble constants, offset,
// and ordering, so the two sequences are expected to match exactly
if (h_out == h_ref) {
std::cout
<< "SUCCESS: Same sequence is generated with cuRANDDx and cuRAND Host API using QUASI_DEFAULT ordering.\n";
return 0;
} else {
for (auto i = 0U; i < 10; i++) {
std::cout << "array_curanddx[" << i << "] = " << h_out[i] << " array_curand[" << i << "] = " << h_ref[i]
<< std::endl;
}
std::cout << "FAILED: Different sequence is generated with cuRANDDx and cuRAND Host API using LEGACY "
"ordering.\n";
return 1;
}
}
template<unsigned int Arch>
struct sobol_thread_api_functor {
int operator()() { return sobol_thread_api<Arch>(); }
};
int main(int, char**) {
return example::sm_runner<sobol_thread_api_functor>();
}
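If the two sequences match element for element, the example prints the SUCCESS message and returns 0; otherwise it prints the first ten values from each sequence for inspection and returns 1.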