Examples#
Library initialization#
#include <custabilizer.h>
int main() {
custabilizerHandle_t handle;
custabilizerCreate(&handle);
custabilizerDestroy(handle);
return 0;
}
Simple circuit simulation#
This example demonstrates how to create a circuit and apply it using custabilizerFrameSimulatorApplyCircuit().
#include <custabilizer.h>
#include <cuda_runtime.h>
int main() {
// Create library handle
custabilizerHandle_t handle;
custabilizerCreate(&handle);
// Define a simple circuit
const char* circuitString =
"X_ERROR(0.01) 0 1\n"
"H 0\n"
"CNOT 0 1\n"
"M 0 1\n";
// Create circuit
int64_t bufferSize;
custabilizerCircuitSizeFromString(handle, circuitString, &bufferSize);
void* bufferDevice;
cudaMalloc(&bufferDevice, bufferSize);
custabilizerCircuit_t circuit;
custabilizerCreateCircuitFromString(handle, circuitString, bufferDevice,
bufferSize, &circuit);
// Set up frame simulator parameters
int64_t numQubits = 2;
int64_t numShots = 1017;
int64_t numMeasurements = 2;
int64_t stride = ((numShots + 7) / 8 + 3) & ~3; // Pad to multiple of 4
// Create frame simulator
custabilizerFrameSimulator_t frameSimulator;
custabilizerCreateFrameSimulator(handle, numQubits, numShots,
numMeasurements, stride, &frameSimulator);
// Allocate and initialize bit tables
int64_t bitTableBytes = numQubits * stride;
int64_t mTableBytes = numMeasurements * stride;
custabilizerBitInt_t* xTableDevice;
custabilizerBitInt_t* zTableDevice;
custabilizerBitInt_t* mTableDevice;
cudaMalloc(&xTableDevice, bitTableBytes);
cudaMalloc(&zTableDevice, bitTableBytes);
cudaMalloc(&mTableDevice, mTableBytes);
cudaMemset(xTableDevice, 0, bitTableBytes);
cudaMemset(zTableDevice, 0, bitTableBytes);
cudaMemset(mTableDevice, 0, mTableBytes);
// Apply circuit to the Pauli frames
uint64_t seed = 42;
int randomizeFrameAfterMeasurement = 1;
cudaStream_t stream = 0;
custabilizerFrameSimulatorApplyCircuit(handle, frameSimulator, circuit,
randomizeFrameAfterMeasurement, seed,
xTableDevice, zTableDevice, mTableDevice,
stream);
cudaStreamSynchronize(stream);
// Clean up
cudaFree(xTableDevice);
cudaFree(zTableDevice);
cudaFree(mTableDevice);
custabilizerDestroyFrameSimulator(frameSimulator);
custabilizerDestroyCircuit(circuit);
cudaFree(bufferDevice);
custabilizerDestroy(handle);
return 0;
}
DEM sampling pipeline#
This example demonstrates Bernoulli sampling from a probability array followed
by a GF(2) sparse-dense matrix multiplication to compute detector outcomes.
This is the building-block workflow behind DEMSampler.
#include <custabilizer.h>
#include <cuda_runtime.h>
#include <stdlib.h>
int main() {
custabilizerHandle_t handle;
custabilizerCreate(&handle);
int64_t numErrors = 4;
int64_t numDets = 3;
int64_t numShots = 1024;
int64_t numDetsPad = ((numDets + 31) / 32) * 32; // round up to 32
// Error probabilities
double probsHost[] = {0.01, 0.02, 0.005, 0.03};
double* probsDev;
cudaMalloc(&probsDev, numErrors * sizeof(double));
cudaMemcpy(probsDev, probsHost, numErrors * sizeof(double),
cudaMemcpyHostToDevice);
// Step 1: Sparse Bernoulli sampling
size_t wsSize;
custabilizerSampleProbArraySparsePrepare(handle, numShots, numErrors,
&wsSize);
void* wsDev;
cudaMalloc(&wsDev, wsSize);
int64_t capacity = numShots; // initial guess
uint64_t* colDev;
uint64_t* rowDev;
cudaMalloc(&colDev, capacity * sizeof(uint64_t));
cudaMalloc(&rowDev, (numShots + 1) * sizeof(uint64_t));
uint64_t nnz = (uint64_t)capacity;
custabilizerStatus_t st = custabilizerSampleProbArraySparseCompute(
handle, numShots, numErrors, probsDev, 42,
&nnz, colDev, rowDev, wsDev, wsSize, 0);
if (st == CUSTABILIZER_STATUS_INSUFFICIENT_SPARSE_STORAGE) {
// nnz now holds the required capacity; reallocate and retry
cudaFree(colDev);
cudaMalloc(&colDev, nnz * sizeof(uint64_t));
custabilizerSampleProbArraySparseCompute(
handle, numShots, numErrors, probsDev, 42,
&nnz, colDev, rowDev, wsDev, wsSize, 0);
}
// Step 2: Detector matrix B (dense, bit-packed)
int64_t wordsPerRow = numDetsPad / 32;
int64_t bBytes = numErrors * wordsPerRow * sizeof(custabilizerBitInt_t);
custabilizerBitInt_t* bHost = (custabilizerBitInt_t*)calloc(
numErrors * wordsPerRow, sizeof(custabilizerBitInt_t));
// error 0 -> det 0,1 | error 1 -> det 1
// error 2 -> det 2 | error 3 -> det 0,2
bHost[0 * wordsPerRow] = 0x3; // bits 0,1
bHost[1 * wordsPerRow] = 0x2; // bit 1
bHost[2 * wordsPerRow] = 0x4; // bit 2
bHost[3 * wordsPerRow] = 0x5; // bits 0,2
custabilizerBitInt_t* bDev;
cudaMalloc(&bDev, bBytes);
cudaMemcpy(bDev, bHost, bBytes, cudaMemcpyHostToDevice);
// Step 3: GF(2) matmul C = S @ B
int64_t cWords = numShots * wordsPerRow;
custabilizerBitInt_t* cDev;
cudaMalloc(&cDev, cWords * sizeof(custabilizerBitInt_t));
custabilizerGF2SparseDenseMatrixMultiply(
handle, numShots, numDetsPad, numErrors, nnz,
colDev, rowDev, bDev, 0, cDev, 0);
cudaDeviceSynchronize();
// Clean up
free(bHost);
cudaFree(cDev);
cudaFree(bDev);
cudaFree(colDev);
cudaFree(rowDev);
cudaFree(wsDev);
cudaFree(probsDev);
custabilizerDestroy(handle);
return 0;
}