Sampling the tensor network state¶
The following code example illustrates how to sample the tensor network state (e.g., sampling the final quantum circuit state). The full code can be found in the NVIDIA/cuQuantum repository (here).
Headers and error handling¶
7#include <cstdlib>
8#include <cstdio>
9#include <cassert>
10#include <complex>
11#include <vector>
12#include <iostream>
13
14#include <cuda_runtime.h>
15#include <cutensornet.h>
16
17#define HANDLE_CUDA_ERROR(x) \
18{ const auto err = x; \
19 if( err != cudaSuccess ) \
20 { printf("CUDA error %s in line %d\n", cudaGetErrorString(err), __LINE__); fflush(stdout); std::abort(); } \
21};
22
23#define HANDLE_CUTN_ERROR(x) \
24{ const auto err = x; \
25 if( err != CUTENSORNET_STATUS_SUCCESS ) \
26 { printf("cuTensorNet error %s in line %d\n", cutensornetGetErrorString(err), __LINE__); fflush(stdout); std::abort(); } \
27};
28
29
30int main()
31{
32 static_assert(sizeof(size_t) == sizeof(int64_t), "Please build this sample on a 64-bit architecture!");
33
34 constexpr std::size_t fp64size = sizeof(double);
Define the tensor network state and the desired number of output samples to generate¶
Let’s define a tensor network state corresponding to a 16-qubit quantum circuit and request to produce 100 output samples for the full qubit register.
38 // Quantum state configuration
39 const int64_t numSamples = 100;
40 const int32_t numQubits = 16;
41 const std::vector<int64_t> qubitDims(numQubits, 2); // qubit size
42 std::cout << "Quantum circuit: " << numQubits << " qubits; " << numSamples << " samples\n";
Initialize the cuTensorNet library handle¶
46 // Initialize the cuTensorNet library
47 HANDLE_CUDA_ERROR(cudaSetDevice(0));
48 cutensornetHandle_t cutnHandle;
49 HANDLE_CUTN_ERROR(cutensornetCreate(&cutnHandle));
50 std::cout << "Initialized cuTensorNet library on GPU 0\n";
Define quantum gates on GPU¶
54 // Define necessary quantum gate tensors in Host memory
55 const double invsq2 = 1.0 / std::sqrt(2.0);
56 // Hadamard gate
57 const std::vector<std::complex<double>> h_gateH {{invsq2, 0.0}, {invsq2, 0.0},
58 {invsq2, 0.0}, {-invsq2, 0.0}};
59 // CX gate
60 const std::vector<std::complex<double>> h_gateCX {{1.0, 0.0}, {0.0, 0.0}, {0.0, 0.0}, {0.0, 0.0},
61 {0.0, 0.0}, {1.0, 0.0}, {0.0, 0.0}, {0.0, 0.0},
62 {0.0, 0.0}, {0.0, 0.0}, {0.0, 0.0}, {1.0, 0.0},
63 {0.0, 0.0}, {0.0, 0.0}, {1.0, 0.0}, {0.0, 0.0}};
64
65 // Copy quantum gates to Device memory
66 void *d_gateH{nullptr}, *d_gateCX{nullptr};
67 HANDLE_CUDA_ERROR(cudaMalloc(&d_gateH, 4 * (2 * fp64size)));
68 std::cout << "H gate buffer allocated on GPU: " << d_gateH << std::endl; //debug
69 HANDLE_CUDA_ERROR(cudaMalloc(&d_gateCX, 16 * (2 * fp64size)));
70 std::cout << "CX gate buffer allocated on GPU: " << d_gateCX << std::endl; //debug
71 std::cout << "Allocated quantum gate memory on GPU\n";
72 HANDLE_CUDA_ERROR(cudaMemcpy(d_gateH, h_gateH.data(), 4 * (2 * fp64size), cudaMemcpyHostToDevice));
73 HANDLE_CUDA_ERROR(cudaMemcpy(d_gateCX, h_gateCX.data(), 16 * (2 * fp64size), cudaMemcpyHostToDevice));
74 std::cout << "Copied quantum gates to GPU memory\n";
Create a pure tensor network state¶
Now let’s create a pure tensor network state for a 16-qubit quantum circuit.
78 // Create the initial quantum state
79 cutensornetState_t quantumState;
80 HANDLE_CUTN_ERROR(cutensornetCreateState(cutnHandle, CUTENSORNET_STATE_PURITY_PURE, numQubits, qubitDims.data(),
81 CUDA_C_64F, &quantumState));
82 std::cout << "Created the initial quantum state\n";
Apply quantum gates¶
Let’s construct the GHZ quantum circuit by applying the corresponding quantum gates.
86 // Construct the quantum circuit state (apply quantum gates)
87 int64_t id;
88 HANDLE_CUTN_ERROR(cutensornetStateApplyTensorOperator(cutnHandle, quantumState, 1, std::vector<int32_t>{{0}}.data(),
89 d_gateH, nullptr, 1, 0, 1, &id));
90 for(int32_t i = 1; i < numQubits; ++i) {
91 HANDLE_CUTN_ERROR(cutensornetStateApplyTensorOperator(cutnHandle, quantumState, 2, std::vector<int32_t>{{i-1,i}}.data(),
92 d_gateCX, nullptr, 1, 0, 1, &id));
93 }
94 std::cout << "Applied quantum gates\n";
Create the tensor network state sampler¶
Once the quantum circuit has been constructed, let’s create the tensor network state sampler for the full qubit register (all qubits).
98 // Create the quantum circuit sampler
99 cutensornetStateSampler_t sampler;
100 HANDLE_CUTN_ERROR(cutensornetCreateSampler(cutnHandle, quantumState, numQubits, nullptr, &sampler));
101 std::cout << "Created the quantum circuit sampler\n";
Configure the tensor network state sampler¶
Now we can configure the tensor network state sampler by setting the number of hyper-samples to be used by the tensor network contraction path finder.
105 // Configure the quantum circuit sampler
106 const int32_t numHyperSamples = 8; // desired number of hyper samples used in the tensor network contraction path finder
107 HANDLE_CUTN_ERROR(cutensornetSamplerConfigure(cutnHandle, sampler,
108 CUTENSORNET_SAMPLER_CONFIG_NUM_HYPER_SAMPLES, &numHyperSamples, sizeof(numHyperSamples)));
109 const int32_t rndSeed = 13; // explicit random seed to get the same results each run
110 HANDLE_CUTN_ERROR(cutensornetSamplerConfigure(cutnHandle, sampler,
111 CUTENSORNET_SAMPLER_CONFIG_DETERMINISTIC, &rndSeed, sizeof(rndSeed)));
Prepare the tensor network state sampler¶
Let’s create a workspace descriptor and prepare the tensor network state sampler.
115 // Query the free memory on Device
116 std::size_t freeSize {0}, totalSize {0};
117 HANDLE_CUDA_ERROR(cudaMemGetInfo(&freeSize, &totalSize));
118 const std::size_t workSizeAvailable = (freeSize - (freeSize % 4096)) / 2; // use half of available memory with alignment
119
120 // Prepare the quantum circuit sampler
121 cutensornetWorkspaceDescriptor_t workDesc;
122 HANDLE_CUTN_ERROR(cutensornetCreateWorkspaceDescriptor(cutnHandle, &workDesc));
123 HANDLE_CUTN_ERROR(cutensornetSamplerPrepare(cutnHandle, sampler, workSizeAvailable, workDesc, 0x0));
124 std::cout << "Prepared the quantum circuit state sampler\n";
125 double flops {0.0};
126 HANDLE_CUTN_ERROR(cutensornetSamplerGetInfo(cutnHandle, sampler,
127 CUTENSORNET_SAMPLER_INFO_FLOPS, &flops, sizeof(flops)));
128 std::cout << "Total flop count per sample = " << (flops/1e9) << " GFlop\n";
Allocate the workspace buffer on GPU and setup the workspace¶
Allocate the required scratch workspace buffer, and provide extra buffer for cache workspace.
132 // Attach the workspace buffer
133 int64_t worksize {0};
134 HANDLE_CUTN_ERROR(cutensornetWorkspaceGetMemorySize(cutnHandle,
135 workDesc,
136 CUTENSORNET_WORKSIZE_PREF_RECOMMENDED,
137 CUTENSORNET_MEMSPACE_DEVICE,
138 CUTENSORNET_WORKSPACE_SCRATCH,
139 &worksize));
140 assert(worksize > 0);
141
142 void *d_scratch {nullptr};
143 if(worksize <= workSizeAvailable) {
144 HANDLE_CUDA_ERROR(cudaMalloc(&d_scratch, worksize));
145 std::cout << "Allocated " << worksize << " bytes of scratch memory on GPU: "
146 << "[" << d_scratch << ":" << (void*)(((char*)(d_scratch)) + worksize) << ")\n";
147
148 HANDLE_CUTN_ERROR(cutensornetWorkspaceSetMemory(cutnHandle, workDesc, CUTENSORNET_MEMSPACE_DEVICE,
149 CUTENSORNET_WORKSPACE_SCRATCH, d_scratch, worksize));
150 }else{
151 std::cout << "ERROR: Insufficient workspace size on Device!\n";
152 std::abort();
153 }
154
155 int64_t reqCacheSize {0};
156 HANDLE_CUTN_ERROR(cutensornetWorkspaceGetMemorySize(cutnHandle,
157 workDesc,
158 CUTENSORNET_WORKSIZE_PREF_RECOMMENDED,
159 CUTENSORNET_MEMSPACE_DEVICE,
160 CUTENSORNET_WORKSPACE_CACHE,
161 &reqCacheSize));
162
163 //query the free size again
164 HANDLE_CUDA_ERROR(cudaMemGetInfo(&freeSize, &totalSize));
165 // grab the minimum of [required size, or 90% of the free memory (to avoid oversubscribing)]
166 const std::size_t cacheSizeAvailable = std::min(static_cast<size_t>(reqCacheSize), size_t(freeSize * 0.9) - (size_t(freeSize * 0.9) % 4096));
167 void *d_cache {nullptr};
168 HANDLE_CUDA_ERROR(cudaMalloc(&d_cache, cacheSizeAvailable));
169 std::cout << "Allocated " << cacheSizeAvailable << " bytes of cache memory on GPU: "
170 << "[" << d_cache << ":" << (void*)(((char*)(d_cache)) + cacheSizeAvailable) << ")\n";
171 HANDLE_CUTN_ERROR(cutensornetWorkspaceSetMemory(cutnHandle,
172 workDesc,
173 CUTENSORNET_MEMSPACE_DEVICE,
174 CUTENSORNET_WORKSPACE_CACHE,
175 d_cache,
176 cacheSizeAvailable));
177 std::cout << "Set the workspace buffer\n";
Perform sampling of the final quantum circuit state¶
Once everything had been set up, we perform sampling of the quantum circuit state and print the output samples.
181 // Sample the quantum circuit state
182 std::vector<int64_t> samples(numQubits * numSamples); // samples[SampleId][QubitId] reside in Host memory
183 HANDLE_CUTN_ERROR(cutensornetSamplerSample(cutnHandle, sampler, numSamples, workDesc, samples.data(), 0));
184 std::cout << "Performed quantum circuit state sampling\n";
185 std::cout << "Bit-string samples:\n";
186 for(int64_t i = 0; i < numSamples; ++i) {
187 for(int64_t j = 0; j < numQubits; ++j) std::cout << " " << samples[i * numQubits + j];
188 std::cout << std::endl;
189 }
Free resources¶
193 // Destroy the workspace descriptor
194 HANDLE_CUTN_ERROR(cutensornetDestroyWorkspaceDescriptor(workDesc));
195 std::cout << "Destroyed the workspace descriptor\n";
196
197 // Destroy the quantum circuit sampler
198 HANDLE_CUTN_ERROR(cutensornetDestroySampler(sampler));
199 std::cout << "Destroyed the quantum circuit state sampler\n";
200
201 // Destroy the quantum circuit state
202 HANDLE_CUTN_ERROR(cutensornetDestroyState(quantumState));
203 std::cout << "Destroyed the quantum circuit state\n";
204
205 HANDLE_CUDA_ERROR(cudaFree(d_scratch));
206 HANDLE_CUDA_ERROR(cudaFree(d_cache));
207 HANDLE_CUDA_ERROR(cudaFree(d_gateCX));
208 HANDLE_CUDA_ERROR(cudaFree(d_gateH));
209 std::cout << "Freed memory on GPU\n";
210
211 // Finalize the cuTensorNet library
212 HANDLE_CUTN_ERROR(cutensornetDestroy(cutnHandle));
213 std::cout << "Finalized the cuTensorNet library\n";
214
215 return 0;
216}