Sampling the Matrix Product State¶
The following code example illustrates how to define a tensor network state for a given quantum circuit, then compute its Matrix Product State (MPS) factorization, and, finally, sample the MPS-factorized state. The full code can be found in the NVIDIA/cuQuantum repository (here).
Headers and error handling¶
7#include <cstdlib>
8#include <cstdio>
9#include <cassert>
10#include <complex>
11#include <vector>
12#include <iostream>
13
14#include <cuda_runtime.h>
15#include <cutensornet.h>
16
17#define HANDLE_CUDA_ERROR(x) \
18{ const auto err = x; \
19 if( err != cudaSuccess ) \
20 { printf("CUDA error %s in line %d\n", cudaGetErrorString(err), __LINE__); fflush(stdout); std::abort(); } \
21};
22
23#define HANDLE_CUTN_ERROR(x) \
24{ const auto err = x; \
25 if( err != CUTENSORNET_STATUS_SUCCESS ) \
26 { printf("cuTensorNet error %s in line %d\n", cutensornetGetErrorString(err), __LINE__); fflush(stdout); std::abort(); } \
27};
28
29
30int main()
31{
32 static_assert(sizeof(size_t) == sizeof(int64_t), "Please build this sample on a 64-bit architecture!");
33
34 constexpr std::size_t fp64size = sizeof(double);
Define the tensor network state and the desired number of output samples to generate¶
Let’s define a tensor network state corresponding to a 16-qubit quantum circuit and request to produce 100 output samples for the full qubit register.
38 // Quantum state configuration
39 const int64_t numSamples = 100;
40 const int32_t numQubits = 16;
41 const std::vector<int64_t> qubitDims(numQubits, 2); // qubit size
42 std::cout << "Quantum circuit: " << numQubits << " qubits; " << numSamples << " samples\n";
Initialize the cuTensorNet library handle¶
46 // Initialize the cuTensorNet library
47 HANDLE_CUDA_ERROR(cudaSetDevice(0));
48 cutensornetHandle_t cutnHandle;
49 HANDLE_CUTN_ERROR(cutensornetCreate(&cutnHandle));
50 std::cout << "Initialized cuTensorNet library on GPU 0\n";
Define quantum gates on GPU¶
54 // Define necessary quantum gate tensors in Host memory
55 const double invsq2 = 1.0 / std::sqrt(2.0);
56 // Hadamard gate
57 const std::vector<std::complex<double>> h_gateH {{invsq2, 0.0}, {invsq2, 0.0},
58 {invsq2, 0.0}, {-invsq2, 0.0}};
59 // CX gate
60 const std::vector<std::complex<double>> h_gateCX {{1.0, 0.0}, {0.0, 0.0}, {0.0, 0.0}, {0.0, 0.0},
61 {0.0, 0.0}, {1.0, 0.0}, {0.0, 0.0}, {0.0, 0.0},
62 {0.0, 0.0}, {0.0, 0.0}, {0.0, 0.0}, {1.0, 0.0},
63 {0.0, 0.0}, {0.0, 0.0}, {1.0, 0.0}, {0.0, 0.0}};
64
65 // Copy quantum gates to Device memory
66 void *d_gateH{nullptr}, *d_gateCX{nullptr};
67 HANDLE_CUDA_ERROR(cudaMalloc(&d_gateH, 4 * (2 * fp64size)));
68 HANDLE_CUDA_ERROR(cudaMalloc(&d_gateCX, 16 * (2 * fp64size)));
69 std::cout << "Allocated GPU memory for quantum gates\n";
70 HANDLE_CUDA_ERROR(cudaMemcpy(d_gateH, h_gateH.data(), 4 * (2 * fp64size), cudaMemcpyHostToDevice));
71 HANDLE_CUDA_ERROR(cudaMemcpy(d_gateCX, h_gateCX.data(), 16 * (2 * fp64size), cudaMemcpyHostToDevice));
72 std::cout << "Copied quantum gates to GPU memory\n";
Allocate MPS tensors¶
Here we set the shapes of MPS tensors and allocate GPU memory for their storage.
76 // Determine the MPS representation and allocate buffer for the MPS tensors
77 const int64_t maxExtent = 2; // GHZ state can be exactly represented with max bond dimension of 2
78 std::vector<std::vector<int64_t>> extents;
79 std::vector<int64_t*> extentsPtr(numQubits);
80 std::vector<void*> d_mpsTensors(numQubits, nullptr);
81 for (int32_t i = 0; i < numQubits; i++) {
82 if (i == 0) { // left boundary MPS tensor
83 extents.push_back({2, maxExtent});
84 HANDLE_CUDA_ERROR(cudaMalloc(&d_mpsTensors[i], 2 * maxExtent * 2 * fp64size));
85 }
86 else if (i == numQubits-1) { // right boundary MPS tensor
87 extents.push_back({maxExtent, 2});
88 HANDLE_CUDA_ERROR(cudaMalloc(&d_mpsTensors[i], 2 * maxExtent * 2 * fp64size));
89 }
90 else { // middle MPS tensors
91 extents.push_back({maxExtent, 2, maxExtent});
92 HANDLE_CUDA_ERROR(cudaMalloc(&d_mpsTensors[i], 2 * maxExtent * maxExtent * 2 * fp64size));
93 }
94 extentsPtr[i] = extents[i].data();
95 }
Allocate the scratch buffer on GPU¶
99 // Query the free memory on Device
100 std::size_t freeSize {0}, totalSize {0};
101 HANDLE_CUDA_ERROR(cudaMemGetInfo(&freeSize, &totalSize));
102 const std::size_t scratchSize = (freeSize - (freeSize % 4096)) / 2; // use half of available memory with alignment
103 void *d_scratch {nullptr};
104 HANDLE_CUDA_ERROR(cudaMalloc(&d_scratch, scratchSize));
105 std::cout << "Allocated " << scratchSize << " bytes of scratch memory on GPU: "
106 << "[" << d_scratch << ":" << (void*)(((char*)(d_scratch)) + scratchSize) << ")\n";
Create a pure tensor network state¶
Now let’s create a pure tensor network state for a 16-qubit quantum circuit.
110 // Create the initial quantum state
111 cutensornetState_t quantumState;
112 HANDLE_CUTN_ERROR(cutensornetCreateState(cutnHandle, CUTENSORNET_STATE_PURITY_PURE, numQubits, qubitDims.data(),
113 CUDA_C_64F, &quantumState));
114 std::cout << "Created the initial quantum state\n";
Apply quantum gates¶
Let’s construct the GHZ quantum circuit by applying the corresponding quantum gates.
118 // Construct the quantum circuit state (apply quantum gates)
119 int64_t id;
120 HANDLE_CUTN_ERROR(cutensornetStateApplyTensorOperator(cutnHandle, quantumState, 1, std::vector<int32_t>{{0}}.data(),
121 d_gateH, nullptr, 1, 0, 1, &id));
122 for(int32_t i = 1; i < numQubits; ++i) {
123 HANDLE_CUTN_ERROR(cutensornetStateApplyTensorOperator(cutnHandle, quantumState, 2, std::vector<int32_t>{{i-1,i}}.data(),
124 d_gateCX, nullptr, 1, 0, 1, &id));
125 }
126 std::cout << "Applied quantum gates\n";
Request MPS factorization for the final quantum circuit state¶
Here we express our intent to factorize the final quantum circuit state using MPS factorization. The provided shapes of the MPS tensors refer to their maximal size limit during the MPS renormalization procedure. The actually computed shapes of the final MPS tensors may be smaller. No computation is done here yet.
130 // Specify the final target MPS representation (use default fortran strides)
131 HANDLE_CUTN_ERROR(cutensornetStateFinalizeMPS(cutnHandle, quantumState,
132 CUTENSORNET_BOUNDARY_CONDITION_OPEN, extentsPtr.data(), /*strides=*/nullptr ));
Configure MPS factorization procedure¶
After expressing our intent to perform MPS factorization of the final quantum circuit state, we can also configure the MPS factorization procedure by resetting different options, for example, the SVD algorithm.
136 // Optional, set up the SVD method for truncation.
137 cutensornetTensorSVDAlgo_t algo = CUTENSORNET_TENSOR_SVD_ALGO_GESVDJ;
138 HANDLE_CUTN_ERROR(cutensornetStateConfigure(cutnHandle, quantumState,
139 CUTENSORNET_STATE_CONFIG_MPS_SVD_ALGO, &algo, sizeof(algo)));
140 std::cout << "Configured the MPS computation\n";
Prepare the computation of MPS factorization¶
Let’s create a workspace descriptor and prepare the computation of MPS factorization.
144 // Prepare the MPS computation and attach workspace
145 cutensornetWorkspaceDescriptor_t workDesc;
146 HANDLE_CUTN_ERROR(cutensornetCreateWorkspaceDescriptor(cutnHandle, &workDesc));
147 std::cout << "Created the workspace descriptor\n";
148 HANDLE_CUTN_ERROR(cutensornetStatePrepare(cutnHandle, quantumState, scratchSize, workDesc, 0x0));
149 std::cout << "Prepared the computation of the quantum circuit state\n";
150 double flops {0.0};
151 HANDLE_CUTN_ERROR(cutensornetStateGetInfo(cutnHandle, quantumState,
152 CUTENSORNET_STATE_INFO_FLOPS, &flops, sizeof(flops)));
153 if(flops > 0.0) {
154 std::cout << "Total flop count = " << (flops/1e9) << " GFlop\n";
155 }else if(flops < 0.0) {
156 std::cout << "ERROR: Negative Flop count!\n";
157 std::abort();
158 }
159
160 int64_t worksize {0};
161 HANDLE_CUTN_ERROR(cutensornetWorkspaceGetMemorySize(cutnHandle,
162 workDesc,
163 CUTENSORNET_WORKSIZE_PREF_RECOMMENDED,
164 CUTENSORNET_MEMSPACE_DEVICE,
165 CUTENSORNET_WORKSPACE_SCRATCH,
166 &worksize));
167 std::cout << "Scratch GPU workspace size (bytes) for MPS computation = " << worksize << std::endl;
168 if(worksize <= scratchSize) {
169 HANDLE_CUTN_ERROR(cutensornetWorkspaceSetMemory(cutnHandle, workDesc, CUTENSORNET_MEMSPACE_DEVICE,
170 CUTENSORNET_WORKSPACE_SCRATCH, d_scratch, worksize));
171 }else{
172 std::cout << "ERROR: Insufficient workspace size on Device!\n";
173 std::abort();
174 }
175 std::cout << "Set the workspace buffer for MPS computation\n";
Compute MPS factorization¶
Once the MPS factorization procedure has been configured and prepared, let’s compute the MPS factorization of the final quantum circuit state.
179 // Execute MPS computation
180 HANDLE_CUTN_ERROR(cutensornetStateCompute(cutnHandle, quantumState,
181 workDesc, extentsPtr.data(), /*strides=*/nullptr, d_mpsTensors.data(), 0));
182 std::cout << "Computed MPS factorization\n";
Create the tensor network state sampler¶
Once the quantum circuit has been constructed and factorized using the MPS representation, let’s create the tensor network state sampler for the full qubit register (all qubits).
186 // Create the quantum circuit sampler
187 cutensornetStateSampler_t sampler;
188 HANDLE_CUTN_ERROR(cutensornetCreateSampler(cutnHandle, quantumState, numQubits, nullptr, &sampler));
189 std::cout << "Created the quantum circuit sampler\n";
Configure the tensor network state sampler¶
Now we can configure the tensor network state sampler by setting the number of hyper-samples to be used by the tensor network contraction path finder.
193 // Configure the quantum circuit sampler
194 const int32_t numHyperSamples = 8; // desired number of hyper samples used in the tensor network contraction path finder
195 HANDLE_CUTN_ERROR(cutensornetSamplerConfigure(cutnHandle, sampler,
196 CUTENSORNET_SAMPLER_CONFIG_NUM_HYPER_SAMPLES, &numHyperSamples, sizeof(numHyperSamples)));
197 std::cout << "Configured the quantum circuit sampler\n";
Prepare the tensor network state sampler¶
Let’s prepare the tensor network state sampler.
201 // Prepare the quantum circuit sampler
202 HANDLE_CUTN_ERROR(cutensornetSamplerPrepare(cutnHandle, sampler, scratchSize, workDesc, 0x0));
203 std::cout << "Prepared the quantum circuit state sampler\n";
204 flops = 0.0;
205 HANDLE_CUTN_ERROR(cutensornetSamplerGetInfo(cutnHandle, sampler,
206 CUTENSORNET_SAMPLER_INFO_FLOPS, &flops, sizeof(flops)));
207 std::cout << "Total flop count per sample = " << (flops/1e9) << " GFlop\n";
208 if(flops <= 0.0) {
209 std::cout << "ERROR: Invalid Flop count!\n";
210 std::abort();
211 }
Set up the workspace¶
Now we can set up the required workspace buffer.
215 // Attach the workspace buffer
216 HANDLE_CUTN_ERROR(cutensornetWorkspaceGetMemorySize(cutnHandle,
217 workDesc,
218 CUTENSORNET_WORKSIZE_PREF_RECOMMENDED,
219 CUTENSORNET_MEMSPACE_DEVICE,
220 CUTENSORNET_WORKSPACE_SCRATCH,
221 &worksize));
222 std::cout << "Scratch GPU workspace size (bytes) for MPS Sampling = " << worksize << std::endl;
223 assert(worksize > 0);
224 if(worksize <= scratchSize) {
225 HANDLE_CUTN_ERROR(cutensornetWorkspaceSetMemory(cutnHandle, workDesc, CUTENSORNET_MEMSPACE_DEVICE,
226 CUTENSORNET_WORKSPACE_SCRATCH, d_scratch, worksize));
227 }else{
228 std::cout << "ERROR: Insufficient workspace size on Device!\n";
229 std::abort();
230 }
231 std::cout << "Set the workspace buffer\n";
Perform sampling of the final quantum circuit state¶
Once everything had been set up, we perform sampling of the quantum circuit state and print the output samples.
235 // Sample the quantum circuit state
236 std::vector<int64_t> samples(numQubits * numSamples); // samples[SampleId][QubitId] reside in Host memory
237 HANDLE_CUTN_ERROR(cutensornetSamplerSample(cutnHandle, sampler, numSamples, workDesc, samples.data(), 0));
238 std::cout << "Performed quantum circuit state sampling\n";
239 std::cout << "Bit-string samples:\n";
240 for(int64_t i = 0; i < numSamples; ++i) {
241 for(int64_t j = 0; j < numQubits; ++j) std::cout << " " << samples[i * numQubits + j];
242 std::cout << std::endl;
243 }
Free resources¶
247 // Destroy the workspace descriptor
248 HANDLE_CUTN_ERROR(cutensornetDestroyWorkspaceDescriptor(workDesc));
249 std::cout << "Destroyed the workspace descriptor\n";
250
251 // Destroy the quantum circuit sampler
252 HANDLE_CUTN_ERROR(cutensornetDestroySampler(sampler));
253 std::cout << "Destroyed the quantum circuit state sampler\n";
254
255 // Destroy the quantum circuit state
256 HANDLE_CUTN_ERROR(cutensornetDestroyState(quantumState));
257 std::cout << "Destroyed the quantum circuit state\n";
258
259 for (int32_t i = 0; i < numQubits; i++) {
260 HANDLE_CUDA_ERROR(cudaFree(d_mpsTensors[i]));
261 }
262 HANDLE_CUDA_ERROR(cudaFree(d_scratch));
263 HANDLE_CUDA_ERROR(cudaFree(d_gateCX));
264 HANDLE_CUDA_ERROR(cudaFree(d_gateH));
265 std::cout << "Freed memory on GPU\n";
266
267 // Finalize the cuTensorNet library
268 HANDLE_CUTN_ERROR(cutensornetDestroy(cutnHandle));
269 std::cout << "Finalized the cuTensorNet library\n";
270
271 return 0;
272}