Computing Matrix Product State (MPS) Amplitudes¶
The following code example illustrates how to define a tensor network state, factorize it as a Matrix Product State (MPS), and then compute a slice of amplitudes of the factorized MPS state. The full code can be found in the NVIDIA/cuQuantum repository (here).
Headers and error handling¶
7#include <cstdlib>
8#include <cstdio>
9#include <cassert>
10#include <complex>
11#include <vector>
12#include <bitset>
13#include <iostream>
14
15#include <cuda_runtime.h>
16#include <cutensornet.h>
17
18
19#define HANDLE_CUDA_ERROR(x) \
20{ const auto err = x; \
21 if( err != cudaSuccess ) \
22 { printf("CUDA error %s in line %d\n", cudaGetErrorString(err), __LINE__); fflush(stdout); std::abort(); } \
23};
24
25#define HANDLE_CUTN_ERROR(x) \
26{ const auto err = x; \
27 if( err != CUTENSORNET_STATUS_SUCCESS ) \
28 { printf("cuTensorNet error %s in line %d\n", cutensornetGetErrorString(err), __LINE__); fflush(stdout); std::abort(); } \
29};
30
31
32int main()
33{
34 static_assert(sizeof(size_t) == sizeof(int64_t), "Please build this sample on a 64-bit architecture!");
35
36 constexpr std::size_t fp64size = sizeof(double);
Define the tensor network state and the desired slice of state amplitudes¶
Let’s define a tensor network state corresponding to a 6-qubit quantum circuit and request a slice of state amplitudes where qubits 0 and 1 are fixed at value 1.
40 // Quantum state configuration
41 constexpr int32_t numQubits = 6; // number of qubits
42 const std::vector<int64_t> qubitDims(numQubits,2); // qubit dimensions
43 const std::vector<int32_t> fixedModes({0,1}); // fixed modes in the output amplitude tensor (must be in acsending order)
44 const std::vector<int64_t> fixedValues({1,1}); // values of the fixed modes in the output amplitude tensor
45 const int32_t numFixedModes = fixedModes.size(); // number of fixed modes in the output amplitude tensor
46 std::cout << "Quantum circuit: " << numQubits << " qubits\n";
Initialize the cuTensorNet library handle¶
50 // Initialize the cuTensorNet library
51 HANDLE_CUDA_ERROR(cudaSetDevice(0));
52 cutensornetHandle_t cutnHandle;
53 HANDLE_CUTN_ERROR(cutensornetCreate(&cutnHandle));
54 std::cout << "Initialized cuTensorNet library on GPU 0\n";
Define quantum gates on GPU¶
58 // Define necessary quantum gate tensors in Host memory
59 const double invsq2 = 1.0 / std::sqrt(2.0);
60 // Hadamard gate
61 const std::vector<std::complex<double>> h_gateH {{invsq2, 0.0}, {invsq2, 0.0},
62 {invsq2, 0.0}, {-invsq2, 0.0}};
63 // CX gate
64 const std::vector<std::complex<double>> h_gateCX {{1.0, 0.0}, {0.0, 0.0}, {0.0, 0.0}, {0.0, 0.0},
65 {0.0, 0.0}, {1.0, 0.0}, {0.0, 0.0}, {0.0, 0.0},
66 {0.0, 0.0}, {0.0, 0.0}, {0.0, 0.0}, {1.0, 0.0},
67 {0.0, 0.0}, {0.0, 0.0}, {1.0, 0.0}, {0.0, 0.0}};
68
69 // Copy quantum gates to Device memory
70 void *d_gateH{nullptr}, *d_gateCX{nullptr};
71 HANDLE_CUDA_ERROR(cudaMalloc(&d_gateH, 4 * (2 * fp64size)));
72 HANDLE_CUDA_ERROR(cudaMalloc(&d_gateCX, 16 * (2 * fp64size)));
73 std::cout << "Allocated quantum gate memory on GPU\n";
74 HANDLE_CUDA_ERROR(cudaMemcpy(d_gateH, h_gateH.data(), 4 * (2 * fp64size), cudaMemcpyHostToDevice));
75 HANDLE_CUDA_ERROR(cudaMemcpy(d_gateCX, h_gateCX.data(), 16 * (2 * fp64size), cudaMemcpyHostToDevice));
76 std::cout << "Copied quantum gates to GPU memory\n";
Allocate MPS tensors¶
Here we set the shapes of MPS tensors and allocate GPU memory for their storage.
80 // Determine the MPS representation and allocate buffers for the MPS tensors
81 const int64_t maxExtent = 2; // GHZ state can be exactly represented with max bond dimension of 2
82 std::vector<std::vector<int64_t>> extents;
83 std::vector<int64_t*> extentsPtr(numQubits);
84 std::vector<void*> d_mpsTensors(numQubits, nullptr);
85 for (int32_t i = 0; i < numQubits; i++) {
86 if (i == 0) { // left boundary MPS tensor
87 extents.push_back({2, maxExtent});
88 HANDLE_CUDA_ERROR(cudaMalloc(&d_mpsTensors[i], 2 * maxExtent * 2 * fp64size));
89 }
90 else if (i == numQubits-1) { // right boundary MPS tensor
91 extents.push_back({maxExtent, 2});
92 HANDLE_CUDA_ERROR(cudaMalloc(&d_mpsTensors[i], 2 * maxExtent * 2 * fp64size));
93 }
94 else { // middle MPS tensors
95 extents.push_back({maxExtent, 2, maxExtent});
96 HANDLE_CUDA_ERROR(cudaMalloc(&d_mpsTensors[i], 2 * maxExtent * maxExtent * 2 * fp64size));
97 }
98 extentsPtr[i] = extents[i].data();
99 }
Allocate the amplitudes slice tensor on GPU¶
Here we allocate GPU memory for the requested amplitudes slice tensor.
103 // Allocate Device memory for the specified slice of the quantum circuit amplitudes tensor
104 void *d_amp{nullptr};
105 std::size_t ampSize = 1;
106 for(const auto & qubitDim: qubitDims) ampSize *= qubitDim; // all state modes (full size)
107 for(const auto & fixedMode: fixedModes) ampSize /= qubitDims[fixedMode]; // fixed state modes reduce the slice size
108 HANDLE_CUDA_ERROR(cudaMalloc(&d_amp, ampSize * (2 * fp64size)));
109 std::cout << "Allocated memory for the specified slice of the quantum circuit amplitude tensor of size "
110 << ampSize << " elements\n";
Allocate the scratch buffer on GPU¶
114 // Query the free memory on Device
115 std::size_t freeSize{0}, totalSize{0};
116 HANDLE_CUDA_ERROR(cudaMemGetInfo(&freeSize, &totalSize));
117 const std::size_t scratchSize = (freeSize - (freeSize % 4096)) / 2; // use half of available memory with alignment
118 void *d_scratch{nullptr};
119 HANDLE_CUDA_ERROR(cudaMalloc(&d_scratch, scratchSize));
120 std::cout << "Allocated " << scratchSize << " bytes of scratch memory on GPU\n";
Create a pure tensor network state¶
Now let’s create a pure tensor network state for a 6-qubit quantum circuit.
124 // Create the initial quantum state
125 cutensornetState_t quantumState;
126 HANDLE_CUTN_ERROR(cutensornetCreateState(cutnHandle, CUTENSORNET_STATE_PURITY_PURE, numQubits, qubitDims.data(),
127 CUDA_C_64F, &quantumState));
128 std::cout << "Created the initial quantum state\n";
Apply quantum gates¶
Let’s construct the GHZ quantum circuit by applying the corresponding quantum gates.
132 // Construct the final quantum circuit state (apply quantum gates) for the GHZ circuit
133 int64_t id;
134 HANDLE_CUTN_ERROR(cutensornetStateApplyTensorOperator(cutnHandle, quantumState, 1, std::vector<int32_t>{{0}}.data(),
135 d_gateH, nullptr, 1, 0, 1, &id));
136 for(int32_t i = 1; i < numQubits; ++i) {
137 HANDLE_CUTN_ERROR(cutensornetStateApplyTensorOperator(cutnHandle, quantumState, 2, std::vector<int32_t>{{i-1,i}}.data(),
138 d_gateCX, nullptr, 1, 0, 1, &id));
139 }
140 std::cout << "Applied quantum gates\n";
Request MPS factorization for the final quantum circuit state¶
Here we express our intent to factorize the final quantum circuit state using MPS factorization. The provided shapes of the MPS tensors refer to their maximal size limit during the MPS renormalization procedure. The actually computed shapes of the final MPS tensors may be smaller. No computation is done here yet.
144 // Specify the final target MPS representation (use default fortran strides)
145 HANDLE_CUTN_ERROR(cutensornetStateFinalizeMPS(cutnHandle, quantumState,
146 CUTENSORNET_BOUNDARY_CONDITION_OPEN, extentsPtr.data(), /*strides=*/nullptr));
147 std::cout << "Requested the final MPS factorization of the quantum circuit state\n";
Configure MPS factorization procedure¶
After expressing our intent to perform MPS factorization of the final quantum circuit state, we can also configure the MPS factorization procedure by resetting different options, for example, the SVD algorithm.
151 // Optional, set up the SVD method for MPS truncation.
152 cutensornetTensorSVDAlgo_t algo = CUTENSORNET_TENSOR_SVD_ALGO_GESVDJ;
153 HANDLE_CUTN_ERROR(cutensornetStateConfigure(cutnHandle, quantumState,
154 CUTENSORNET_STATE_CONFIG_MPS_SVD_ALGO, &algo, sizeof(algo)));
155 std::cout << "Configured the MPS factorization computation\n";
Prepare the computation of MPS factorization¶
Let’s create a workspace descriptor and prepare the computation of MPS factorization.
159 // Prepare the MPS computation and attach workspace
160 cutensornetWorkspaceDescriptor_t workDesc;
161 HANDLE_CUTN_ERROR(cutensornetCreateWorkspaceDescriptor(cutnHandle, &workDesc));
162 std::cout << "Created the workspace descriptor\n";
163 HANDLE_CUTN_ERROR(cutensornetStatePrepare(cutnHandle, quantumState, scratchSize, workDesc, 0x0));
164 std::cout << "Prepared the computation of the quantum circuit state\n";
165 double flops {0.0};
166 HANDLE_CUTN_ERROR(cutensornetStateGetInfo(cutnHandle, quantumState,
167 CUTENSORNET_STATE_INFO_FLOPS, &flops, sizeof(flops)));
168 if(flops > 0.0) {
169 std::cout << "Total flop count = " << (flops/1e9) << " GFlop\n";
170 }else if(flops < 0.0) {
171 std::cout << "ERROR: Negative Flop count!\n";
172 std::abort();
173 }
174
175 int64_t worksize {0};
176 HANDLE_CUTN_ERROR(cutensornetWorkspaceGetMemorySize(cutnHandle,
177 workDesc,
178 CUTENSORNET_WORKSIZE_PREF_RECOMMENDED,
179 CUTENSORNET_MEMSPACE_DEVICE,
180 CUTENSORNET_WORKSPACE_SCRATCH,
181 &worksize));
182 std::cout << "Scratch GPU workspace size (bytes) for MPS computation = " << worksize << std::endl;
183 if(worksize <= scratchSize) {
184 HANDLE_CUTN_ERROR(cutensornetWorkspaceSetMemory(cutnHandle, workDesc, CUTENSORNET_MEMSPACE_DEVICE,
185 CUTENSORNET_WORKSPACE_SCRATCH, d_scratch, worksize));
186 }else{
187 std::cout << "ERROR: Insufficient workspace size on Device!\n";
188 std::abort();
189 }
190 std::cout << "Set the workspace buffer for the MPS factorization computation\n";
Compute MPS factorization¶
Once the MPS factorization procedure has been configured and prepared, let’s compute the MPS factorization of the final quantum circuit state.
194 // Execute MPS computation
195 HANDLE_CUTN_ERROR(cutensornetStateCompute(cutnHandle, quantumState,
196 workDesc, extentsPtr.data(), /*strides=*/nullptr, d_mpsTensors.data(), 0));
197 std::cout << "Computed the MPS factorization\n";
Create the state amplitudes accessor¶
Once the factorized MPS representation of the final quantum circuit state has been computed, let’s create the amplitudes accessor object that will compute the requested slice of state amplitudes.
201 // Specify the quantum circuit amplitudes accessor
202 cutensornetStateAccessor_t accessor;
203 HANDLE_CUTN_ERROR(cutensornetCreateAccessor(cutnHandle, quantumState, numFixedModes, fixedModes.data(),
204 nullptr, &accessor)); // using default strides
205 std::cout << "Created the specified quantum circuit amplitudes accessor\n";
Configure the state amplitudes accessor¶
Now we can configure the state amplitudes accessor object by setting the number of hyper-samples to be used by the tensor network contraction path finder.
209 // Configure the computation of the slice of the specified quantum circuit amplitudes tensor
210 const int32_t numHyperSamples = 8; // desired number of hyper samples used in the tensor network contraction path finder
211 HANDLE_CUTN_ERROR(cutensornetAccessorConfigure(cutnHandle, accessor,
212 CUTENSORNET_ACCESSOR_CONFIG_NUM_HYPER_SAMPLES, &numHyperSamples, sizeof(numHyperSamples)));
Prepare the computation of the amplitudes slice tensor¶
Let’s prepare the computation of the amplitudes slice tensor.
216 // Prepare the computation of the specified slice of the quantum circuit amplitudes tensor
217 HANDLE_CUTN_ERROR(cutensornetAccessorPrepare(cutnHandle, accessor, scratchSize, workDesc, 0x0));
218 std::cout << "Prepared the computation of the specified slice of the quantum circuit amplitudes tensor\n";
219 flops = 0.0;
220 HANDLE_CUTN_ERROR(cutensornetAccessorGetInfo(cutnHandle, accessor,
221 CUTENSORNET_ACCESSOR_INFO_FLOPS, &flops, sizeof(flops)));
222 std::cout << "Total flop count = " << (flops/1e9) << " GFlop\n";
223 if(flops <= 0.0) {
224 std::cout << "ERROR: Invalid Flop count!\n";
225 std::abort();
226 }
Set up the workspace¶
Now we can set up the required workspace buffer.
230 // Attach the workspace buffer
231 HANDLE_CUTN_ERROR(cutensornetWorkspaceGetMemorySize(cutnHandle,
232 workDesc,
233 CUTENSORNET_WORKSIZE_PREF_RECOMMENDED,
234 CUTENSORNET_MEMSPACE_DEVICE,
235 CUTENSORNET_WORKSPACE_SCRATCH,
236 &worksize));
237 std::cout << "Required scratch GPU workspace size (bytes) = " << worksize << std::endl;
238 if(worksize <= scratchSize) {
239 HANDLE_CUTN_ERROR(cutensornetWorkspaceSetMemory(cutnHandle, workDesc, CUTENSORNET_MEMSPACE_DEVICE,
240 CUTENSORNET_WORKSPACE_SCRATCH, d_scratch, worksize));
241 }else{
242 std::cout << "ERROR: Insufficient workspace size on Device!\n";
243 std::abort();
244 }
245 std::cout << "Set the workspace buffer\n";
Compute the specified slice of state amplitudes¶
Once everything has been set up, we compute the requested slice of state amplitudes, copy it back to Host memory, and print it.
249 // Compute the specified slice of the quantum circuit amplitudes tensor
250 std::complex<double> stateNorm2{0.0,0.0};
251 HANDLE_CUTN_ERROR(cutensornetAccessorCompute(cutnHandle, accessor, fixedValues.data(),
252 workDesc, d_amp, static_cast<void*>(&stateNorm2), 0x0));
253 std::cout << "Computed the specified slice of the quantum circuit amplitudes tensor\n";
254 std::vector<std::complex<double>> h_amp(ampSize);
255 HANDLE_CUDA_ERROR(cudaMemcpy(h_amp.data(), d_amp, ampSize * (2 * fp64size), cudaMemcpyDeviceToHost));
256 std::cout << "Amplitudes slice for " << (numQubits - numFixedModes) << " qubits:\n";
257 for(std::size_t i = 0; i < ampSize; ++i) {
258 std::cout << " " << h_amp[i] << std::endl;
259 }
260 std::cout << "Squared 2-norm of the state = (" << stateNorm2.real() << ", " << stateNorm2.imag() << ")\n";
Free resources¶
264 // Destroy the workspace descriptor
265 HANDLE_CUTN_ERROR(cutensornetDestroyWorkspaceDescriptor(workDesc));
266 std::cout << "Destroyed the workspace descriptor\n";
267
268 // Destroy the quantum circuit amplitudes accessor
269 HANDLE_CUTN_ERROR(cutensornetDestroyAccessor(accessor));
270 std::cout << "Destroyed the quantum circuit amplitudes accessor\n";
271
272 // Destroy the quantum circuit state
273 HANDLE_CUTN_ERROR(cutensornetDestroyState(quantumState));
274 std::cout << "Destroyed the quantum circuit state\n";
275
276 for (int32_t i = 0; i < numQubits; i++) {
277 HANDLE_CUDA_ERROR(cudaFree(d_mpsTensors[i]));
278 }
279 HANDLE_CUDA_ERROR(cudaFree(d_scratch));
280 HANDLE_CUDA_ERROR(cudaFree(d_amp));
281 HANDLE_CUDA_ERROR(cudaFree(d_gateCX));
282 HANDLE_CUDA_ERROR(cudaFree(d_gateH));
283 std::cout << "Freed memory on GPU\n";
284
285 // Finalize the cuTensorNet library
286 HANDLE_CUTN_ERROR(cutensornetDestroy(cutnHandle));
287 std::cout << "Finalized the cuTensorNet library\n";
288
289 return 0;
290}