Computing Matrix Product State Expectation Value¶
The following code example illustrates how to define a tensor network state and then compute its Matrix Product State (MPS) factorization, followed by computing the expectation value of a tensor network operator over the MPS-factorized state. The full code can be found in the NVIDIA/cuQuantum repository (here).
Headers and error handling¶
7#include <cstdlib>
8#include <cstdio>
9#include <cassert>
10#include <complex>
11#include <vector>
12#include <bitset>
13#include <iostream>
14
15#include <cuda_runtime.h>
16#include <cutensornet.h>
17
18
19#define HANDLE_CUDA_ERROR(x) \
20{ const auto err = x; \
21 if( err != cudaSuccess ) \
22 { printf("CUDA error %s in line %d\n", cudaGetErrorString(err), __LINE__); fflush(stdout); std::abort(); } \
23};
24
25#define HANDLE_CUTN_ERROR(x) \
26{ const auto err = x; \
27 if( err != CUTENSORNET_STATUS_SUCCESS ) \
28 { printf("cuTensorNet error %s in line %d\n", cutensornetGetErrorString(err), __LINE__); fflush(stdout); std::abort(); } \
29};
30
31
32int main()
33{
34 static_assert(sizeof(size_t) == sizeof(int64_t), "Please build this sample on a 64-bit architecture!");
35
36 constexpr std::size_t fp64size = sizeof(double);
Define the tensor network state¶
Let’s define a tensor network state corresponding to a 16-qubit quantum circuit.
40 // Quantum state configuration
41 constexpr int32_t numQubits = 16; // number of qubits
42 const std::vector<int64_t> qubitDims(numQubits,2); // qubit dimensions
43 std::cout << "Quantum circuit: " << numQubits << " qubits\n";
Initialize the cuTensorNet library handle¶
47 // Initialize the cuTensorNet library
48 HANDLE_CUDA_ERROR(cudaSetDevice(0));
49 cutensornetHandle_t cutnHandle;
50 HANDLE_CUTN_ERROR(cutensornetCreate(&cutnHandle));
51 std::cout << "Initialized cuTensorNet library on GPU 0\n";
Define quantum gates on GPU¶
55 // Define necessary quantum gate tensors in Host memory
56 const double invsq2 = 1.0 / std::sqrt(2.0);
57 // Hadamard gate
58 const std::vector<std::complex<double>> h_gateH {{invsq2, 0.0}, {invsq2, 0.0},
59 {invsq2, 0.0}, {-invsq2, 0.0}};
60 // Pauli X gate
61 const std::vector<std::complex<double>> h_gateX {{0.0, 0.0}, {1.0, 0.0},
62 {1.0, 0.0}, {0.0, 0.0}};
63 // Pauli Y gate
64 const std::vector<std::complex<double>> h_gateY {{0.0, 0.0}, {0.0, -1.0},
65 {0.0, 1.0}, {0.0, 0.0}};
66 // Pauli Z gate
67 const std::vector<std::complex<double>> h_gateZ {{1.0, 0.0}, {0.0, 0.0},
68 {0.0, 0.0}, {-1.0, 0.0}};
69 // CX gate
70 const std::vector<std::complex<double>> h_gateCX {{1.0, 0.0}, {0.0, 0.0}, {0.0, 0.0}, {0.0, 0.0},
71 {0.0, 0.0}, {1.0, 0.0}, {0.0, 0.0}, {0.0, 0.0},
72 {0.0, 0.0}, {0.0, 0.0}, {0.0, 0.0}, {1.0, 0.0},
73 {0.0, 0.0}, {0.0, 0.0}, {1.0, 0.0}, {0.0, 0.0}};
74
75 // Copy quantum gates to Device memory
76 void *d_gateH{nullptr}, *d_gateX{nullptr}, *d_gateY{nullptr}, *d_gateZ{nullptr}, *d_gateCX{nullptr};
77 HANDLE_CUDA_ERROR(cudaMalloc(&d_gateH, 4 * (2 * fp64size)));
78 HANDLE_CUDA_ERROR(cudaMalloc(&d_gateX, 4 * (2 * fp64size)));
79 HANDLE_CUDA_ERROR(cudaMalloc(&d_gateY, 4 * (2 * fp64size)));
80 HANDLE_CUDA_ERROR(cudaMalloc(&d_gateZ, 4 * (2 * fp64size)));
81 HANDLE_CUDA_ERROR(cudaMalloc(&d_gateCX, 16 * (2 * fp64size)));
82 std::cout << "Allocated quantum gate memory on GPU\n";
83 HANDLE_CUDA_ERROR(cudaMemcpy(d_gateH, h_gateH.data(), 4 * (2 * fp64size), cudaMemcpyHostToDevice));
84 HANDLE_CUDA_ERROR(cudaMemcpy(d_gateX, h_gateX.data(), 4 * (2 * fp64size), cudaMemcpyHostToDevice));
85 HANDLE_CUDA_ERROR(cudaMemcpy(d_gateY, h_gateY.data(), 4 * (2 * fp64size), cudaMemcpyHostToDevice));
86 HANDLE_CUDA_ERROR(cudaMemcpy(d_gateZ, h_gateZ.data(), 4 * (2 * fp64size), cudaMemcpyHostToDevice));
87 HANDLE_CUDA_ERROR(cudaMemcpy(d_gateCX, h_gateCX.data(), 16 * (2 * fp64size), cudaMemcpyHostToDevice));
88 std::cout << "Copied quantum gates to GPU memory\n";
Allocate MPS tensors¶
Here we set the shapes of MPS tensors and allocate GPU memory for their storage.
92 // Determine the MPS representation and allocate buffers for the MPS tensors
93 const int64_t maxExtent = 2; // GHZ state can be exactly represented with max bond dimension of 2
94 std::vector<std::vector<int64_t>> extents;
95 std::vector<int64_t*> extentsPtr(numQubits);
96 std::vector<void*> d_mpsTensors(numQubits, nullptr);
97 for (int32_t i = 0; i < numQubits; i++) {
98 if (i == 0) { // left boundary MPS tensor
99 extents.push_back({2, maxExtent});
100 HANDLE_CUDA_ERROR(cudaMalloc(&d_mpsTensors[i], 2 * maxExtent * 2 * fp64size));
101 }
102 else if (i == numQubits-1) { // right boundary MPS tensor
103 extents.push_back({maxExtent, 2});
104 HANDLE_CUDA_ERROR(cudaMalloc(&d_mpsTensors[i], 2 * maxExtent * 2 * fp64size));
105 }
106 else { // middle MPS tensors
107 extents.push_back({maxExtent, 2, maxExtent});
108 HANDLE_CUDA_ERROR(cudaMalloc(&d_mpsTensors[i], 2 * maxExtent * maxExtent * 2 * fp64size));
109 }
110 extentsPtr[i] = extents[i].data();
111 }
Allocate the scratch buffer on GPU¶
115 // Query the free memory on Device
116 std::size_t freeSize{0}, totalSize{0};
117 HANDLE_CUDA_ERROR(cudaMemGetInfo(&freeSize, &totalSize));
118 const std::size_t scratchSize = (freeSize - (freeSize % 4096)) / 2; // use half of available memory with alignment
119 void *d_scratch{nullptr};
120 HANDLE_CUDA_ERROR(cudaMalloc(&d_scratch, scratchSize));
121 std::cout << "Allocated " << scratchSize << " bytes of scratch memory on GPU\n";
Create a pure tensor network state¶
Now let’s create a pure tensor network state for a 16-qubit quantum circuit.
125 // Create the initial quantum state
126 cutensornetState_t quantumState;
127 HANDLE_CUTN_ERROR(cutensornetCreateState(cutnHandle, CUTENSORNET_STATE_PURITY_PURE, numQubits, qubitDims.data(),
128 CUDA_C_64F, &quantumState));
129 std::cout << "Created the initial quantum state\n";
Apply quantum gates¶
Let’s construct the GHZ quantum circuit by applying the corresponding quantum gates.
133 // Construct the final quantum circuit state (apply quantum gates) for the GHZ circuit
134 int64_t id;
135 HANDLE_CUTN_ERROR(cutensornetStateApplyTensorOperator(cutnHandle, quantumState, 1, std::vector<int32_t>{{0}}.data(),
136 d_gateH, nullptr, 1, 0, 1, &id));
137 for(int32_t i = 1; i < numQubits; ++i) {
138 HANDLE_CUTN_ERROR(cutensornetStateApplyTensorOperator(cutnHandle, quantumState, 2, std::vector<int32_t>{{i-1,i}}.data(),
139 d_gateCX, nullptr, 1, 0, 1, &id));
140 }
141 std::cout << "Applied quantum gates\n";
Request MPS factorization for the final quantum circuit state¶
Here we express our intent to factorize the final quantum circuit state using MPS factorization. The provided shapes of the MPS tensors refer to their maximal size limit during the MPS renormalization procedure. The actually computed shapes of the final MPS tensors may be smaller. No computation is done here yet.
145 // Specify the final target MPS representation (use default fortran strides)
146 HANDLE_CUTN_ERROR(cutensornetStateFinalizeMPS(cutnHandle, quantumState,
147 CUTENSORNET_BOUNDARY_CONDITION_OPEN, extentsPtr.data(), /*strides=*/nullptr ));
Configure MPS factorization procedure¶
After expressing our intent to perform MPS factorization of the final quantum circuit state, we can also configure the MPS factorization procedure by resetting different options, for example, the SVD algorithm.
151 // Optional, set up the SVD method for truncation.
152 cutensornetTensorSVDAlgo_t algo = CUTENSORNET_TENSOR_SVD_ALGO_GESVDJ;
153 HANDLE_CUTN_ERROR(cutensornetStateConfigure(cutnHandle, quantumState,
154 CUTENSORNET_STATE_CONFIG_MPS_SVD_ALGO, &algo, sizeof(algo)));
155 std::cout << "Configured the MPS computation\n";
Prepare the computation of MPS factorization¶
Let’s create a workspace descriptor and prepare the computation of MPS factorization.
159 // Prepare the MPS computation and attach workspace
160 cutensornetWorkspaceDescriptor_t workDesc;
161 HANDLE_CUTN_ERROR(cutensornetCreateWorkspaceDescriptor(cutnHandle, &workDesc));
162 std::cout << "Created the workspace descriptor\n";
163 HANDLE_CUTN_ERROR(cutensornetStatePrepare(cutnHandle, quantumState, scratchSize, workDesc, 0x0));
164 std::cout << "Prepared the computation of the quantum circuit state\n";
165 double flops {0.0};
166 HANDLE_CUTN_ERROR(cutensornetStateGetInfo(cutnHandle, quantumState,
167 CUTENSORNET_STATE_INFO_FLOPS, &flops, sizeof(flops)));
168 if(flops > 0.0) {
169 std::cout << "Total flop count = " << (flops/1e9) << " GFlop\n";
170 }else if(flops < 0.0) {
171 std::cout << "ERROR: Negative Flop count!\n";
172 std::abort();
173 }
174
175 int64_t worksize {0};
176 HANDLE_CUTN_ERROR(cutensornetWorkspaceGetMemorySize(cutnHandle,
177 workDesc,
178 CUTENSORNET_WORKSIZE_PREF_RECOMMENDED,
179 CUTENSORNET_MEMSPACE_DEVICE,
180 CUTENSORNET_WORKSPACE_SCRATCH,
181 &worksize));
182 std::cout << "Scratch GPU workspace size (bytes) for MPS computation = " << worksize << std::endl;
183 if(worksize <= scratchSize) {
184 HANDLE_CUTN_ERROR(cutensornetWorkspaceSetMemory(cutnHandle, workDesc, CUTENSORNET_MEMSPACE_DEVICE,
185 CUTENSORNET_WORKSPACE_SCRATCH, d_scratch, worksize));
186 }else{
187 std::cout << "ERROR: Insufficient workspace size on Device!\n";
188 std::abort();
189 }
190 std::cout << "Set the workspace buffer for MPS computation\n";
Compute MPS factorization¶
Once the MPS factorization procedure has been configured and prepared, let’s compute the MPS factorization of the final quantum circuit state.
194 // Execute MPS computation
195 HANDLE_CUTN_ERROR(cutensornetStateCompute(cutnHandle, quantumState,
196 workDesc, extentsPtr.data(), /*strides=*/nullptr, d_mpsTensors.data(), 0));
Construct a tensor network operator¶
Now let’s create an empty tensor network operator for 16-qubit states and then append three components to it, where each component is a direct product of Pauli matrices scaled by some complex coefficient (like in the Jordan-Wigner representation).
200 // Create an empty tensor network operator
201 cutensornetNetworkOperator_t hamiltonian;
202 HANDLE_CUTN_ERROR(cutensornetCreateNetworkOperator(cutnHandle, numQubits, qubitDims.data(), CUDA_C_64F, &hamiltonian));
203 // Append component (0.5 * Z1 * Z2) to the tensor network operator
204 {
205 const int32_t numModes[] = {1, 1}; // Z1 acts on 1 mode, Z2 acts on 1 mode
206 const int32_t modesZ1[] = {1}; // state modes Z1 acts on
207 const int32_t modesZ2[] = {2}; // state modes Z2 acts on
208 const int32_t * stateModes[] = {modesZ1, modesZ2}; // state modes (Z1 * Z2) acts on
209 const void * gateData[] = {d_gateZ, d_gateZ}; // GPU pointers to gate data
210 HANDLE_CUTN_ERROR(cutensornetNetworkOperatorAppendProduct(cutnHandle, hamiltonian, cuDoubleComplex{0.5,0.0},
211 2, numModes, stateModes, NULL, gateData, &id));
212 }
213 // Append component (0.25 * Y3) to the tensor network operator
214 {
215 const int32_t numModes[] = {1}; // Y3 acts on 1 mode
216 const int32_t modesY3[] = {3}; // state modes Y3 acts on
217 const int32_t * stateModes[] = {modesY3}; // state modes (Y3) acts on
218 const void * gateData[] = {d_gateY}; // GPU pointers to gate data
219 HANDLE_CUTN_ERROR(cutensornetNetworkOperatorAppendProduct(cutnHandle, hamiltonian, cuDoubleComplex{0.25,0.0},
220 1, numModes, stateModes, NULL, gateData, &id));
221 }
222 // Append component (0.13 * Y0 X2 Z3) to the tensor network operator
223 {
224 const int32_t numModes[] = {1, 1, 1}; // Y0 acts on 1 mode, X2 acts on 1 mode, Z3 acts on 1 mode
225 const int32_t modesY0[] = {0}; // state modes Y0 acts on
226 const int32_t modesX2[] = {2}; // state modes X2 acts on
227 const int32_t modesZ3[] = {3}; // state modes Z3 acts on
228 const int32_t * stateModes[] = {modesY0, modesX2, modesZ3}; // state modes (Y0 * X2 * Z3) acts on
229 const void * gateData[] = {d_gateY, d_gateX, d_gateZ}; // GPU pointers to gate data
230 HANDLE_CUTN_ERROR(cutensornetNetworkOperatorAppendProduct(cutnHandle, hamiltonian, cuDoubleComplex{0.13,0.0},
231 3, numModes, stateModes, NULL, gateData, &id));
232 }
233 std::cout << "Constructed a tensor network operator: (0.5 * Z1 * Z2) + (0.25 * Y3) + (0.13 * Y0 * X2 * Z3)" << std::endl;
Create the expectation value object¶
Once the quantum circuit and the tensor network operator have been constructed, and the final quantum circuit state has been factorized using the MPS representation, let’s create the expectation value object that will compute the expectation value of the specified tensor network operator over the final MPS-factorized state of the specified quantum circuit.
237 // Specify the quantum circuit expectation value
238 cutensornetStateExpectation_t expectation;
239 HANDLE_CUTN_ERROR(cutensornetCreateExpectation(cutnHandle, quantumState, hamiltonian, &expectation));
240 std::cout << "Created the specified quantum circuit expectation value\n";
Configure the expectation value calculation¶
Now we can configure the expectation value object by setting the number of hyper-samples to be used by the tensor network contraction path finder.
244 // Configure the computation of the specified quantum circuit expectation value
245 const int32_t numHyperSamples = 8; // desired number of hyper samples used in the tensor network contraction path finder
246 HANDLE_CUTN_ERROR(cutensornetExpectationConfigure(cutnHandle, expectation,
247 CUTENSORNET_EXPECTATION_CONFIG_NUM_HYPER_SAMPLES, &numHyperSamples, sizeof(numHyperSamples)));
Prepare the expectation value calculation¶
Let’s prepare the computation of the desired expectation value.
251 // Prepare the specified quantum circuit expectation value for computation
252 HANDLE_CUTN_ERROR(cutensornetExpectationPrepare(cutnHandle, expectation, scratchSize, workDesc, 0x0));
253 std::cout << "Prepared the specified quantum circuit expectation value\n";
254 flops = 0.0;
255 HANDLE_CUTN_ERROR(cutensornetExpectationGetInfo(cutnHandle, expectation,
256 CUTENSORNET_EXPECTATION_INFO_FLOPS, &flops, sizeof(flops)));
257 std::cout << "Total flop count = " << (flops/1e9) << " GFlop\n";
258 if(flops <= 0.0) {
259 std::cout << "ERROR: Invalid Flop count!\n";
260 std::abort();
261 }
Set up the workspace¶
Now we can set up the required workspace buffer.
265 // Attach the workspace buffer
266 HANDLE_CUTN_ERROR(cutensornetWorkspaceGetMemorySize(cutnHandle,
267 workDesc,
268 CUTENSORNET_WORKSIZE_PREF_RECOMMENDED,
269 CUTENSORNET_MEMSPACE_DEVICE,
270 CUTENSORNET_WORKSPACE_SCRATCH,
271 &worksize));
272 std::cout << "Required scratch GPU workspace size (bytes) = " << worksize << std::endl;
273 if(worksize <= scratchSize) {
274 HANDLE_CUTN_ERROR(cutensornetWorkspaceSetMemory(cutnHandle, workDesc, CUTENSORNET_MEMSPACE_DEVICE,
275 CUTENSORNET_WORKSPACE_SCRATCH, d_scratch, worksize));
276 }else{
277 std::cout << "ERROR: Insufficient workspace size on Device!\n";
278 std::abort();
279 }
280 std::cout << "Set the workspace buffer\n";
Compute the requested expectation value¶
Once everything has been set up, we compute the requested expectation value and print it. Note that the returned expectation value is not normalized. The 2-norm of the tensor network state is returned as a separate argument.
284 // Compute the specified quantum circuit expectation value
285 std::complex<double> expectVal{0.0,0.0}, stateNorm2{0.0,0.0};
286 HANDLE_CUTN_ERROR(cutensornetExpectationCompute(cutnHandle, expectation, workDesc,
287 static_cast<void*>(&expectVal), static_cast<void*>(&stateNorm2), 0x0));
288 std::cout << "Computed the specified quantum circuit expectation value\n";
289 expectVal /= stateNorm2;
290 std::cout << "Expectation value = (" << expectVal.real() << ", " << expectVal.imag() << ")\n";
291 std::cout << "Squared 2-norm of the state = (" << stateNorm2.real() << ", " << stateNorm2.imag() << ")\n";
Free resources¶
295 // Destroy the workspace descriptor
296 HANDLE_CUTN_ERROR(cutensornetDestroyWorkspaceDescriptor(workDesc));
297 std::cout << "Destroyed the workspace descriptor\n";
298
299 // Destroy the quantum circuit expectation value
300 HANDLE_CUTN_ERROR(cutensornetDestroyExpectation(expectation));
301 std::cout << "Destroyed the quantum circuit state expectation value\n";
302
303 // Destroy the tensor network operator
304 HANDLE_CUTN_ERROR(cutensornetDestroyNetworkOperator(hamiltonian));
305 std::cout << "Destroyed the tensor network operator\n";
306
307 // Destroy the quantum circuit state
308 HANDLE_CUTN_ERROR(cutensornetDestroyState(quantumState));
309 std::cout << "Destroyed the quantum circuit state\n";
310
311 for (int32_t i = 0; i < numQubits; i++) {
312 HANDLE_CUDA_ERROR(cudaFree(d_mpsTensors[i]));
313 }
314 HANDLE_CUDA_ERROR(cudaFree(d_scratch));
315 HANDLE_CUDA_ERROR(cudaFree(d_gateCX));
316 HANDLE_CUDA_ERROR(cudaFree(d_gateZ));
317 HANDLE_CUDA_ERROR(cudaFree(d_gateY));
318 HANDLE_CUDA_ERROR(cudaFree(d_gateX));
319 HANDLE_CUDA_ERROR(cudaFree(d_gateH));
320 std::cout << "Freed memory on GPU\n";
321
322 // Finalize the cuTensorNet library
323 HANDLE_CUTN_ERROR(cutensornetDestroy(cutnHandle));
324 std::cout << "Finalized the cuTensorNet library\n";
325
326 return 0;
327}