Computing Matrix Product State Expectation Value

The following code example illustrates how to define a tensor network state, compute its Matrix Product State (MPS) factorization, and then compute the expectation value of a tensor network operator over the MPS-factorized state. The full code can be found in the NVIDIA/cuQuantum repository.

Headers and error handling

#include <cstdlib>
#include <cstdio>
#include <cassert>
#include <cmath>
#include <complex>
#include <vector>
#include <bitset>
#include <iostream>

#include <cuda_runtime.h>
#include <cutensornet.h>


#define HANDLE_CUDA_ERROR(x) \
{ const auto err = x; \
  if( err != cudaSuccess ) \
  { printf("CUDA error %s in line %d\n", cudaGetErrorString(err), __LINE__); fflush(stdout); std::abort(); } \
};

#define HANDLE_CUTN_ERROR(x) \
{ const auto err = x; \
  if( err != CUTENSORNET_STATUS_SUCCESS ) \
  { printf("cuTensorNet error %s in line %d\n", cutensornetGetErrorString(err), __LINE__); fflush(stdout); std::abort(); } \
};


int main()
{
  static_assert(sizeof(size_t) == sizeof(int64_t), "Please build this sample on a 64-bit architecture!");

  constexpr std::size_t fp64size = sizeof(double);

Define the tensor network state

Let’s define a tensor network state corresponding to a 16-qubit quantum circuit.

  // Quantum state configuration
  constexpr int32_t numQubits = 16; // number of qubits
  const std::vector<int64_t> qubitDims(numQubits,2); // qubit dimensions
  std::cout << "Quantum circuit: " << numQubits << " qubits\n";

Initialize the cuTensorNet library handle

  // Initialize the cuTensorNet library
  HANDLE_CUDA_ERROR(cudaSetDevice(0));
  cutensornetHandle_t cutnHandle;
  HANDLE_CUTN_ERROR(cutensornetCreate(&cutnHandle));
  std::cout << "Initialized cuTensorNet library on GPU 0\n";

Define quantum gates on GPU

  // Define necessary quantum gate tensors in Host memory
  const double invsq2 = 1.0 / std::sqrt(2.0);
  //  Hadamard gate
  const std::vector<std::complex<double>> h_gateH {{invsq2, 0.0},  {invsq2, 0.0},
                                                   {invsq2, 0.0}, {-invsq2, 0.0}};
  //  Pauli X gate
  const std::vector<std::complex<double>> h_gateX {{0.0, 0.0}, {1.0, 0.0},
                                                   {1.0, 0.0}, {0.0, 0.0}};
  //  Pauli Y gate
  const std::vector<std::complex<double>> h_gateY {{0.0, 0.0}, {0.0, -1.0},
                                                   {0.0, 1.0}, {0.0, 0.0}};
  //  Pauli Z gate
  const std::vector<std::complex<double>> h_gateZ {{1.0, 0.0}, {0.0, 0.0},
                                                   {0.0, 0.0}, {-1.0, 0.0}};
  //  CX gate
  const std::vector<std::complex<double>> h_gateCX {{1.0, 0.0}, {0.0, 0.0}, {0.0, 0.0}, {0.0, 0.0},
                                                    {0.0, 0.0}, {1.0, 0.0}, {0.0, 0.0}, {0.0, 0.0},
                                                    {0.0, 0.0}, {0.0, 0.0}, {0.0, 0.0}, {1.0, 0.0},
                                                    {0.0, 0.0}, {0.0, 0.0}, {1.0, 0.0}, {0.0, 0.0}};

  // Copy quantum gates to Device memory
  void *d_gateH{nullptr}, *d_gateX{nullptr}, *d_gateY{nullptr}, *d_gateZ{nullptr}, *d_gateCX{nullptr};
  HANDLE_CUDA_ERROR(cudaMalloc(&d_gateH, 4 * (2 * fp64size)));
  HANDLE_CUDA_ERROR(cudaMalloc(&d_gateX, 4 * (2 * fp64size)));
  HANDLE_CUDA_ERROR(cudaMalloc(&d_gateY, 4 * (2 * fp64size)));
  HANDLE_CUDA_ERROR(cudaMalloc(&d_gateZ, 4 * (2 * fp64size)));
  HANDLE_CUDA_ERROR(cudaMalloc(&d_gateCX, 16 * (2 * fp64size)));
  std::cout << "Allocated quantum gate memory on GPU\n";
  HANDLE_CUDA_ERROR(cudaMemcpy(d_gateH, h_gateH.data(), 4 * (2 * fp64size), cudaMemcpyHostToDevice));
  HANDLE_CUDA_ERROR(cudaMemcpy(d_gateX, h_gateX.data(), 4 * (2 * fp64size), cudaMemcpyHostToDevice));
  HANDLE_CUDA_ERROR(cudaMemcpy(d_gateY, h_gateY.data(), 4 * (2 * fp64size), cudaMemcpyHostToDevice));
  HANDLE_CUDA_ERROR(cudaMemcpy(d_gateZ, h_gateZ.data(), 4 * (2 * fp64size), cudaMemcpyHostToDevice));
  HANDLE_CUDA_ERROR(cudaMemcpy(d_gateCX, h_gateCX.data(), 16 * (2 * fp64size), cudaMemcpyHostToDevice));
  std::cout << "Copied quantum gates to GPU memory\n";

Allocate MPS tensors

Here we set the shapes of the MPS tensors and allocate GPU memory for their storage. Because the target GHZ state has Schmidt rank 2 across every bipartition of the qubits, a maximal bond dimension (extent) of 2 is sufficient for an exact MPS representation.

  // Determine the MPS representation and allocate buffers for the MPS tensors
  const int64_t maxExtent = 2; // GHZ state can be exactly represented with max bond dimension of 2
  std::vector<std::vector<int64_t>> extents;
  std::vector<int64_t*> extentsPtr(numQubits);
  std::vector<void*> d_mpsTensors(numQubits, nullptr);
  for (int32_t i = 0; i < numQubits; i++) {
    if (i == 0) { // left boundary MPS tensor
      extents.push_back({2, maxExtent});
      HANDLE_CUDA_ERROR(cudaMalloc(&d_mpsTensors[i], 2 * maxExtent * 2 * fp64size));
    }
    else if (i == numQubits-1) { // right boundary MPS tensor
      extents.push_back({maxExtent, 2});
      HANDLE_CUDA_ERROR(cudaMalloc(&d_mpsTensors[i], 2 * maxExtent * 2 * fp64size));
    }
    else { // middle MPS tensors
      extents.push_back({maxExtent, 2, maxExtent});
      HANDLE_CUDA_ERROR(cudaMalloc(&d_mpsTensors[i], 2 * maxExtent * maxExtent * 2 * fp64size));
    }
    extentsPtr[i] = extents[i].data();
  }

Allocate the scratch buffer on GPU

  // Query the free memory on Device
  std::size_t freeSize{0}, totalSize{0};
  HANDLE_CUDA_ERROR(cudaMemGetInfo(&freeSize, &totalSize));
  const std::size_t scratchSize = (freeSize - (freeSize % 4096)) / 2; // use half of available memory with alignment
  void *d_scratch{nullptr};
  HANDLE_CUDA_ERROR(cudaMalloc(&d_scratch, scratchSize));
  std::cout << "Allocated " << scratchSize << " bytes of scratch memory on GPU\n";

Create a pure tensor network state

Now let’s create a pure tensor network state for a 16-qubit quantum circuit.

  // Create the initial quantum state
  cutensornetState_t quantumState;
  HANDLE_CUTN_ERROR(cutensornetCreateState(cutnHandle, CUTENSORNET_STATE_PURITY_PURE, numQubits, qubitDims.data(),
                    CUDA_C_64F, &quantumState));
  std::cout << "Created the initial quantum state\n";

Apply quantum gates

Let's construct the GHZ quantum circuit by applying the corresponding quantum gates: a Hadamard gate on qubit 0 followed by a chain of CX gates entangling neighboring qubits. The trailing integer arguments of cutensornetStateApplyTensorOperator mark each gate tensor as immutable, non-adjoint, and unitary.

  // Construct the final quantum circuit state (apply quantum gates) for the GHZ circuit
  int64_t id;
  HANDLE_CUTN_ERROR(cutensornetStateApplyTensorOperator(cutnHandle, quantumState, 1, std::vector<int32_t>{{0}}.data(),
                    d_gateH, nullptr, 1, 0, 1, &id));
  for(int32_t i = 1; i < numQubits; ++i) {
    HANDLE_CUTN_ERROR(cutensornetStateApplyTensorOperator(cutnHandle, quantumState, 2, std::vector<int32_t>{{i-1,i}}.data(),
                      d_gateCX, nullptr, 1, 0, 1, &id));
  }
  std::cout << "Applied quantum gates\n";

Request MPS factorization for the final quantum circuit state

Here we express our intent to factorize the final quantum circuit state using the MPS factorization. The provided MPS tensor shapes specify upper bounds on their sizes during the MPS renormalization procedure; the actually computed shapes of the final MPS tensors may be smaller. No computation is done here yet.

  // Specify the final target MPS representation (use default Fortran strides)
  HANDLE_CUTN_ERROR(cutensornetStateFinalizeMPS(cutnHandle, quantumState,
                    CUTENSORNET_BOUNDARY_CONDITION_OPEN, extentsPtr.data(), /*strides=*/nullptr ));

Configure MPS factorization procedure

After expressing our intent to perform the MPS factorization of the final quantum circuit state, we can also configure the MPS factorization procedure by setting various options, for example, the SVD algorithm used for truncation.

  // Optional: set up the SVD method for truncation
  cutensornetTensorSVDAlgo_t algo = CUTENSORNET_TENSOR_SVD_ALGO_GESVDJ;
  HANDLE_CUTN_ERROR(cutensornetStateConfigure(cutnHandle, quantumState,
                    CUTENSORNET_STATE_CONFIG_MPS_SVD_ALGO, &algo, sizeof(algo)));
  std::cout << "Configured the MPS computation\n";
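
Other MPS factorization options can be configured in the same way. For instance, a truncation cutoff on the singular values can be requested. The following is a minimal sketch, assuming the CUTENSORNET_STATE_CONFIG_MPS_SVD_REL_CUTOFF attribute available in recent cuTensorNet releases; it is not needed for this GHZ example, whose maximal bond dimension of 2 is already exact.

  // Optional sketch (assumed attribute): discard singular values below a relative cutoff during truncation
  const double relCutoff = 1e-8;
  HANDLE_CUTN_ERROR(cutensornetStateConfigure(cutnHandle, quantumState,
                    CUTENSORNET_STATE_CONFIG_MPS_SVD_REL_CUTOFF, &relCutoff, sizeof(relCutoff)));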

Prepare the computation of MPS factorization

Let’s create a workspace descriptor and prepare the computation of MPS factorization.

  // Prepare the MPS computation and attach workspace
  cutensornetWorkspaceDescriptor_t workDesc;
  HANDLE_CUTN_ERROR(cutensornetCreateWorkspaceDescriptor(cutnHandle, &workDesc));
  std::cout << "Created the workspace descriptor\n";
  HANDLE_CUTN_ERROR(cutensornetStatePrepare(cutnHandle, quantumState, scratchSize, workDesc, 0x0));
  std::cout << "Prepared the computation of the quantum circuit state\n";
  double flops {0.0};
  HANDLE_CUTN_ERROR(cutensornetStateGetInfo(cutnHandle, quantumState,
                    CUTENSORNET_STATE_INFO_FLOPS, &flops, sizeof(flops)));
  if(flops > 0.0) {
    std::cout << "Total flop count = " << (flops/1e9) << " GFlop\n";
  }else if(flops < 0.0) {
    std::cout << "ERROR: Negative Flop count!\n";
    std::abort();
  }

  int64_t worksize {0};
  HANDLE_CUTN_ERROR(cutensornetWorkspaceGetMemorySize(cutnHandle,
                                                      workDesc,
                                                      CUTENSORNET_WORKSIZE_PREF_RECOMMENDED,
                                                      CUTENSORNET_MEMSPACE_DEVICE,
                                                      CUTENSORNET_WORKSPACE_SCRATCH,
                                                      &worksize));
  std::cout << "Scratch GPU workspace size (bytes) for MPS computation = " << worksize << std::endl;
  if(worksize <= scratchSize) {
    HANDLE_CUTN_ERROR(cutensornetWorkspaceSetMemory(cutnHandle, workDesc, CUTENSORNET_MEMSPACE_DEVICE,
                      CUTENSORNET_WORKSPACE_SCRATCH, d_scratch, worksize));
  }else{
    std::cout << "ERROR: Insufficient workspace size on Device!\n";
    std::abort();
  }
  std::cout << "Set the workspace buffer for MPS computation\n";

Compute MPS factorization

Once the MPS factorization procedure has been configured and prepared, let’s compute the MPS factorization of the final quantum circuit state.

  // Execute MPS computation
  HANDLE_CUTN_ERROR(cutensornetStateCompute(cutnHandle, quantumState,
                    workDesc, extentsPtr.data(), /*strides=*/nullptr, d_mpsTensors.data(), 0));
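
If desired, the computed MPS tensors can be copied back to Host memory for inspection. Below is a minimal sketch for the first (left boundary) MPS tensor; it assumes that cutensornetStateCompute has written the actually computed tensor extents back into the buffers referenced by extentsPtr, so extents[0] now holds the final shape.

  // Sketch: copy the first MPS tensor back to Host using its computed shape stored in extents[0]
  const int64_t numElems = extents[0][0] * extents[0][1];
  std::vector<std::complex<double>> h_mpsTensor0(numElems);
  HANDLE_CUDA_ERROR(cudaMemcpy(h_mpsTensor0.data(), d_mpsTensors[0],
                    numElems * (2 * fp64size), cudaMemcpyDeviceToHost));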

Construct a tensor network operator

Now let’s create an empty tensor network operator for 16-qubit states and then append three components to it, where each component is a direct product of Pauli matrices scaled by some complex coefficient (like in the Jordan-Wigner representation).

  // Create an empty tensor network operator
  cutensornetNetworkOperator_t hamiltonian;
  HANDLE_CUTN_ERROR(cutensornetCreateNetworkOperator(cutnHandle, numQubits, qubitDims.data(), CUDA_C_64F, &hamiltonian));
  // Append component (0.5 * Z1 * Z2) to the tensor network operator
  {
    const int32_t numModes[] = {1, 1}; // Z1 acts on 1 mode, Z2 acts on 1 mode
    const int32_t modesZ1[] = {1}; // state modes Z1 acts on
    const int32_t modesZ2[] = {2}; // state modes Z2 acts on
    const int32_t * stateModes[] = {modesZ1, modesZ2}; // state modes (Z1 * Z2) acts on
    const void * gateData[] = {d_gateZ, d_gateZ}; // GPU pointers to gate data
    HANDLE_CUTN_ERROR(cutensornetNetworkOperatorAppendProduct(cutnHandle, hamiltonian, cuDoubleComplex{0.5,0.0},
                      2, numModes, stateModes, NULL, gateData, &id));
  }
  // Append component (0.25 * Y3) to the tensor network operator
  {
    const int32_t numModes[] = {1}; // Y3 acts on 1 mode
    const int32_t modesY3[] = {3}; // state modes Y3 acts on
    const int32_t * stateModes[] = {modesY3}; // state modes (Y3) acts on
    const void * gateData[] = {d_gateY}; // GPU pointers to gate data
    HANDLE_CUTN_ERROR(cutensornetNetworkOperatorAppendProduct(cutnHandle, hamiltonian, cuDoubleComplex{0.25,0.0},
                      1, numModes, stateModes, NULL, gateData, &id));
  }
  // Append component (0.13 * Y0 X2 Z3) to the tensor network operator
  {
    const int32_t numModes[] = {1, 1, 1}; // Y0 acts on 1 mode, X2 acts on 1 mode, Z3 acts on 1 mode
    const int32_t modesY0[] = {0}; // state modes Y0 acts on
    const int32_t modesX2[] = {2}; // state modes X2 acts on
    const int32_t modesZ3[] = {3}; // state modes Z3 acts on
    const int32_t * stateModes[] = {modesY0, modesX2, modesZ3}; // state modes (Y0 * X2 * Z3) acts on
    const void * gateData[] = {d_gateY, d_gateX, d_gateZ}; // GPU pointers to gate data
    HANDLE_CUTN_ERROR(cutensornetNetworkOperatorAppendProduct(cutnHandle, hamiltonian, cuDoubleComplex{0.13,0.0},
                      3, numModes, stateModes, NULL, gateData, &id));
  }
  std::cout << "Constructed a tensor network operator: (0.5 * Z1 * Z2) + (0.25 * Y3) + (0.13 * Y0 * X2 * Z3)" << std::endl;

Create the expectation value object

Once the quantum circuit and the tensor network operator have been constructed, and the final quantum circuit state has been factorized into the MPS representation, let's create the expectation value object that will compute the expectation value of the tensor network operator over the MPS-factorized state of the quantum circuit.

  // Specify the quantum circuit expectation value
  cutensornetStateExpectation_t expectation;
  HANDLE_CUTN_ERROR(cutensornetCreateExpectation(cutnHandle, quantumState, hamiltonian, &expectation));
  std::cout << "Created the specified quantum circuit expectation value\n";

Configure the expectation value calculation

Now we can configure the expectation value object by setting the number of hyper-samples to be used by the tensor network contraction path finder.

  // Configure the computation of the specified quantum circuit expectation value
  const int32_t numHyperSamples = 8; // desired number of hyper samples used in the tensor network contraction path finder
  HANDLE_CUTN_ERROR(cutensornetExpectationConfigure(cutnHandle, expectation,
                    CUTENSORNET_EXPECTATION_CONFIG_NUM_HYPER_SAMPLES, &numHyperSamples, sizeof(numHyperSamples)));

Prepare the expectation value calculation

Let’s prepare the computation of the desired expectation value.

  // Prepare the specified quantum circuit expectation value for computation
  HANDLE_CUTN_ERROR(cutensornetExpectationPrepare(cutnHandle, expectation, scratchSize, workDesc, 0x0));
  std::cout << "Prepared the specified quantum circuit expectation value\n";
  flops = 0.0;
  HANDLE_CUTN_ERROR(cutensornetExpectationGetInfo(cutnHandle, expectation,
                    CUTENSORNET_EXPECTATION_INFO_FLOPS, &flops, sizeof(flops)));
  std::cout << "Total flop count = " << (flops/1e9) << " GFlop\n";
  if(flops <= 0.0) {
    std::cout << "ERROR: Invalid Flop count!\n";
    std::abort();
  }

Set up the workspace

Now we query the required workspace size and attach the scratch buffer.

  // Attach the workspace buffer
  HANDLE_CUTN_ERROR(cutensornetWorkspaceGetMemorySize(cutnHandle,
                                                      workDesc,
                                                      CUTENSORNET_WORKSIZE_PREF_RECOMMENDED,
                                                      CUTENSORNET_MEMSPACE_DEVICE,
                                                      CUTENSORNET_WORKSPACE_SCRATCH,
                                                      &worksize));
  std::cout << "Required scratch GPU workspace size (bytes) = " << worksize << std::endl;
  if(worksize <= scratchSize) {
    HANDLE_CUTN_ERROR(cutensornetWorkspaceSetMemory(cutnHandle, workDesc, CUTENSORNET_MEMSPACE_DEVICE,
                      CUTENSORNET_WORKSPACE_SCRATCH, d_scratch, worksize));
  }else{
    std::cout << "ERROR: Insufficient workspace size on Device!\n";
    std::abort();
  }
  std::cout << "Set the workspace buffer\n";

Compute the requested expectation value

Once everything has been set up, we compute the requested expectation value and print it. Note that the returned expectation value is not normalized; the squared 2-norm of the tensor network state is returned as a separate argument and is used to normalize the result.

  // Compute the specified quantum circuit expectation value
  std::complex<double> expectVal{0.0,0.0}, stateNorm2{0.0,0.0};
  HANDLE_CUTN_ERROR(cutensornetExpectationCompute(cutnHandle, expectation, workDesc,
                    static_cast<void*>(&expectVal), static_cast<void*>(&stateNorm2), 0x0));
  std::cout << "Computed the specified quantum circuit expectation value\n";
  expectVal /= stateNorm2;
  std::cout << "Expectation value = (" << expectVal.real() << ", " << expectVal.imag() << ")\n";
  std::cout << "Squared 2-norm of the state = (" << stateNorm2.real() << ", " << stateNorm2.imag() << ")\n";
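
For this particular circuit the result can also be checked analytically: in the GHZ state, the Z1 * Z2 term has expectation value 1 while the Y3 and Y0 * X2 * Z3 terms average to zero, so the normalized expectation value should be 0.5 and the squared 2-norm should be 1. A minimal sanity-check sketch:

  // Sketch: compare the normalized result against the analytic GHZ value of (0.5, 0)
  const std::complex<double> expectedVal{0.5, 0.0};
  if (std::abs(expectVal - expectedVal) > 1e-10) {
    std::cout << "WARNING: Result deviates from the analytic GHZ expectation value\n";
  }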

Free resources

  // Destroy the workspace descriptor
  HANDLE_CUTN_ERROR(cutensornetDestroyWorkspaceDescriptor(workDesc));
  std::cout << "Destroyed the workspace descriptor\n";

  // Destroy the quantum circuit expectation value
  HANDLE_CUTN_ERROR(cutensornetDestroyExpectation(expectation));
  std::cout << "Destroyed the quantum circuit state expectation value\n";

  // Destroy the tensor network operator
  HANDLE_CUTN_ERROR(cutensornetDestroyNetworkOperator(hamiltonian));
  std::cout << "Destroyed the tensor network operator\n";

  // Destroy the quantum circuit state
  HANDLE_CUTN_ERROR(cutensornetDestroyState(quantumState));
  std::cout << "Destroyed the quantum circuit state\n";

  for (int32_t i = 0; i < numQubits; i++) {
    HANDLE_CUDA_ERROR(cudaFree(d_mpsTensors[i]));
  }
  HANDLE_CUDA_ERROR(cudaFree(d_scratch));
  HANDLE_CUDA_ERROR(cudaFree(d_gateCX));
  HANDLE_CUDA_ERROR(cudaFree(d_gateZ));
  HANDLE_CUDA_ERROR(cudaFree(d_gateY));
  HANDLE_CUDA_ERROR(cudaFree(d_gateX));
  HANDLE_CUDA_ERROR(cudaFree(d_gateH));
  std::cout << "Freed memory on GPU\n";

  // Finalize the cuTensorNet library
  HANDLE_CUTN_ERROR(cutensornetDestroy(cutnHandle));
  std::cout << "Finalized the cuTensorNet library\n";

  return 0;
}