Computing Tensor Network State Amplitudes

The following code example illustrates how to define a tensor network state and then compute a slice of amplitudes of the tensor network state. The full code can be found in the NVIDIA/cuQuantum repository (here).

Headers and error handling

 7#include <cstdlib>
 8#include <cstdio>
 9#include <cassert>
10#include <complex>
11#include <vector>
12#include <bitset>
13#include <iostream>
14
15#include <cuda_runtime.h>
16#include <cutensornet.h>
17
18
19#define HANDLE_CUDA_ERROR(x) \
20{ const auto err = x; \
21  if( err != cudaSuccess ) \
22  { printf("CUDA error %s in line %d\n", cudaGetErrorString(err), __LINE__); fflush(stdout); std::abort(); } \
23};
24
25#define HANDLE_CUTN_ERROR(x) \
26{ const auto err = x; \
27  if( err != CUTENSORNET_STATUS_SUCCESS ) \
28  { printf("cuTensorNet error %s in line %d\n", cutensornetGetErrorString(err), __LINE__); fflush(stdout); std::abort(); } \
29};
30
31
32int main()
33{
34  static_assert(sizeof(size_t) == sizeof(int64_t), "Please build this sample on a 64-bit architecture!");
35
36  constexpr std::size_t fp64size = sizeof(double);

Define the tensor network state and the desired slice of state amplitudes

Let’s define a tensor network state corresponding to a 6-qubit quantum circuit and request a slice of state amplitudes where qubits 0 and 1 are fixed at value 1.

40  // Quantum state configuration
41  constexpr int32_t numQubits = 6; // number of qubits
42  const std::vector<int64_t> qubitDims(numQubits,2); // qubit dimensions
43  const std::vector<int32_t> fixedModes({0,1}); // fixed modes in the output amplitude tensor (must be in acsending order)
44  const std::vector<int64_t> fixedValues({1,1}); // values of the fixed modes in the output amplitude tensor
45  const int32_t numFixedModes = fixedModes.size(); // number of fixed modes in the output amplitude tensor
46  std::cout << "Quantum circuit: " << numQubits << " qubits\n";

Initialize the cuTensorNet library handle

50  // Initialize the cuTensorNet library
51  HANDLE_CUDA_ERROR(cudaSetDevice(0));
52  cutensornetHandle_t cutnHandle;
53  HANDLE_CUTN_ERROR(cutensornetCreate(&cutnHandle));
54  std::cout << "Initialized cuTensorNet library on GPU 0\n";

Define quantum gates on GPU

58  // Define necessary quantum gate tensors in Host memory
59  const double invsq2 = 1.0 / std::sqrt(2.0);
60  //  Hadamard gate
61  const std::vector<std::complex<double>> h_gateH {{invsq2, 0.0},  {invsq2, 0.0},
62                                                   {invsq2, 0.0}, {-invsq2, 0.0}};
63  //  CX gate
64  const std::vector<std::complex<double>> h_gateCX {{1.0, 0.0}, {0.0, 0.0}, {0.0, 0.0}, {0.0, 0.0},
65                                                    {0.0, 0.0}, {1.0, 0.0}, {0.0, 0.0}, {0.0, 0.0},
66                                                    {0.0, 0.0}, {0.0, 0.0}, {0.0, 0.0}, {1.0, 0.0},
67                                                    {0.0, 0.0}, {0.0, 0.0}, {1.0, 0.0}, {0.0, 0.0}};
68
69  // Copy quantum gates to Device memory
70  void *d_gateH{nullptr}, *d_gateCX{nullptr};
71  HANDLE_CUDA_ERROR(cudaMalloc(&d_gateH, 4 * (2 * fp64size)));
72  HANDLE_CUDA_ERROR(cudaMalloc(&d_gateCX, 16 * (2 * fp64size)));
73  std::cout << "Allocated quantum gate memory on GPU\n";
74  HANDLE_CUDA_ERROR(cudaMemcpy(d_gateH, h_gateH.data(), 4 * (2 * fp64size), cudaMemcpyHostToDevice));
75  HANDLE_CUDA_ERROR(cudaMemcpy(d_gateCX, h_gateCX.data(), 16 * (2 * fp64size), cudaMemcpyHostToDevice));
76  std::cout << "Copied quantum gates to GPU memory\n";

Allocate the amplitudes slice tensor on GPU

Here we allocate GPU memory for the requested amplitudes slice tensor.

80  // Allocate Device memory for the specified slice of the quantum circuit amplitudes tensor
81  void *d_amp{nullptr};
82  std::size_t ampSize = 1;
83  for(const auto & qubitDim: qubitDims) ampSize *= qubitDim; // all state modes (full size)
84  for(const auto & fixedMode: fixedModes) ampSize /= qubitDims[fixedMode]; // fixed state modes reduce the slice size
85  HANDLE_CUDA_ERROR(cudaMalloc(&d_amp, ampSize * (2 * fp64size)));
86  std::cout << "Allocated memory for the specified slice of the quantum circuit amplitude tensor of size "
87            << ampSize << " elements\n";

Allocate the scratch buffer on GPU

91  // Query the free memory on Device
92  std::size_t freeSize{0}, totalSize{0};
93  HANDLE_CUDA_ERROR(cudaMemGetInfo(&freeSize, &totalSize));
94  const std::size_t scratchSize = (freeSize - (freeSize % 4096)) / 2; // use half of available memory with alignment
95  void *d_scratch{nullptr};
96  HANDLE_CUDA_ERROR(cudaMalloc(&d_scratch, scratchSize));
97  std::cout << "Allocated " << scratchSize << " bytes of scratch memory on GPU\n";

Create a pure tensor network state

Now let’s create a pure tensor network state for a 6-qubit quantum circuit.

101  // Create the initial quantum state
102  cutensornetState_t quantumState;
103  HANDLE_CUTN_ERROR(cutensornetCreateState(cutnHandle, CUTENSORNET_STATE_PURITY_PURE, numQubits, qubitDims.data(),
104                    CUDA_C_64F, &quantumState));
105  std::cout << "Created the initial quantum state\n";

Apply quantum gates

Let’s construct the GHZ quantum circuit by applying the corresponding quantum gates.

109  // Construct the final quantum circuit state (apply quantum gates) for the GHZ circuit
110  int64_t id;
111  HANDLE_CUTN_ERROR(cutensornetStateApplyTensorOperator(cutnHandle, quantumState, 1, std::vector<int32_t>{{0}}.data(),
112                    d_gateH, nullptr, 1, 0, 1, &id));
113  for(int32_t i = 1; i < numQubits; ++i) {
114    HANDLE_CUTN_ERROR(cutensornetStateApplyTensorOperator(cutnHandle, quantumState, 2, std::vector<int32_t>{{i-1,i}}.data(),
115                      d_gateCX, nullptr, 1, 0, 1, &id));
116  }
117  std::cout << "Applied quantum gates\n";

Create the state amplitudes accessor

Once the quantum circuit has been constructed, let’s create the amplitudes accessor object that will compute the requested slice of state amplitudes.

121  // Specify the quantum circuit amplitudes accessor
122  cutensornetStateAccessor_t accessor;
123  HANDLE_CUTN_ERROR(cutensornetCreateAccessor(cutnHandle, quantumState, numFixedModes, fixedModes.data(),
124                    nullptr, &accessor)); // using default strides
125  std::cout << "Created the specified quantum circuit amplitudes accessor\n";

Configure the state amplitudes accessor

Now we can configure the state amplitudes accessor object by setting the number of hyper-samples to be used by the tensor network contraction path finder.

129  // Configure the computation of the slice of the specified quantum circuit amplitudes tensor
130  const int32_t numHyperSamples = 8; // desired number of hyper samples used in the tensor network contraction path finder
131  HANDLE_CUTN_ERROR(cutensornetAccessorConfigure(cutnHandle, accessor,
132                    CUTENSORNET_ACCESSOR_CONFIG_NUM_HYPER_SAMPLES, &numHyperSamples, sizeof(numHyperSamples)));

Prepare the computation of the amplitudes slice tensor

Let’s create a workspace descriptor and prepare the computation of the amplitudes slice tensor.

136  // Prepare the computation of the specified slice of the quantum circuit amplitudes tensor
137  cutensornetWorkspaceDescriptor_t workDesc;
138  HANDLE_CUTN_ERROR(cutensornetCreateWorkspaceDescriptor(cutnHandle, &workDesc));
139  std::cout << "Created the workspace descriptor\n";
140  HANDLE_CUTN_ERROR(cutensornetAccessorPrepare(cutnHandle, accessor, scratchSize, workDesc, 0x0));
141  std::cout << "Prepared the computation of the specified slice of the quantum circuit amplitudes tensor\n";
142  double flops {0.0};
143  HANDLE_CUTN_ERROR(cutensornetAccessorGetInfo(cutnHandle, accessor,
144                    CUTENSORNET_ACCESSOR_INFO_FLOPS, &flops, sizeof(flops)));
145  std::cout << "Total flop count = " << (flops/1e9) << " GFlop\n";

Set up the workspace

Now we can set up the required workspace buffer.

149  // Attach the workspace buffer
150  int64_t worksize {0};
151  HANDLE_CUTN_ERROR(cutensornetWorkspaceGetMemorySize(cutnHandle,
152                                                      workDesc,
153                                                      CUTENSORNET_WORKSIZE_PREF_RECOMMENDED,
154                                                      CUTENSORNET_MEMSPACE_DEVICE,
155                                                      CUTENSORNET_WORKSPACE_SCRATCH,
156                                                      &worksize));
157  std::cout << "Required scratch GPU workspace size (bytes) = " << worksize << std::endl;
158  if(worksize <= scratchSize) {
159    HANDLE_CUTN_ERROR(cutensornetWorkspaceSetMemory(cutnHandle, workDesc, CUTENSORNET_MEMSPACE_DEVICE,
160                      CUTENSORNET_WORKSPACE_SCRATCH, d_scratch, worksize));
161  }else{
162    std::cout << "ERROR: Insufficient workspace size on Device!\n";
163    std::abort();
164  }
165  std::cout << "Set the workspace buffer\n";

Compute the specified slice of state amplitudes

Once everything has been set up, we compute the requested slice of state amplitudes, copy it back to Host memory, and print it.

169  // Compute the specified slice of the quantum circuit amplitudes tensor
170  std::complex<double> stateNorm2{0.0,0.0};
171  HANDLE_CUTN_ERROR(cutensornetAccessorCompute(cutnHandle, accessor, fixedValues.data(),
172                    workDesc, d_amp, static_cast<void*>(&stateNorm2), 0x0));
173  std::cout << "Computed the specified slice of the quantum circuit amplitudes tensor\n";
174  std::vector<std::complex<double>> h_amp(ampSize);
175  HANDLE_CUDA_ERROR(cudaMemcpy(h_amp.data(), d_amp, ampSize * (2 * fp64size), cudaMemcpyDeviceToHost));
176  std::cout << "Amplitudes slice for " << (numQubits - numFixedModes) << " qubits:\n";
177  for(std::size_t i = 0; i < ampSize; ++i) {
178    std::cout << " " << h_amp[i] << std::endl;
179  }
180  std::cout << "Squared 2-norm of the state = (" << stateNorm2.real() << ", " << stateNorm2.imag() << ")\n";

Free resources

184  // Destroy the workspace descriptor
185  HANDLE_CUTN_ERROR(cutensornetDestroyWorkspaceDescriptor(workDesc));
186  std::cout << "Destroyed the workspace descriptor\n";
187
188  // Destroy the quantum circuit amplitudes accessor
189  HANDLE_CUTN_ERROR(cutensornetDestroyAccessor(accessor));
190  std::cout << "Destroyed the quantum circuit amplitudes accessor\n";
191
192  // Destroy the quantum circuit state
193  HANDLE_CUTN_ERROR(cutensornetDestroyState(quantumState));
194  std::cout << "Destroyed the quantum circuit state\n";
195
196  HANDLE_CUDA_ERROR(cudaFree(d_scratch));
197  HANDLE_CUDA_ERROR(cudaFree(d_amp));
198  HANDLE_CUDA_ERROR(cudaFree(d_gateCX));
199  HANDLE_CUDA_ERROR(cudaFree(d_gateH));
200  std::cout << "Freed memory on GPU\n";
201
202  // Finalize the cuTensorNet library
203  HANDLE_CUTN_ERROR(cutensornetDestroy(cutnHandle));
204  std::cout << "Finalized the cuTensorNet library\n";
205
206  return 0;
207}