# Computing Tensor Network State Amplitudes¶

The following code example illustrates how to define a tensor network state and then compute a slice of amplitudes of the tensor network state. The full code can be found in the NVIDIA/cuQuantum repository (here).

# Headers and error handling¶

``` 7#include <cstdlib>
8#include <cstdio>
9#include <cassert>
10#include <complex>
11#include <vector>
12#include <bitset>
13#include <iostream>
14
15#include <cuda_runtime.h>
16#include <cutensornet.h>
17
18
19#define HANDLE_CUDA_ERROR(x) \
20{ const auto err = x; \
21  if( err != cudaSuccess ) \
22  { printf("CUDA error %s in line %d\n", cudaGetErrorString(err), __LINE__); fflush(stdout); std::abort(); } \
23};
24
25#define HANDLE_CUTN_ERROR(x) \
26{ const auto err = x; \
27  if( err != CUTENSORNET_STATUS_SUCCESS ) \
28  { printf("cuTensorNet error %s in line %d\n", cutensornetGetErrorString(err), __LINE__); fflush(stdout); std::abort(); } \
29};
30
31
32int main()
33{
34  static_assert(sizeof(size_t) == sizeof(int64_t), "Please build this sample on a 64-bit architecture!");
35
36  constexpr std::size_t fp64size = sizeof(double);
```

# Define the tensor network state and the desired slice of state amplitudes¶

Let’s define a tensor network state corresponding to a 6-qubit quantum circuit and request a slice of state amplitudes where qubits 0 and 1 are fixed at value 1.

```40  // Quantum state configuration
41  constexpr int32_t numQubits = 6; // number of qubits
42  const std::vector<int64_t> qubitDims(numQubits,2); // qubit dimensions
43  const std::vector<int32_t> fixedModes({0,1}); // fixed modes in the output amplitude tensor (must be in acsending order)
44  const std::vector<int64_t> fixedValues({1,1}); // values of the fixed modes in the output amplitude tensor
45  const int32_t numFixedModes = fixedModes.size(); // number of fixed modes in the output amplitude tensor
46  std::cout << "Quantum circuit: " << numQubits << " qubits\n";
```

# Initialize the cuTensorNet library handle¶

```50  // Initialize the cuTensorNet library
51  HANDLE_CUDA_ERROR(cudaSetDevice(0));
52  cutensornetHandle_t cutnHandle;
53  HANDLE_CUTN_ERROR(cutensornetCreate(&cutnHandle));
54  std::cout << "Initialized cuTensorNet library on GPU 0\n";
```

# Define quantum gates on GPU¶

```58  // Define necessary quantum gate tensors in Host memory
59  const double invsq2 = 1.0 / std::sqrt(2.0);
60  //  Hadamard gate
61  const std::vector<std::complex<double>> h_gateH {{invsq2, 0.0},  {invsq2, 0.0},
62                                                   {invsq2, 0.0}, {-invsq2, 0.0}};
63  //  CX gate
64  const std::vector<std::complex<double>> h_gateCX {{1.0, 0.0}, {0.0, 0.0}, {0.0, 0.0}, {0.0, 0.0},
65                                                    {0.0, 0.0}, {1.0, 0.0}, {0.0, 0.0}, {0.0, 0.0},
66                                                    {0.0, 0.0}, {0.0, 0.0}, {0.0, 0.0}, {1.0, 0.0},
67                                                    {0.0, 0.0}, {0.0, 0.0}, {1.0, 0.0}, {0.0, 0.0}};
68
69  // Copy quantum gates to Device memory
70  void *d_gateH{nullptr}, *d_gateCX{nullptr};
71  HANDLE_CUDA_ERROR(cudaMalloc(&d_gateH, 4 * (2 * fp64size)));
72  HANDLE_CUDA_ERROR(cudaMalloc(&d_gateCX, 16 * (2 * fp64size)));
73  std::cout << "Allocated quantum gate memory on GPU\n";
74  HANDLE_CUDA_ERROR(cudaMemcpy(d_gateH, h_gateH.data(), 4 * (2 * fp64size), cudaMemcpyHostToDevice));
75  HANDLE_CUDA_ERROR(cudaMemcpy(d_gateCX, h_gateCX.data(), 16 * (2 * fp64size), cudaMemcpyHostToDevice));
76  std::cout << "Copied quantum gates to GPU memory\n";
```

# Allocate the amplitudes slice tensor on GPU¶

Here we allocate GPU memory for the requested amplitudes slice tensor.

```80  // Allocate Device memory for the specified slice of the quantum circuit amplitudes tensor
81  void *d_amp{nullptr};
82  std::size_t ampSize = 1;
83  for(const auto & qubitDim: qubitDims) ampSize *= qubitDim; // all state modes (full size)
84  for(const auto & fixedMode: fixedModes) ampSize /= qubitDims[fixedMode]; // fixed state modes reduce the slice size
85  HANDLE_CUDA_ERROR(cudaMalloc(&d_amp, ampSize * (2 * fp64size)));
86  std::cout << "Allocated memory for the specified slice of the quantum circuit amplitude tensor of size "
87            << ampSize << " elements\n";
```

# Allocate the scratch buffer on GPU¶

```91  // Query the free memory on Device
92  std::size_t freeSize{0}, totalSize{0};
93  HANDLE_CUDA_ERROR(cudaMemGetInfo(&freeSize, &totalSize));
94  const std::size_t scratchSize = (freeSize - (freeSize % 4096)) / 2; // use half of available memory with alignment
95  void *d_scratch{nullptr};
96  HANDLE_CUDA_ERROR(cudaMalloc(&d_scratch, scratchSize));
97  std::cout << "Allocated " << scratchSize << " bytes of scratch memory on GPU\n";
```

# Create a pure tensor network state¶

Now let’s create a pure tensor network state for a 6-qubit quantum circuit.

```101  // Create the initial quantum state
102  cutensornetState_t quantumState;
103  HANDLE_CUTN_ERROR(cutensornetCreateState(cutnHandle, CUTENSORNET_STATE_PURITY_PURE, numQubits, qubitDims.data(),
104                    CUDA_C_64F, &quantumState));
105  std::cout << "Created the initial quantum state\n";
```

# Apply quantum gates¶

Let’s construct the GHZ quantum circuit by applying the corresponding quantum gates.

```109  // Construct the final quantum circuit state (apply quantum gates) for the GHZ circuit
110  int64_t id;
111  HANDLE_CUTN_ERROR(cutensornetStateApplyTensorOperator(cutnHandle, quantumState, 1, std::vector<int32_t>{{0}}.data(),
112                    d_gateH, nullptr, 1, 0, 1, &id));
113  for(int32_t i = 1; i < numQubits; ++i) {
114    HANDLE_CUTN_ERROR(cutensornetStateApplyTensorOperator(cutnHandle, quantumState, 2, std::vector<int32_t>{{i-1,i}}.data(),
115                      d_gateCX, nullptr, 1, 0, 1, &id));
116  }
117  std::cout << "Applied quantum gates\n";
```

# Create the state amplitudes accessor¶

Once the quantum circuit has been constructed, let’s create the amplitudes accessor object that will compute the requested slice of state amplitudes.

```121  // Specify the quantum circuit amplitudes accessor
122  cutensornetStateAccessor_t accessor;
123  HANDLE_CUTN_ERROR(cutensornetCreateAccessor(cutnHandle, quantumState, numFixedModes, fixedModes.data(),
124                    nullptr, &accessor)); // using default strides
125  std::cout << "Created the specified quantum circuit amplitudes accessor\n";
```

# Configure the state amplitudes accessor¶

Now we can configure the state amplitudes accessor object by setting the number of hyper-samples to be used by the tensor network contraction path finder.

```129  // Configure the computation of the slice of the specified quantum circuit amplitudes tensor
130  const int32_t numHyperSamples = 8; // desired number of hyper samples used in the tensor network contraction path finder
131  HANDLE_CUTN_ERROR(cutensornetAccessorConfigure(cutnHandle, accessor,
132                    CUTENSORNET_ACCESSOR_CONFIG_NUM_HYPER_SAMPLES, &numHyperSamples, sizeof(numHyperSamples)));
```

# Prepare the computation of the amplitudes slice tensor¶

Let’s create a workspace descriptor and prepare the computation of the amplitudes slice tensor.

```136  // Prepare the computation of the specified slice of the quantum circuit amplitudes tensor
137  cutensornetWorkspaceDescriptor_t workDesc;
138  HANDLE_CUTN_ERROR(cutensornetCreateWorkspaceDescriptor(cutnHandle, &workDesc));
139  std::cout << "Created the workspace descriptor\n";
140  HANDLE_CUTN_ERROR(cutensornetAccessorPrepare(cutnHandle, accessor, scratchSize, workDesc, 0x0));
141  std::cout << "Prepared the computation of the specified slice of the quantum circuit amplitudes tensor\n";
142  double flops {0.0};
143  HANDLE_CUTN_ERROR(cutensornetAccessorGetInfo(cutnHandle, accessor,
144                    CUTENSORNET_ACCESSOR_INFO_FLOPS, &flops, sizeof(flops)));
145  std::cout << "Total flop count = " << (flops/1e9) << " GFlop\n";
```

# Set up the workspace¶

Now we can set up the required workspace buffer.

```149  // Attach the workspace buffer
150  int64_t worksize {0};
151  HANDLE_CUTN_ERROR(cutensornetWorkspaceGetMemorySize(cutnHandle,
152                                                      workDesc,
153                                                      CUTENSORNET_WORKSIZE_PREF_RECOMMENDED,
154                                                      CUTENSORNET_MEMSPACE_DEVICE,
155                                                      CUTENSORNET_WORKSPACE_SCRATCH,
156                                                      &worksize));
157  std::cout << "Required scratch GPU workspace size (bytes) = " << worksize << std::endl;
158  if(worksize <= scratchSize) {
159    HANDLE_CUTN_ERROR(cutensornetWorkspaceSetMemory(cutnHandle, workDesc, CUTENSORNET_MEMSPACE_DEVICE,
160                      CUTENSORNET_WORKSPACE_SCRATCH, d_scratch, worksize));
161  }else{
162    std::cout << "ERROR: Insufficient workspace size on Device!\n";
163    std::abort();
164  }
165  std::cout << "Set the workspace buffer\n";
```

# Compute the specified slice of state amplitudes¶

Once everything has been set up, we compute the requested slice of state amplitudes, copy it back to Host memory, and print it.

```169  // Compute the specified slice of the quantum circuit amplitudes tensor
170  std::complex<double> stateNorm2{0.0,0.0};
171  HANDLE_CUTN_ERROR(cutensornetAccessorCompute(cutnHandle, accessor, fixedValues.data(),
172                    workDesc, d_amp, static_cast<void*>(&stateNorm2), 0x0));
173  std::cout << "Computed the specified slice of the quantum circuit amplitudes tensor\n";
174  std::vector<std::complex<double>> h_amp(ampSize);
175  HANDLE_CUDA_ERROR(cudaMemcpy(h_amp.data(), d_amp, ampSize * (2 * fp64size), cudaMemcpyDeviceToHost));
176  std::cout << "Amplitudes slice for " << (numQubits - numFixedModes) << " qubits:\n";
177  for(std::size_t i = 0; i < ampSize; ++i) {
178    std::cout << " " << h_amp[i] << std::endl;
179  }
180  std::cout << "Squared 2-norm of the state = (" << stateNorm2.real() << ", " << stateNorm2.imag() << ")\n";
```

# Free resources¶

```184  // Destroy the workspace descriptor
185  HANDLE_CUTN_ERROR(cutensornetDestroyWorkspaceDescriptor(workDesc));
186  std::cout << "Destroyed the workspace descriptor\n";
187
188  // Destroy the quantum circuit amplitudes accessor
189  HANDLE_CUTN_ERROR(cutensornetDestroyAccessor(accessor));
190  std::cout << "Destroyed the quantum circuit amplitudes accessor\n";
191
192  // Destroy the quantum circuit state
193  HANDLE_CUTN_ERROR(cutensornetDestroyState(quantumState));
194  std::cout << "Destroyed the quantum circuit state\n";
195
196  HANDLE_CUDA_ERROR(cudaFree(d_scratch));
197  HANDLE_CUDA_ERROR(cudaFree(d_amp));
198  HANDLE_CUDA_ERROR(cudaFree(d_gateCX));
199  HANDLE_CUDA_ERROR(cudaFree(d_gateH));
200  std::cout << "Freed memory on GPU\n";
201
202  // Finalize the cuTensorNet library
203  HANDLE_CUTN_ERROR(cutensornetDestroy(cutnHandle));
204  std::cout << "Finalized the cuTensorNet library\n";
205
206  return 0;
207}
```