Computing tensor network state amplitudes
The following code example illustrates how to define a tensor network state and then compute a slice of its amplitudes. The full code can be found in the NVIDIA/cuQuantum repository on GitHub.
Headers and error handling
7#include <cstdlib>
8#include <cstdio>
9#include <cassert>
10#include <complex>
11#include <vector>
12#include <bitset>
13#include <iostream>
14
15#include <cuda_runtime.h>
16#include <cutensornet.h>
17
18
// Checks the status returned by a CUDA runtime call.
// The argument is evaluated exactly once. If the result is not cudaSuccess,
// the error string and the source line are printed to stdout and the process
// is aborted. The body is wrapped in do { } while(0) so that
// `HANDLE_CUDA_ERROR(call);` forms a single statement and remains valid in
// unbraced if/else branches (the previous `{ ... };` form did not).
#define HANDLE_CUDA_ERROR(x) \
do { const auto err_ = (x); \
  if( err_ != cudaSuccess ) \
  { printf("CUDA error %s in line %d\n", cudaGetErrorString(err_), __LINE__); fflush(stdout); std::abort(); } \
} while(0)
24
// Checks the status returned by a cuTensorNet library call.
// The argument is evaluated exactly once. If the result is not
// CUTENSORNET_STATUS_SUCCESS, the error string and the source line are printed
// to stdout and the process is aborted. The body is wrapped in
// do { } while(0) so that `HANDLE_CUTN_ERROR(call);` forms a single statement
// and remains valid in unbraced if/else branches (the previous `{ ... };`
// form did not).
#define HANDLE_CUTN_ERROR(x) \
do { const auto err_ = (x); \
  if( err_ != CUTENSORNET_STATUS_SUCCESS ) \
  { printf("cuTensorNet error %s in line %d\n", cutensornetGetErrorString(err_), __LINE__); fflush(stdout); std::abort(); } \
} while(0)
30
31
32int main() // entry point of the sample; takes no command-line arguments
33{
34 static_assert(sizeof(size_t) == sizeof(int64_t), "Please build this sample on a 64-bit architecture!"); // compile-time guard: size_t must be as wide as int64_t
35
36 constexpr std::size_t fp64size = sizeof(double); // bytes per double; complex-valued buffers in this sample are sized as 2 * fp64size per element
Define the tensor network state and the desired slice of state amplitudes
Let’s define a tensor network state corresponding to a 6-qubit quantum circuit and request a slice of state amplitudes where qubits 0 and 1 are fixed at value 1. The remaining four qubits stay open, so the requested slice holds 2^4 = 16 amplitudes.
40 // Quantum state configuration: problem size and the amplitude slice to extract
41 constexpr int32_t numQubits = 6; // number of qubits
42 const std::vector<int64_t> qubitDims(numQubits,2); // qubit dimensions (every qubit has dimension 2)
43 const std::vector<int32_t> fixedModes({0,1}); // fixed modes in the output amplitude tensor (must be in ascending order)
44 const std::vector<int64_t> fixedValues({1,1}); // values of the fixed modes in the output amplitude tensor, one entry per fixed mode
45 const int32_t numFixedModes = fixedModes.size(); // number of fixed modes in the output amplitude tensor (size_t implicitly narrowed to int32_t)
46 std::cout << "Quantum circuit: " << numQubits << " qubits\n";
Initialize the cuTensorNet library handle
50 // Initialize the cuTensorNet library: select GPU 0, then create the library handle
51 HANDLE_CUDA_ERROR(cudaSetDevice(0)); // aborts the process if device 0 cannot be selected
52 cutensornetHandle_t cutnHandle; // handle passed to the cutensornet* calls in this sample
53 HANDLE_CUTN_ERROR(cutensornetCreate(&cutnHandle));
54 std::cout << "Initialized cuTensorNet library on GPU 0\n";
Define quantum gates on GPU
58 // Define necessary quantum gate tensors in Host memory
// NOTE(review): std::sqrt is declared in <cmath>, which the visible include list does not name directly — presumably pulled in transitively (e.g. via <complex>); confirm.
59 const double invsq2 = 1.0 / std::sqrt(2.0);
60 // Hadamard gate: 4 complex entries, each of magnitude 1/sqrt(2), the last one negated
61 const std::vector<std::complex<double>> h_gateH {{invsq2, 0.0}, {invsq2, 0.0},
62 {invsq2, 0.0}, {-invsq2, 0.0}};
63 // CX gate: 16 complex entries (4 per line below), with four entries equal to 1 and the rest 0
64 const std::vector<std::complex<double>> h_gateCX {{1.0, 0.0}, {0.0, 0.0}, {0.0, 0.0}, {0.0, 0.0},
65 {0.0, 0.0}, {1.0, 0.0}, {0.0, 0.0}, {0.0, 0.0},
66 {0.0, 0.0}, {0.0, 0.0}, {0.0, 0.0}, {1.0, 0.0},
67 {0.0, 0.0}, {0.0, 0.0}, {1.0, 0.0}, {0.0, 0.0}};
68
69 // Copy quantum gates to Device memory
70 void *d_gateH{nullptr}, *d_gateCX{nullptr};
71 HANDLE_CUDA_ERROR(cudaMalloc(&d_gateH, 4 * (2 * fp64size))); // 4 complex elements, 2 doubles each
72 HANDLE_CUDA_ERROR(cudaMalloc(&d_gateCX, 16 * (2 * fp64size))); // 16 complex elements, 2 doubles each
73 std::cout << "Allocated quantum gate memory on GPU\n";
74 HANDLE_CUDA_ERROR(cudaMemcpy(d_gateH, h_gateH.data(), 4 * (2 * fp64size), cudaMemcpyHostToDevice)); // byte count matches the allocation above
75 HANDLE_CUDA_ERROR(cudaMemcpy(d_gateCX, h_gateCX.data(), 16 * (2 * fp64size), cudaMemcpyHostToDevice)); // byte count matches the allocation above
76 std::cout << "Copied quantum gates to GPU memory\n";
Allocate the amplitudes slice tensor on GPU
Here we allocate GPU memory for the requested amplitudes slice tensor.
80 // Allocate Device memory for the specified slice of the quantum circuit amplitudes tensor
81 void *d_amp{nullptr};
82 std::size_t ampSize = 1; // number of complex elements in the slice, built up by the two loops below
83 for(const auto & qubitDim: qubitDims) ampSize *= qubitDim; // all state modes (full size): product of all qubit dimensions
84 for(const auto & fixedMode: fixedModes) ampSize /= qubitDims[fixedMode]; // fixed state modes reduce the slice size: divide out the dimension of each fixed mode
85 HANDLE_CUDA_ERROR(cudaMalloc(&d_amp, ampSize * (2 * fp64size))); // 2 doubles (real, imaginary) per element
86 std::cout << "Allocated memory for the specified slice of the quantum circuit amplitude tensor of size "
87 << ampSize << " elements\n";
Allocate the scratch buffer on GPU
91 // Query the free memory on Device
92 std::size_t freeSize{0}, totalSize{0};
93 HANDLE_CUDA_ERROR(cudaMemGetInfo(&freeSize, &totalSize)); // totalSize is queried but not used afterwards in this sample
94 const std::size_t scratchSize = (freeSize - (freeSize % 4096)) / 2; // round the free size down to a multiple of 4096 bytes, then take half of it
95 void *d_scratch{nullptr};
96 HANDLE_CUDA_ERROR(cudaMalloc(&d_scratch, scratchSize)); // one scratch buffer, later attached to the workspace descriptor
97 std::cout << "Allocated " << scratchSize << " bytes of scratch memory on GPU\n";
Create a pure tensor network state
Now let’s create a pure tensor network state for a 6-qubit quantum circuit.
101 // Create the initial quantum state: a pure state over numQubits modes with the dimensions in qubitDims
102 cutensornetState_t quantumState;
103 HANDLE_CUTN_ERROR(cutensornetCreateState(cutnHandle, CUTENSORNET_STATE_PURITY_PURE, numQubits, qubitDims.data(),
104 CUDA_C_64F, &quantumState)); // CUDA_C_64F matches the std::complex<double> host data used for gates and amplitudes
105 std::cout << "Created the initial quantum state\n";
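Apply quantum gates
Let’s construct the GHZ quantum circuit by applying the corresponding quantum gates: a Hadamard gate on qubit 0, followed by a chain of CX gates on the neighboring qubit pairs (0,1), (1,2), …, (4,5).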
109 // Construct the final quantum circuit state (apply quantum gates) for the GHZ circuit
// NOTE(review): per the cuTensorNet API reference the arguments after the gate pointer are presumably
// (tensorModeStrides, immutable, adjoint, unitary, tensorId) — confirm against the installed version.
110 int64_t id; // receives the id of the most recently applied operator; overwritten by every call and not read afterwards here
111 HANDLE_CUTN_ERROR(cutensornetStateApplyTensorOperator(cutnHandle, quantumState, 1, std::vector<int32_t>{{0}}.data(), // one-mode gate H on mode 0
112 d_gateH, nullptr, 1, 0, 1, &id));
113 for(int32_t i = 1; i < numQubits; ++i) { // chain of two-mode gates on modes (0,1), (1,2), ..., (numQubits-2, numQubits-1)
114 HANDLE_CUTN_ERROR(cutensornetStateApplyTensorOperator(cutnHandle, quantumState, 2, std::vector<int32_t>{{i-1,i}}.data(), // the temporary vector outlives the call expression
115 d_gateCX, nullptr, 1, 0, 1, &id));
116 }
117 std::cout << "Applied quantum gates\n";
Create the state amplitudes accessor
Once the quantum circuit has been constructed, let’s create the amplitudes accessor object that will compute the requested slice of state amplitudes.
121 // Specify the quantum circuit amplitudes accessor: which modes are fixed (projected) in the requested slice
122 cutensornetStateAccessor_t accessor;
123 HANDLE_CUTN_ERROR(cutensornetCreateAccessor(cutnHandle, quantumState, numFixedModes, fixedModes.data(),
124 nullptr, &accessor)); // using default strides (nullptr passed for the strides argument)
125 std::cout << "Created the specified quantum circuit amplitudes accessor\n";
Configure the state amplitudes accessor
Now we can configure the state amplitudes accessor object by setting the number of hyper-samples to be used by the tensor network contraction path finder.
129 // Configure the computation of the slice of the specified quantum circuit amplitudes tensor
130 const int32_t numHyperSamples = 8; // desired number of hyper samples used in the tensor network contraction path finder
131 HANDLE_CUTN_ERROR(cutensornetAccessorConfigure(cutnHandle, accessor,
132 CUTENSORNET_ACCESSOR_CONFIG_NUM_HYPER_SAMPLES, &numHyperSamples, sizeof(numHyperSamples))); // attribute value is passed by address together with its size in bytes
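Prepare the computation of the amplitudes slice tensor
Let’s create a workspace descriptor and prepare the computation of the amplitudes slice tensor, passing the size of the scratch buffer allocated earlier. After the preparation we query and print the estimated flop count.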
136 // Prepare the computation of the specified slice of the quantum circuit amplitudes tensor
137 cutensornetWorkspaceDescriptor_t workDesc;
138 HANDLE_CUTN_ERROR(cutensornetCreateWorkspaceDescriptor(cutnHandle, &workDesc));
139 std::cout << "Created the workspace descriptor\n";
140 HANDLE_CUTN_ERROR(cutensornetAccessorPrepare(cutnHandle, accessor, scratchSize, workDesc, 0x0)); // scratchSize is the size of the d_scratch buffer; 0x0 is presumably the default CUDA stream — confirm
141 std::cout << "Prepared the computation of the specified slice of the quantum circuit amplitudes tensor\n";
142 double flops {0.0};
143 HANDLE_CUTN_ERROR(cutensornetAccessorGetInfo(cutnHandle, accessor,
144 CUTENSORNET_ACCESSOR_INFO_FLOPS, &flops, sizeof(flops))); // reads the flop count reported by the accessor into flops
145 std::cout << "Total flop count = " << (flops/1e9) << " GFlop\n"; // printed in units of 1e9 flops
Set up the workspace
Now we can set up the required workspace buffer.
149 // Attach the workspace buffer: query the recommended device scratch size, then hand over d_scratch if it is large enough
150 int64_t worksize {0};
151 HANDLE_CUTN_ERROR(cutensornetWorkspaceGetMemorySize(cutnHandle,
152 workDesc,
153 CUTENSORNET_WORKSIZE_PREF_RECOMMENDED,
154 CUTENSORNET_MEMSPACE_DEVICE,
155 CUTENSORNET_WORKSPACE_SCRATCH,
156 &worksize));
157 std::cout << "Required scratch GPU workspace size (bytes) = " << worksize << std::endl;
158 if(worksize <= scratchSize) { // NOTE(review): compares signed int64_t with unsigned std::size_t; a negative worksize would convert to a large unsigned value and take the error branch
159 HANDLE_CUTN_ERROR(cutensornetWorkspaceSetMemory(cutnHandle, workDesc, CUTENSORNET_MEMSPACE_DEVICE,
160 CUTENSORNET_WORKSPACE_SCRATCH, d_scratch, worksize)); // only the first worksize bytes of d_scratch are registered
161 }else{
162 std::cout << "ERROR: Insufficient workspace size on Device!\n";
163 std::abort(); // terminates without releasing the GPU resources allocated above
164 }
165 std::cout << "Set the workspace buffer\n";
Compute the specified slice of state amplitudes
Once everything has been set up, we compute the requested slice of state amplitudes, copy it back to Host memory, and print it.
169 // Compute the specified slice of the quantum circuit amplitudes tensor
170 std::complex<double> stateNorm2{0.0,0.0}; // Host variable that receives the value printed below as the squared 2-norm of the state
171 HANDLE_CUTN_ERROR(cutensornetAccessorCompute(cutnHandle, accessor, fixedValues.data(),
172 workDesc, d_amp, static_cast<void*>(&stateNorm2), 0x0)); // amplitudes are written to the Device buffer d_amp; 0x0 is presumably the default CUDA stream — confirm
173 std::cout << "Computed the specified slice of the quantum circuit amplitudes tensor\n";
174 std::vector<std::complex<double>> h_amp(ampSize); // Host copy of the slice, ampSize complex elements
175 HANDLE_CUDA_ERROR(cudaMemcpy(h_amp.data(), d_amp, ampSize * (2 * fp64size), cudaMemcpyDeviceToHost)); // same byte count as the d_amp allocation
176 std::cout << "Amplitudes slice for " << (numQubits - numFixedModes) << " qubits:\n";
177 for(std::size_t i = 0; i < ampSize; ++i) { // print one amplitude per line
178 std::cout << " " << h_amp[i] << std::endl;
179 }
180 std::cout << "Squared 2-norm of the state = (" << stateNorm2.real() << ", " << stateNorm2.imag() << ")\n";
Free resources
184 // Destroy the workspace descriptor
185 HANDLE_CUTN_ERROR(cutensornetDestroyWorkspaceDescriptor(workDesc));
186 std::cout << "Destroyed the workspace descriptor\n";
187
188 // Destroy the quantum circuit amplitudes accessor
189 HANDLE_CUTN_ERROR(cutensornetDestroyAccessor(accessor));
190 std::cout << "Destroyed the quantum circuit amplitudes accessor\n";
191
192 // Destroy the quantum circuit state
193 HANDLE_CUTN_ERROR(cutensornetDestroyState(quantumState));
194 std::cout << "Destroyed the quantum circuit state\n";
195
// Release the Device buffers: scratch, amplitudes slice, and the two gate tensors
196 HANDLE_CUDA_ERROR(cudaFree(d_scratch));
197 HANDLE_CUDA_ERROR(cudaFree(d_amp));
198 HANDLE_CUDA_ERROR(cudaFree(d_gateCX));
199 HANDLE_CUDA_ERROR(cudaFree(d_gateH));
200 std::cout << "Freed memory on GPU\n";
201
202 // Finalize the cuTensorNet library (the handle is destroyed last, after every object created with it)
203 HANDLE_CUTN_ERROR(cutensornetDestroy(cutnHandle));
204 std::cout << "Finalized the cuTensorNet library\n";
205
206 return 0;
207}