Code example (expectation value with gradients)#
The following code example illustrates how to compute the expectation value \(E = \langle\psi|H|\psi\rangle\) of a tensor network operator (Hamiltonian) over a tensor network state, and the gradients \(\partial E/\partial A_j\) with respect to selected tensor operators \(A_j\). Gates that participate in gradient computation must be applied via cutensornetStateApplyTensorOperatorWithGradient(); then cutensornetExpectationComputeWithGradientsBackward() is used instead of cutensornetExpectationCompute() to obtain \(E\) and the gradients in one call.
The full code can be found in the NVIDIA/cuQuantum samples repository.
Headers and error handling#
20#include <cstdlib>
21#include <cstdio>
22#include <cassert>
23#include <complex>
24#include <vector>
25#include <iostream>
26#include <cmath>
27
28#include <cuda_runtime.h>
29#include <cutensornet.h>
30
// Check the status returned by a CUDA runtime call; print and abort on failure.
// Wrapped in do { } while(0) so the macro behaves as a single statement
// (safe inside unbraced if/else, and the caller's ';' completes it cleanly).
#define HANDLE_CUDA_ERROR(x) \
do { const auto err = x; \
  if( err != cudaSuccess ) \
  { printf("CUDA error %s in line %d\n", cudaGetErrorString(err), __LINE__); fflush(stdout); std::abort(); } \
} while(0)

// Check the status returned by a cuTensorNet call; print and abort on failure.
// Same do { } while(0) single-statement idiom as HANDLE_CUDA_ERROR.
#define HANDLE_CUTN_ERROR(x) \
do { const auto err = x; \
  if( err != CUTENSORNET_STATUS_SUCCESS ) \
  { printf("cuTensorNet error %s in line %d\n", cutensornetGetErrorString(err), __LINE__); fflush(stdout); std::abort(); } \
} while(0)
42
// Byte sizes of the gate tensors; each element is a complex<double> (two fp64 values).
static constexpr std::size_t fp64size = sizeof(double);
static constexpr std::size_t gate1Size = 4 * (2 * fp64size);   // 2x2 one-qubit gate
static constexpr std::size_t gate2Size = 16 * (2 * fp64size);  // 4x4 two-qubit gate
46
// Fill a 2x2 Hadamard gate (row-major): (1/sqrt(2)) * [[1, 1], [1, -1]].
static void createHadamard(std::complex<double>* gate)
{
    const double amp = 1.0 / std::sqrt(2.0);
    for (int i = 0; i < 4; ++i) {
        // Only the bottom-right entry is negative.
        gate[i] = std::complex<double>((i == 3) ? -amp : amp, 0.0);
    }
}
55
// Fill a 4x4 CNOT gate (row-major), control q0, target q1:
// |00>->|00>, |01>->|01>, |10>->|11>, |11>->|10>.
static void createCNOT(std::complex<double>* gate)
{
    // Flat row-major indices of the four unit entries.
    static const int ones[4] = {0, 5, 11, 14};
    for (int i = 0; i < 16; ++i) {
        gate[i] = std::complex<double>(0.0, 0.0);
    }
    for (int idx : ones) {
        gate[idx] = std::complex<double>(1.0, 0.0);
    }
}
65
// Fill a 2x2 RX(theta) gate (row-major):
// [[cos(t/2), -i*sin(t/2)], [-i*sin(t/2), cos(t/2)]].
static void createRXGate(double theta, std::complex<double>* gate)
{
    const double cosHalf  = std::cos(0.5 * theta);
    const double msinHalf = -std::sin(0.5 * theta);   // the -i*sin component
    gate[0] = std::complex<double>(cosHalf, 0.0);
    gate[1] = std::complex<double>(0.0, msinHalf);
    gate[2] = std::complex<double>(0.0, msinHalf);
    gate[3] = std::complex<double>(cosHalf, 0.0);
}
75
// Fill a 2x2 RY(theta) gate (row-major):
// [[cos(t/2), -sin(t/2)], [sin(t/2), cos(t/2)]].
static void createRYGate(double theta, std::complex<double>* gate)
{
    const double cosHalf = std::cos(0.5 * theta);
    const double sinHalf = std::sin(0.5 * theta);
    gate[0] = std::complex<double>(cosHalf, 0.0);
    gate[1] = std::complex<double>(-sinHalf, 0.0);
    gate[2] = std::complex<double>(sinHalf, 0.0);
    gate[3] = std::complex<double>(cosHalf, 0.0);
}
85
// Fill a 2x2 RZ(theta) gate (row-major): diag(exp(-i*t/2), exp(+i*t/2)).
static void createRZGate(double theta, std::complex<double>* gate)
{
    const double c = std::cos(theta / 2);
    const double s = std::sin(theta / 2);
    gate[0] = std::complex<double>(c, -s);
    gate[1] = std::complex<double>(0.0, 0.0);
    gate[2] = std::complex<double>(0.0, 0.0);
    gate[3] = std::complex<double>(c, s);
}
93
// Fill a 2x2 Pauli-X operator (row-major): [[0, 1], [1, 0]].
static void createPauliX(std::complex<double>* op)
{
    op[0] = op[3] = std::complex<double>(0.0, 0.0);
    op[1] = op[2] = std::complex<double>(1.0, 0.0);
}
101
// Fill a 2x2 Pauli-Y operator (row-major): [[0, -i], [i, 0]].
static void createPauliY(std::complex<double>* op)
{
    op[0] = op[3] = std::complex<double>(0.0, 0.0);
    op[1] = std::complex<double>(0.0, -1.0);
    op[2] = std::complex<double>(0.0, 1.0);
}
109
// Fill a 2x2 Pauli-Z operator (row-major): diag(1, -1).
static void createPauliZ(std::complex<double>* op)
{
    op[0] = std::complex<double>(1.0, 0.0);
    op[1] = op[2] = std::complex<double>(0.0, 0.0);
    op[3] = std::complex<double>(-1.0, 0.0);
}
117
// Fill a 2x2 identity operator (row-major): diag(1, 1).
static void createIdentity(std::complex<double>* op)
{
    op[0] = op[3] = std::complex<double>(1.0, 0.0);
    op[1] = op[2] = std::complex<double>(0.0, 0.0);
}
125
126int main()
127{
128 static_assert(sizeof(size_t) == sizeof(int64_t), "Please build this sample on a 64-bit architecture!");
Quantum state configuration#
132 // Quantum state configuration
133 constexpr int32_t numQubits = 4;
134 const std::vector<int64_t> qubitDims(numQubits, 2);
// Rotation angle (pi/4) shared by the Rx, Ry, and Rz gates created below.
135 const double theta = M_PI / 4.0;
136 std::cout << "Quantum circuit: " << numQubits << " qubits (expectation gradient)\n";
Initialize the cuTensorNet library handle#
140 // Initialize the cuTensorNet library
141 HANDLE_CUDA_ERROR(cudaSetDevice(0));
142 cutensornetHandle_t cutnHandle;
143 HANDLE_CUTN_ERROR(cutensornetCreate(&cutnHandle));
144 cudaStream_t stream;
145 HANDLE_CUDA_ERROR(cudaStreamCreate(&stream));
146 std::cout << "Initialized cuTensorNet library on GPU 0\n";
Define quantum gates on GPU#
150 // Define necessary quantum gate tensors in Host memory
151 std::complex<double> h_gateH[4], h_gateCX[16], h_gateRx[4], h_gateRy[4], h_gateRz[4];
152 std::complex<double> h_gateX[4], h_gateY[4], h_gateZ[4], h_gateI[4];
153 createHadamard(h_gateH);
154 createCNOT(h_gateCX);
155 createRXGate(theta, h_gateRx);
156 createRYGate(theta, h_gateRy);
157 createRZGate(theta, h_gateRz);
158 createPauliX(h_gateX);
159 createPauliY(h_gateY);
160 createPauliZ(h_gateZ);
161 createIdentity(h_gateI);
162
163 // Copy quantum gates to Device memory
164 void *d_gateH{nullptr}, *d_gateCX{nullptr}, *d_gateRx{nullptr}, *d_gateRy{nullptr}, *d_gateRz{nullptr};
165 void *d_gateX{nullptr}, *d_gateY{nullptr}, *d_gateZ{nullptr}, *d_gateI{nullptr};
166 HANDLE_CUDA_ERROR(cudaMalloc(&d_gateH, gate1Size));
167 HANDLE_CUDA_ERROR(cudaMalloc(&d_gateCX, gate2Size));
168 HANDLE_CUDA_ERROR(cudaMalloc(&d_gateRx, gate1Size));
169 HANDLE_CUDA_ERROR(cudaMalloc(&d_gateRy, gate1Size));
170 HANDLE_CUDA_ERROR(cudaMalloc(&d_gateRz, gate1Size));
171 HANDLE_CUDA_ERROR(cudaMalloc(&d_gateX, gate1Size));
172 HANDLE_CUDA_ERROR(cudaMalloc(&d_gateY, gate1Size));
173 HANDLE_CUDA_ERROR(cudaMalloc(&d_gateZ, gate1Size));
174 HANDLE_CUDA_ERROR(cudaMalloc(&d_gateI, gate1Size));
175 std::cout << "Allocated quantum gate memory on GPU\n";
176 HANDLE_CUDA_ERROR(cudaMemcpy(d_gateH, h_gateH, gate1Size, cudaMemcpyHostToDevice));
177 HANDLE_CUDA_ERROR(cudaMemcpy(d_gateCX, h_gateCX, gate2Size, cudaMemcpyHostToDevice));
178 HANDLE_CUDA_ERROR(cudaMemcpy(d_gateRx, h_gateRx, gate1Size, cudaMemcpyHostToDevice));
179 HANDLE_CUDA_ERROR(cudaMemcpy(d_gateRy, h_gateRy, gate1Size, cudaMemcpyHostToDevice));
180 HANDLE_CUDA_ERROR(cudaMemcpy(d_gateRz, h_gateRz, gate1Size, cudaMemcpyHostToDevice));
181 HANDLE_CUDA_ERROR(cudaMemcpy(d_gateX, h_gateX, gate1Size, cudaMemcpyHostToDevice));
182 HANDLE_CUDA_ERROR(cudaMemcpy(d_gateY, h_gateY, gate1Size, cudaMemcpyHostToDevice));
183 HANDLE_CUDA_ERROR(cudaMemcpy(d_gateZ, h_gateZ, gate1Size, cudaMemcpyHostToDevice));
184 HANDLE_CUDA_ERROR(cudaMemcpy(d_gateI, h_gateI, gate1Size, cudaMemcpyHostToDevice));
185 std::cout << "Copied quantum gates to GPU memory\n";
186
187 // Allocate gradient output buffers (must be allocated before applying gates with gradient)
188 void *d_gradRy{nullptr}, *d_gradRz{nullptr};
189 HANDLE_CUDA_ERROR(cudaMalloc(&d_gradRy, gate1Size));
190 HANDLE_CUDA_ERROR(cudaMalloc(&d_gradRz, gate1Size));
191 HANDLE_CUDA_ERROR(cudaMemset(d_gradRy, 0, gate1Size));
192 HANDLE_CUDA_ERROR(cudaMemset(d_gradRz, 0, gate1Size));
193 std::cout << "Allocated gradient buffers on GPU\n";
Allocate the scratch buffer on GPU#
197 // Query the free memory on Device
198 std::size_t freeSize{0}, totalSize{0};
199 HANDLE_CUDA_ERROR(cudaMemGetInfo(&freeSize, &totalSize));
// Use half of the free device memory, rounded down to a 4096-byte multiple, as scratch.
200 const std::size_t scratchSize = (freeSize - (freeSize % 4096)) / 2;
201 void *d_scratch{nullptr};
202 HANDLE_CUDA_ERROR(cudaMalloc(&d_scratch, scratchSize));
203 std::cout << "Allocated " << scratchSize << " bytes of scratch memory on GPU\n";
Create the initial quantum state#
207 // Create the initial quantum state
208 cutensornetState_t quantumState;
209 HANDLE_CUTN_ERROR(cutensornetCreateState(cutnHandle, CUTENSORNET_STATE_PURITY_PURE, numQubits, qubitDims.data(),
210 CUDA_C_64F, &quantumState));
211 std::cout << "Created the initial quantum state\n";
Apply quantum gates (with gradient registration)#
Apply gates to build the circuit. Use cutensornetStateApplyTensorOperatorWithGradient() for gates that need gradients; provide a device buffer where the gradient will be written. Gradient buffers must be allocated before applying those gates.
215 // Construct the final quantum circuit state (apply quantum gates); register Ry on q0 and Rz on q2 for gradient
216
// NOTE: the temporary std::vector{...}.data() pointers below remain valid for the
// duration of each call (full-expression lifetime of the temporary).
217 int64_t id;
218 HANDLE_CUTN_ERROR(cutensornetStateApplyTensorOperator(cutnHandle, quantumState, 1, std::vector<int32_t>{{1}}.data(),
219 d_gateH, nullptr, 1, 0, 1, &id));
220 HANDLE_CUTN_ERROR(cutensornetStateApplyTensorOperator(cutnHandle, quantumState, 1, std::vector<int32_t>{{3}}.data(),
221 d_gateH, nullptr, 1, 0, 1, &id));
222 HANDLE_CUTN_ERROR(cutensornetStateApplyTensorOperator(cutnHandle, quantumState, 2, std::vector<int32_t>{{1, 2}}.data(),
223 d_gateCX, nullptr, 1, 0, 1, &id));
224 HANDLE_CUTN_ERROR(cutensornetStateApplyTensorOperator(cutnHandle, quantumState, 2, std::vector<int32_t>{{2, 3}}.data(),
225 d_gateCX, nullptr, 1, 0, 1, &id));
// Ry on qubit 0 is registered for gradient computation: its gradient lands in d_gradRy.
226 HANDLE_CUTN_ERROR(cutensornetStateApplyTensorOperatorWithGradient(cutnHandle, quantumState, 1, std::vector<int32_t>{{0}}.data(),
227 d_gateRy, nullptr, 0, 0, 1, d_gradRy, nullptr, &id));
228 HANDLE_CUTN_ERROR(cutensornetStateApplyTensorOperator(cutnHandle, quantumState, 1, std::vector<int32_t>{{1}}.data(),
229 d_gateRx, nullptr, 0, 0, 1, &id));
// Rz on qubit 2 is registered for gradient computation: its gradient lands in d_gradRz.
230 HANDLE_CUTN_ERROR(cutensornetStateApplyTensorOperatorWithGradient(cutnHandle, quantumState, 1, std::vector<int32_t>{{2}}.data(),
231 d_gateRz, nullptr, 0, 0, 1, d_gradRz, nullptr, &id));
232 HANDLE_CUTN_ERROR(cutensornetStateApplyTensorOperator(cutnHandle, quantumState, 2, std::vector<int32_t>{{0, 1}}.data(),
233 d_gateCX, nullptr, 1, 0, 1, &id));
234 std::cout << "Applied quantum gates (Ry on q0 and Rz on q2 registered for gradient)\n";
Construct the tensor network operator (Hamiltonian)#
Now let’s create an empty tensor network operator and then append three components to it, where each component is a direct product of Pauli matrices scaled by some complex coefficient.
238 // Create an empty tensor network operator and append Hamiltonian terms: 2.0*XYZZ, 3.0*IZZI, 5.0*ZIYY.
239 cutensornetNetworkOperator_t hamiltonian;
240 HANDLE_CUTN_ERROR(cutensornetCreateNetworkOperator(cutnHandle, numQubits, qubitDims.data(), CUDA_C_64F, &hamiltonian));
241 {
242 const int32_t numModes[] = {1, 1, 1, 1};
243 const int32_t modes0[] = {0}, modes1[] = {1}, modes2[] = {2}, modes3[] = {3};
244 const int32_t * stateModes[] = {modes0, modes1, modes2, modes3};
245 const void * gateData[] = {d_gateX, d_gateY, d_gateZ, d_gateZ};
246 HANDLE_CUTN_ERROR(cutensornetNetworkOperatorAppendProduct(cutnHandle, hamiltonian, cuDoubleComplex{2.0, 0.0},
247 4, numModes, stateModes, NULL, gateData, &id));
248 }
249 {
250 const int32_t numModes[] = {1, 1, 1, 1};
251 const int32_t modes0[] = {0}, modes1[] = {1}, modes2[] = {2}, modes3[] = {3};
252 const int32_t * stateModes[] = {modes0, modes1, modes2, modes3};
253 const void * gateData[] = {d_gateI, d_gateZ, d_gateZ, d_gateI};
254 HANDLE_CUTN_ERROR(cutensornetNetworkOperatorAppendProduct(cutnHandle, hamiltonian, cuDoubleComplex{3.0, 0.0},
255 4, numModes, stateModes, NULL, gateData, &id));
256 }
257 {
258 const int32_t numModes[] = {1, 1, 1, 1};
259 const int32_t modes0[] = {0}, modes1[] = {1}, modes2[] = {2}, modes3[] = {3};
260 const int32_t * stateModes[] = {modes0, modes1, modes2, modes3};
261 const void * gateData[] = {d_gateZ, d_gateI, d_gateY, d_gateY};
262 HANDLE_CUTN_ERROR(cutensornetNetworkOperatorAppendProduct(cutnHandle, hamiltonian, cuDoubleComplex{5.0, 0.0},
263 4, numModes, stateModes, NULL, gateData, &id));
264 }
265
266 std::cout << "Constructed a tensor network operator: 2.0*XYZZ + 3.0*IZZI + 5.0*ZIYY \n";
Create the expectation value object#
270 // Specify the quantum circuit expectation value
271 cutensornetStateExpectation_t expectation;
272 HANDLE_CUTN_ERROR(cutensornetCreateExpectation(cutnHandle, quantumState, hamiltonian, &expectation));
273 std::cout << "Created the specified quantum circuit expectation value\n";
Configure the expectation value calculation#
277 // Configure the computation of the specified quantum circuit expectation value
278 const int32_t numHyperSamples = 8; // desired number of hyper samples used in the tensor network contraction path finder
279 HANDLE_CUTN_ERROR(cutensornetExpectationConfigure(cutnHandle, expectation,
280 CUTENSORNET_EXPECTATION_CONFIG_NUM_HYPER_SAMPLES, &numHyperSamples, sizeof(numHyperSamples)));
Prepare the expectation value (with gradient support)#
Let’s create a workspace descriptor and prepare the computation of the desired expectation value and gradients.
284 // Prepare the specified quantum circuit expectation value for computation (with gradient support)
285 cutensornetWorkspaceDescriptor_t workDesc;
286 HANDLE_CUTN_ERROR(cutensornetCreateWorkspaceDescriptor(cutnHandle, &workDesc));
287 std::cout << "Created the workspace descriptor\n";
288 HANDLE_CUTN_ERROR(cutensornetExpectationPrepare(cutnHandle, expectation, scratchSize, workDesc, stream));
289 std::cout << "Prepared the specified quantum circuit expectation value (gradient backward)\n";
Set up the workspace#
293 // Attach the workspace buffer
294 int64_t worksize{0};
295 HANDLE_CUTN_ERROR(cutensornetWorkspaceGetMemorySize(cutnHandle, workDesc,
296 CUTENSORNET_WORKSIZE_PREF_RECOMMENDED,
297 CUTENSORNET_MEMSPACE_DEVICE,
298 CUTENSORNET_WORKSPACE_SCRATCH,
299 &worksize));
300 std::cout << "Required scratch GPU workspace size (bytes) = " << worksize << std::endl;
// worksize is non-negative here, so the signed/unsigned comparison below is safe.
301 if (worksize <= scratchSize) {
302 HANDLE_CUTN_ERROR(cutensornetWorkspaceSetMemory(cutnHandle, workDesc, CUTENSORNET_MEMSPACE_DEVICE,
303 CUTENSORNET_WORKSPACE_SCRATCH, d_scratch, worksize));
304 } else {
305 std::cout << "ERROR: Insufficient workspace size on Device!\n";
306 std::abort();
307 }
308 std::cout << "Set the workspace buffer\n";
Compute expectation value and gradients (backward)#
Call cutensornetExpectationComputeWithGradientsBackward() to obtain \(E\) and set expectationValueAdjoint to 1.0 for direct gradient \(\partial E/\partial A_j\).
312 // Compute the specified quantum circuit expectation value and gradients (backward pass)
313 std::complex<double> expectVal{0.0, 0.0};
// Seed for the backward pass: dE/dE = 1, so the outputs are the plain gradients dE/dA_j.
314 cuDoubleComplex expectationAdjoint = {1.0, 0.0};
// NOTE(review): the '0' and 'nullptr' arguments presumably select all gradient-registered
// tensors with default options — confirm against the cuTensorNet API reference.
315 HANDLE_CUTN_ERROR(cutensornetExpectationComputeWithGradientsBackward(cutnHandle, expectation,
316 0, &expectationAdjoint, nullptr, workDesc,
317 static_cast<void*>(&expectVal), nullptr, stream));
// Synchronize before reading expectVal and the device-side gradient buffers on the host.
318 HANDLE_CUDA_ERROR(cudaStreamSynchronize(stream));
319 std::cout << "Computed the specified quantum circuit expectation value and gradients\n";
320 std::cout << "Expectation value = (" << expectVal.real() << ", " << expectVal.imag() << ")\n";
321
322
323 // Copy gradients back to host and print
324 std::complex<double> h_gradRy[4], h_gradRz[4];
325 HANDLE_CUDA_ERROR(cudaMemcpy(h_gradRy, d_gradRy, gate1Size, cudaMemcpyDeviceToHost));
326 HANDLE_CUDA_ERROR(cudaMemcpy(h_gradRz, d_gradRz, gate1Size, cudaMemcpyDeviceToHost));
327 std::cout << "Gradient d<H>/d(Ry on q0):\n";
328 std::cout << " [0,0]: (" << h_gradRy[0].real() << ", " << h_gradRy[0].imag() << ")\n";
329 std::cout << " [0,1]: (" << h_gradRy[1].real() << ", " << h_gradRy[1].imag() << ")\n";
330 std::cout << " [1,0]: (" << h_gradRy[2].real() << ", " << h_gradRy[2].imag() << ")\n";
331 std::cout << " [1,1]: (" << h_gradRy[3].real() << ", " << h_gradRy[3].imag() << ")\n";
332 std::cout << "Gradient d<H>/d(Rz on q2):\n";
333 std::cout << " [0,0]: (" << h_gradRz[0].real() << ", " << h_gradRz[0].imag() << ")\n";
334 std::cout << " [0,1]: (" << h_gradRz[1].real() << ", " << h_gradRz[1].imag() << ")\n";
335 std::cout << " [1,0]: (" << h_gradRz[2].real() << ", " << h_gradRz[2].imag() << ")\n";
336 std::cout << " [1,1]: (" << h_gradRz[3].real() << ", " << h_gradRz[3].imag() << ")\n";
Free resources#
340 // Destroy the workspace descriptor
341 HANDLE_CUTN_ERROR(cutensornetDestroyWorkspaceDescriptor(workDesc));
342 std::cout << "Destroyed the workspace descriptor\n";
343
344 // Destroy the quantum circuit expectation value
345 HANDLE_CUTN_ERROR(cutensornetDestroyExpectation(expectation));
346 std::cout << "Destroyed the quantum circuit state expectation value\n";
347
348 // Destroy the tensor network operator
349 HANDLE_CUTN_ERROR(cutensornetDestroyNetworkOperator(hamiltonian));
350 std::cout << "Destroyed the tensor network operator\n";
351
352 // Destroy the quantum circuit state
353 HANDLE_CUTN_ERROR(cutensornetDestroyState(quantumState));
354 std::cout << "Destroyed the quantum circuit state\n";
355
356 HANDLE_CUDA_ERROR(cudaFree(d_scratch));
357 HANDLE_CUDA_ERROR(cudaFree(d_gradRy));
358 HANDLE_CUDA_ERROR(cudaFree(d_gradRz));
359 HANDLE_CUDA_ERROR(cudaFree(d_gateRz));
360 HANDLE_CUDA_ERROR(cudaFree(d_gateRy));
361 HANDLE_CUDA_ERROR(cudaFree(d_gateRx));
362 HANDLE_CUDA_ERROR(cudaFree(d_gateCX));
363 HANDLE_CUDA_ERROR(cudaFree(d_gateI));
364 HANDLE_CUDA_ERROR(cudaFree(d_gateZ));
365 HANDLE_CUDA_ERROR(cudaFree(d_gateY));
366 HANDLE_CUDA_ERROR(cudaFree(d_gateX));
367 HANDLE_CUDA_ERROR(cudaFree(d_gateH));
368 std::cout << "Freed memory on GPU\n";
369
370 HANDLE_CUDA_ERROR(cudaStreamDestroy(stream));
371 HANDLE_CUTN_ERROR(cutensornetDestroy(cutnHandle));
372 std::cout << "Finalized the cuTensorNet library\n";
373
374 return 0;
375}