Code example (expectation value with gradients)#

The following code example illustrates how to compute the expectation value \(E = \langle\psi|H|\psi\rangle\) of a tensor network operator (Hamiltonian) over a tensor network state, and the gradients \(\partial E/\partial A_j\) with respect to selected tensor operators \(A_j\). Gates that participate in gradient computation must be applied via cutensornetStateApplyTensorOperatorWithGradient(); then cutensornetExpectationComputeWithGradientsBackward() is used instead of cutensornetExpectationCompute() to obtain \(E\) and the gradients in one call.

The full code can be found in the NVIDIA/cuQuantum repository (here).

Headers and error handling#

 20#include <cstdlib>
 21#include <cstdio>
 22#include <cassert>
 23#include <complex>
 24#include <vector>
 25#include <iostream>
 26#include <cmath>
 27
 28#include <cuda_runtime.h>
 29#include <cutensornet.h>
 30
 31#define HANDLE_CUDA_ERROR(x) \
 32{ const auto err = x; \
 33  if( err != cudaSuccess ) \
 34  { printf("CUDA error %s in line %d\n", cudaGetErrorString(err), __LINE__); fflush(stdout); std::abort(); } \
 35};
 36
     // Abort-on-error wrappers: print the failing call's error string plus line number, then abort.
     // NOTE(review): the brace-block + trailing ';' macro form is unsafe inside an unbraced
     // if/else; the conventional do { ... } while(0) wrapper would avoid that hazard.
 37#define HANDLE_CUTN_ERROR(x) \
 38{ const auto err = x; \
 39  if( err != CUTENSORNET_STATUS_SUCCESS ) \
 40  { printf("cuTensorNet error %s in line %d\n", cutensornetGetErrorString(err), __LINE__); fflush(stdout); std::abort(); } \
 41};
 42
     // Byte sizes of row-major std::complex<double> (2 * fp64) gate matrices:
     // gate1Size = 2x2 one-qubit gate, gate2Size = 4x4 two-qubit gate.
 43static constexpr std::size_t fp64size = sizeof(double);
 44static constexpr std::size_t gate1Size = 4 * (2 * fp64size);
 45static constexpr std::size_t gate2Size = 16 * (2 * fp64size);
 46
 47static void createHadamard(std::complex<double>* gate)
 48{
     // H = (1/sqrt(2)) * [[1, 1], [1, -1]], stored row-major in gate[0..3].
 49  const double inv_sqrt2 = 1.0 / std::sqrt(2.0);
 50  gate[0] = std::complex<double>(inv_sqrt2, 0.0);
 51  gate[1] = std::complex<double>(inv_sqrt2, 0.0);
 52  gate[2] = std::complex<double>(inv_sqrt2, 0.0);
 53  gate[3] = std::complex<double>(-inv_sqrt2, 0.0);
 54}
 55
 56// CNOT: control q0, target q1. |00>->|00>, |01>->|01>, |10>->|11>, |11>->|10>.
 57static void createCNOT(std::complex<double>* gate)
 58{
     // Row-major 4x4: ones at (0,0), (1,1), (2,3), (3,2) -> flat indices 0, 5, 11, 14.
 59  for (int i = 0; i < 16; i++) gate[i] = std::complex<double>(0.0, 0.0);
 60  gate[0]  = std::complex<double>(1.0, 0.0);
 61  gate[5]  = std::complex<double>(1.0, 0.0);
 62  gate[11] = std::complex<double>(1.0, 0.0);
 63  gate[14] = std::complex<double>(1.0, 0.0);
 64}
 65
 66static void createRXGate(double theta, std::complex<double>* gate)
 67{
     // Rx(theta) = [[cos(theta/2), -i*sin(theta/2)], [-i*sin(theta/2), cos(theta/2)]];
     // s carries the minus sign, placed in the imaginary part of the off-diagonals.
 68  const double c = std::cos(theta / 2.0);
 69  const double s = -std::sin(theta / 2.0);
 70  gate[0] = std::complex<double>(c, 0.0);
 71  gate[1] = std::complex<double>(0.0, s);
 72  gate[2] = std::complex<double>(0.0, s);
 73  gate[3] = std::complex<double>(c, 0.0);
 74}
 75
 76static void createRYGate(double theta, std::complex<double>* gate)
 77{
     // Ry(theta) = [[cos(theta/2), -sin(theta/2)], [sin(theta/2), cos(theta/2)]] -- real-valued.
 78  const double c = std::cos(theta / 2.0);
 79  const double s = std::sin(theta / 2.0);
 80  gate[0] = std::complex<double>(c, 0.0);
 81  gate[1] = std::complex<double>(-s, 0.0);
 82  gate[2] = std::complex<double>(s, 0.0);
 83  gate[3] = std::complex<double>(c, 0.0);
 84}
 85
 86static void createRZGate(double theta, std::complex<double>* gate)
 87{
     // Rz(theta) = diag(exp(-i*theta/2), exp(+i*theta/2)); off-diagonals are zero.
 88  gate[0] = std::complex<double>(std::cos(theta/2), -std::sin(theta/2));
 89  gate[1] = std::complex<double>(0.0, 0.0);
 90  gate[2] = std::complex<double>(0.0, 0.0);
 91  gate[3] = std::complex<double>(std::cos(theta/2), std::sin(theta/2));
 92}
 93
 94static void createPauliX(std::complex<double>* op)
 95{
     // Pauli X = [[0, 1], [1, 0]].
 96  op[0] = std::complex<double>(0.0, 0.0);
 97  op[1] = std::complex<double>(1.0, 0.0);
 98  op[2] = std::complex<double>(1.0, 0.0);
 99  op[3] = std::complex<double>(0.0, 0.0);
100}
101
102static void createPauliY(std::complex<double>* op)
103{
     // Pauli Y = [[0, -i], [i, 0]].
104  op[0] = std::complex<double>(0.0, 0.0);
105  op[1] = std::complex<double>(0.0, -1.0);
106  op[2] = std::complex<double>(0.0, 1.0);
107  op[3] = std::complex<double>(0.0, 0.0);
108}
109
110static void createPauliZ(std::complex<double>* op)
111{
     // Pauli Z = [[1, 0], [0, -1]].
112  op[0] = std::complex<double>(1.0, 0.0);
113  op[1] = std::complex<double>(0.0, 0.0);
114  op[2] = std::complex<double>(0.0, 0.0);
115  op[3] = std::complex<double>(-1.0, 0.0);
116}
117
118static void createIdentity(std::complex<double>* op)
119{
     // 2x2 identity matrix, used as the trivial factor in Pauli-string products below.
120  op[0] = std::complex<double>(1.0, 0.0);
121  op[1] = std::complex<double>(0.0, 0.0);
122  op[2] = std::complex<double>(0.0, 0.0);
123  op[3] = std::complex<double>(1.0, 0.0);
124}
125
126int main()
127{
     // The sample requires a 64-bit build (size_t and int64_t are used interchangeably below).
128  static_assert(sizeof(size_t) == sizeof(int64_t), "Please build this sample on a 64-bit architecture!");

Quantum state configuration#

132  // Quantum state configuration
133  constexpr int32_t numQubits = 4;
134  const std::vector<int64_t> qubitDims(numQubits, 2);
     // Common rotation angle used by the Rx, Ry, and Rz gates constructed below.
135  const double theta = M_PI / 4.0;
136  std::cout << "Quantum circuit: " << numQubits << " qubits (expectation gradient)\n";

Initialize the cuTensorNet library handle#

140  // Initialize the cuTensorNet library
141  HANDLE_CUDA_ERROR(cudaSetDevice(0));
142  cutensornetHandle_t cutnHandle;
143  HANDLE_CUTN_ERROR(cutensornetCreate(&cutnHandle));
     // This stream is later passed to the expectation prepare/compute calls.
144  cudaStream_t stream;
145  HANDLE_CUDA_ERROR(cudaStreamCreate(&stream));
146  std::cout << "Initialized cuTensorNet library on GPU 0\n";

Define quantum gates on GPU#

150  // Define necessary quantum gate tensors in Host memory
151  std::complex<double> h_gateH[4], h_gateCX[16], h_gateRx[4], h_gateRy[4], h_gateRz[4];
152  std::complex<double> h_gateX[4], h_gateY[4], h_gateZ[4], h_gateI[4];
153  createHadamard(h_gateH);
154  createCNOT(h_gateCX);
155  createRXGate(theta, h_gateRx);
156  createRYGate(theta, h_gateRy);
157  createRZGate(theta, h_gateRz);
158  createPauliX(h_gateX);
159  createPauliY(h_gateY);
160  createPauliZ(h_gateZ);
161  createIdentity(h_gateI);
162
163  // Copy quantum gates to Device memory
164  void *d_gateH{nullptr}, *d_gateCX{nullptr}, *d_gateRx{nullptr}, *d_gateRy{nullptr}, *d_gateRz{nullptr};
165  void *d_gateX{nullptr}, *d_gateY{nullptr}, *d_gateZ{nullptr}, *d_gateI{nullptr};
166  HANDLE_CUDA_ERROR(cudaMalloc(&d_gateH, gate1Size));
167  HANDLE_CUDA_ERROR(cudaMalloc(&d_gateCX, gate2Size));
168  HANDLE_CUDA_ERROR(cudaMalloc(&d_gateRx, gate1Size));
169  HANDLE_CUDA_ERROR(cudaMalloc(&d_gateRy, gate1Size));
170  HANDLE_CUDA_ERROR(cudaMalloc(&d_gateRz, gate1Size));
171  HANDLE_CUDA_ERROR(cudaMalloc(&d_gateX, gate1Size));
172  HANDLE_CUDA_ERROR(cudaMalloc(&d_gateY, gate1Size));
173  HANDLE_CUDA_ERROR(cudaMalloc(&d_gateZ, gate1Size));
174  HANDLE_CUDA_ERROR(cudaMalloc(&d_gateI, gate1Size));
175  std::cout << "Allocated quantum gate memory on GPU\n";
176  HANDLE_CUDA_ERROR(cudaMemcpy(d_gateH, h_gateH, gate1Size, cudaMemcpyHostToDevice));
177  HANDLE_CUDA_ERROR(cudaMemcpy(d_gateCX, h_gateCX, gate2Size, cudaMemcpyHostToDevice));
178  HANDLE_CUDA_ERROR(cudaMemcpy(d_gateRx, h_gateRx, gate1Size, cudaMemcpyHostToDevice));
179  HANDLE_CUDA_ERROR(cudaMemcpy(d_gateRy, h_gateRy, gate1Size, cudaMemcpyHostToDevice));
180  HANDLE_CUDA_ERROR(cudaMemcpy(d_gateRz, h_gateRz, gate1Size, cudaMemcpyHostToDevice));
181  HANDLE_CUDA_ERROR(cudaMemcpy(d_gateX, h_gateX, gate1Size, cudaMemcpyHostToDevice));
182  HANDLE_CUDA_ERROR(cudaMemcpy(d_gateY, h_gateY, gate1Size, cudaMemcpyHostToDevice));
183  HANDLE_CUDA_ERROR(cudaMemcpy(d_gateZ, h_gateZ, gate1Size, cudaMemcpyHostToDevice));
184  HANDLE_CUDA_ERROR(cudaMemcpy(d_gateI, h_gateI, gate1Size, cudaMemcpyHostToDevice));
185  std::cout << "Copied quantum gates to GPU memory\n";
186
187  // Allocate gradient output buffers (must be allocated before applying gates with gradient)
     // d_gradRy / d_gradRz later receive d<H>/d(Ry) and d<H>/d(Rz) from the backward pass.
188  void *d_gradRy{nullptr}, *d_gradRz{nullptr};
189  HANDLE_CUDA_ERROR(cudaMalloc(&d_gradRy, gate1Size));
190  HANDLE_CUDA_ERROR(cudaMalloc(&d_gradRz, gate1Size));
     // cudaMemset is byte-wise, which is valid here: all-zero bytes are double 0.0.
191  HANDLE_CUDA_ERROR(cudaMemset(d_gradRy, 0, gate1Size));
192  HANDLE_CUDA_ERROR(cudaMemset(d_gradRz, 0, gate1Size));
193  std::cout << "Allocated gradient buffers on GPU\n";

Allocate the scratch buffer on GPU#

197  // Query the free memory on Device
198  std::size_t freeSize{0}, totalSize{0};
199  HANDLE_CUDA_ERROR(cudaMemGetInfo(&freeSize, &totalSize));
     // Take half of the free device memory, rounded down to a 4096-byte multiple, as scratch.
200  const std::size_t scratchSize = (freeSize - (freeSize % 4096)) / 2;
201  void *d_scratch{nullptr};
202  HANDLE_CUDA_ERROR(cudaMalloc(&d_scratch, scratchSize));
203  std::cout << "Allocated " << scratchSize << " bytes of scratch memory on GPU\n";

Create the initial quantum state#

207  // Create the initial quantum state
     // Pure (non-mixed) state over numQubits qubits of the given dimensions, complex-double precision.
208  cutensornetState_t quantumState;
209  HANDLE_CUTN_ERROR(cutensornetCreateState(cutnHandle, CUTENSORNET_STATE_PURITY_PURE, numQubits, qubitDims.data(),
210                    CUDA_C_64F, &quantumState));
211  std::cout << "Created the initial quantum state\n";

Apply quantum gates (with gradient registration)#

Apply gates to build the circuit. Use cutensornetStateApplyTensorOperatorWithGradient() for gates that need gradients; provide a device buffer where the gradient will be written. Gradient buffers must be allocated before applying those gates.

215  // Construct the final quantum circuit state (apply quantum gates); register Ry on q0 and Rz on q2 for gradient
216  
     // The std::vector temporaries holding each gate's qubit (mode) list live until the end
     // of the enclosing full expression, so their .data() pointers remain valid for the call.
     // NOTE(review): the trailing integer arguments (1, 0, 1 vs 0, 0, 1) presumably encode
     // immutability/adjoint/unitary flags -- confirm against the
     // cutensornetStateApplyTensorOperator signature in the cuTensorNet API reference.
217  int64_t id;
218  HANDLE_CUTN_ERROR(cutensornetStateApplyTensorOperator(cutnHandle, quantumState, 1, std::vector<int32_t>{{1}}.data(),
219                    d_gateH, nullptr, 1, 0, 1, &id));
220  HANDLE_CUTN_ERROR(cutensornetStateApplyTensorOperator(cutnHandle, quantumState, 1, std::vector<int32_t>{{3}}.data(),
221                    d_gateH, nullptr, 1, 0, 1, &id));
222  HANDLE_CUTN_ERROR(cutensornetStateApplyTensorOperator(cutnHandle, quantumState, 2, std::vector<int32_t>{{1, 2}}.data(),
223                    d_gateCX, nullptr, 1, 0, 1, &id));
224  HANDLE_CUTN_ERROR(cutensornetStateApplyTensorOperator(cutnHandle, quantumState, 2, std::vector<int32_t>{{2, 3}}.data(),
225                    d_gateCX, nullptr, 1, 0, 1, &id));
     // Gradient-tracked gate: the derivative w.r.t. this Ry tensor is written to d_gradRy.
226  HANDLE_CUTN_ERROR(cutensornetStateApplyTensorOperatorWithGradient(cutnHandle, quantumState, 1, std::vector<int32_t>{{0}}.data(),
227                    d_gateRy, nullptr, 0, 0, 1, d_gradRy, nullptr, &id));
228  HANDLE_CUTN_ERROR(cutensornetStateApplyTensorOperator(cutnHandle, quantumState, 1, std::vector<int32_t>{{1}}.data(),
229                    d_gateRx, nullptr, 0, 0, 1, &id));
     // Gradient-tracked gate: the derivative w.r.t. this Rz tensor is written to d_gradRz.
230  HANDLE_CUTN_ERROR(cutensornetStateApplyTensorOperatorWithGradient(cutnHandle, quantumState, 1, std::vector<int32_t>{{2}}.data(),
231                    d_gateRz, nullptr, 0, 0, 1, d_gradRz, nullptr, &id));
232  HANDLE_CUTN_ERROR(cutensornetStateApplyTensorOperator(cutnHandle, quantumState, 2, std::vector<int32_t>{{0, 1}}.data(),
233                    d_gateCX, nullptr, 1, 0, 1, &id));
234  std::cout << "Applied quantum gates (Ry on q0 and Rz on q2 registered for gradient)\n";

Construct the tensor network operator (Hamiltonian)#

Now let’s create an empty tensor network operator and then append three components to it, where each component is a direct product of Pauli matrices scaled by some complex coefficient.

238  // Create an empty tensor network operator and append Hamiltonian terms: 2.0*XYZZ + 3.0*IZZI + 5.0*ZIYY
239  cutensornetNetworkOperator_t hamiltonian;
240  HANDLE_CUTN_ERROR(cutensornetCreateNetworkOperator(cutnHandle, numQubits, qubitDims.data(), CUDA_C_64F, &hamiltonian));
     // Term 1: 2.0 * X(q0) Y(q1) Z(q2) Z(q3)
241  {
242    const int32_t numModes[] = {1, 1, 1, 1};
243    const int32_t modes0[] = {0}, modes1[] = {1}, modes2[] = {2}, modes3[] = {3};
244    const int32_t * stateModes[] = {modes0, modes1, modes2, modes3};
245    const void * gateData[] = {d_gateX, d_gateY, d_gateZ, d_gateZ};
246    HANDLE_CUTN_ERROR(cutensornetNetworkOperatorAppendProduct(cutnHandle, hamiltonian, cuDoubleComplex{2.0, 0.0},
247                      4, numModes, stateModes, NULL, gateData, &id));
248  }
     // Term 2: 3.0 * I(q0) Z(q1) Z(q2) I(q3)
249  {
250    const int32_t numModes[] = {1, 1, 1, 1};
251    const int32_t modes0[] = {0}, modes1[] = {1}, modes2[] = {2}, modes3[] = {3};
252    const int32_t * stateModes[] = {modes0, modes1, modes2, modes3};
253    const void * gateData[] = {d_gateI, d_gateZ, d_gateZ, d_gateI};
254    HANDLE_CUTN_ERROR(cutensornetNetworkOperatorAppendProduct(cutnHandle, hamiltonian, cuDoubleComplex{3.0, 0.0},
255                      4, numModes, stateModes, NULL, gateData, &id));
256  }
     // Term 3: 5.0 * Z(q0) I(q1) Y(q2) Y(q3)
257  {
258    const int32_t numModes[] = {1, 1, 1, 1};
259    const int32_t modes0[] = {0}, modes1[] = {1}, modes2[] = {2}, modes3[] = {3};
260    const int32_t * stateModes[] = {modes0, modes1, modes2, modes3};
261    const void * gateData[] = {d_gateZ, d_gateI, d_gateY, d_gateY};
262    HANDLE_CUTN_ERROR(cutensornetNetworkOperatorAppendProduct(cutnHandle, hamiltonian, cuDoubleComplex{5.0, 0.0},
263                      4, numModes, stateModes, NULL, gateData, &id));
264  }
265  
266  std::cout << "Constructed a tensor network operator: 2.0*XYZZ + 3.0*IZZI + 5.0*ZIYY \n";

Create the expectation value object#

270  // Specify the quantum circuit expectation value
     // The expectation object ties the circuit state to the Hamiltonian operator built above.
271  cutensornetStateExpectation_t expectation;
272  HANDLE_CUTN_ERROR(cutensornetCreateExpectation(cutnHandle, quantumState, hamiltonian, &expectation));
273  std::cout << "Created the specified quantum circuit expectation value\n";

Configure the expectation value calculation#

277  // Configure the computation of the specified quantum circuit expectation value
     // NOTE(review): more hyper samples presumably trade longer path-finding time for a
     // cheaper contraction path -- see the cuTensorNet configuration documentation.
278  const int32_t numHyperSamples = 8; // desired number of hyper samples used in the tensor network contraction path finder
279  HANDLE_CUTN_ERROR(cutensornetExpectationConfigure(cutnHandle, expectation,
280                    CUTENSORNET_EXPECTATION_CONFIG_NUM_HYPER_SAMPLES, &numHyperSamples, sizeof(numHyperSamples)));

Prepare the expectation value (with gradient support)#

Let’s create a workspace descriptor and prepare the computation of the desired expectation value and gradients.

284  // Prepare the specified quantum circuit expectation value for computation (with gradient support)
285  cutensornetWorkspaceDescriptor_t workDesc;
286  HANDLE_CUTN_ERROR(cutensornetCreateWorkspaceDescriptor(cutnHandle, &workDesc));
287  std::cout << "Created the workspace descriptor\n";
     // scratchSize presumably serves as the workspace size limit for planning; the actual
     // requirement is queried from workDesc in the next step -- confirm in the API reference.
288  HANDLE_CUTN_ERROR(cutensornetExpectationPrepare(cutnHandle, expectation, scratchSize, workDesc, stream));
289  std::cout << "Prepared the specified quantum circuit expectation value (gradient backward)\n";

Set up the workspace#

293  // Attach the workspace buffer
294  int64_t worksize{0};
295  HANDLE_CUTN_ERROR(cutensornetWorkspaceGetMemorySize(cutnHandle, workDesc,
296                      CUTENSORNET_WORKSIZE_PREF_RECOMMENDED,
297                      CUTENSORNET_MEMSPACE_DEVICE,
298                      CUTENSORNET_WORKSPACE_SCRATCH,
299                      &worksize));
300  std::cout << "Required scratch GPU workspace size (bytes) = " << worksize << std::endl;
     // NOTE(review): int64_t worksize is compared against size_t scratchSize (sign-compare);
     // harmless if worksize is non-negative, but an explicit cast would silence the warning.
301  if (worksize <= scratchSize) {
302    HANDLE_CUTN_ERROR(cutensornetWorkspaceSetMemory(cutnHandle, workDesc, CUTENSORNET_MEMSPACE_DEVICE,
303                      CUTENSORNET_WORKSPACE_SCRATCH, d_scratch, worksize));
304  } else {
305    std::cout << "ERROR: Insufficient workspace size on Device!\n";
306    std::abort();
307  }
308  std::cout << "Set the workspace buffer\n";

Compute expectation value and gradients (backward)#

Call cutensornetExpectationComputeWithGradientsBackward() to obtain \(E\) together with the gradients. The adjoint of the expectation value (`expectationAdjoint` in the code) is set to 1.0 so that the backward pass yields the direct gradients \(\partial E/\partial A_j\).

312  // Compute the specified quantum circuit expectation value and gradients (backward pass)
313  std::complex<double> expectVal{0.0, 0.0};
     // Seeding the backward pass with adjoint 1.0 makes the outputs the direct gradients dE/dA.
314  cuDoubleComplex expectationAdjoint = {1.0, 0.0};
315  HANDLE_CUTN_ERROR(cutensornetExpectationComputeWithGradientsBackward(cutnHandle, expectation,
316                    0, &expectationAdjoint, nullptr, workDesc,
317                    static_cast<void*>(&expectVal), nullptr, stream));
     // Synchronize so expectVal and the device-side gradient buffers are final before host reads.
318  HANDLE_CUDA_ERROR(cudaStreamSynchronize(stream));
319  std::cout << "Computed the specified quantum circuit expectation value and gradients\n";
320  std::cout << "Expectation value = (" << expectVal.real() << ", " << expectVal.imag() << ")\n";
321  
322
323  // Copy gradients back to host and print
324  std::complex<double> h_gradRy[4], h_gradRz[4];
325  HANDLE_CUDA_ERROR(cudaMemcpy(h_gradRy, d_gradRy, gate1Size, cudaMemcpyDeviceToHost));
326  HANDLE_CUDA_ERROR(cudaMemcpy(h_gradRz, d_gradRz, gate1Size, cudaMemcpyDeviceToHost));
327  std::cout << "Gradient d<H>/d(Ry on q0):\n";
328  std::cout << "  [0,0]: (" << h_gradRy[0].real() << ", " << h_gradRy[0].imag() << ")\n";
329  std::cout << "  [0,1]: (" << h_gradRy[1].real() << ", " << h_gradRy[1].imag() << ")\n";
330  std::cout << "  [1,0]: (" << h_gradRy[2].real() << ", " << h_gradRy[2].imag() << ")\n";
331  std::cout << "  [1,1]: (" << h_gradRy[3].real() << ", " << h_gradRy[3].imag() << ")\n";
332  std::cout << "Gradient d<H>/d(Rz on q2):\n";
333  std::cout << "  [0,0]: (" << h_gradRz[0].real() << ", " << h_gradRz[0].imag() << ")\n";
334  std::cout << "  [0,1]: (" << h_gradRz[1].real() << ", " << h_gradRz[1].imag() << ")\n";
335  std::cout << "  [1,0]: (" << h_gradRz[2].real() << ", " << h_gradRz[2].imag() << ")\n";
336  std::cout << "  [1,1]: (" << h_gradRz[3].real() << ", " << h_gradRz[3].imag() << ")\n";

Free resources#

     // Tear down in roughly reverse order of creation: cuTensorNet objects first,
     // then device buffers, then the stream and the library handle.
340  // Destroy the workspace descriptor
341  HANDLE_CUTN_ERROR(cutensornetDestroyWorkspaceDescriptor(workDesc));
342  std::cout << "Destroyed the workspace descriptor\n";
343
344  // Destroy the quantum circuit expectation value
345  HANDLE_CUTN_ERROR(cutensornetDestroyExpectation(expectation));
346  std::cout << "Destroyed the quantum circuit state expectation value\n";
347
348  // Destroy the tensor network operator
349  HANDLE_CUTN_ERROR(cutensornetDestroyNetworkOperator(hamiltonian));
350  std::cout << "Destroyed the tensor network operator\n";
351
352  // Destroy the quantum circuit state
353  HANDLE_CUTN_ERROR(cutensornetDestroyState(quantumState));
354  std::cout << "Destroyed the quantum circuit state\n";
355
356  HANDLE_CUDA_ERROR(cudaFree(d_scratch));
357  HANDLE_CUDA_ERROR(cudaFree(d_gradRy));
358  HANDLE_CUDA_ERROR(cudaFree(d_gradRz));
359  HANDLE_CUDA_ERROR(cudaFree(d_gateRz));
360  HANDLE_CUDA_ERROR(cudaFree(d_gateRy));
361  HANDLE_CUDA_ERROR(cudaFree(d_gateRx));
362  HANDLE_CUDA_ERROR(cudaFree(d_gateCX));
363  HANDLE_CUDA_ERROR(cudaFree(d_gateI));
364  HANDLE_CUDA_ERROR(cudaFree(d_gateZ));
365  HANDLE_CUDA_ERROR(cudaFree(d_gateY));
366  HANDLE_CUDA_ERROR(cudaFree(d_gateX));
367  HANDLE_CUDA_ERROR(cudaFree(d_gateH));
368  std::cout << "Freed memory on GPU\n";
369
370  HANDLE_CUDA_ERROR(cudaStreamDestroy(stream));
371  HANDLE_CUTN_ERROR(cutensornetDestroy(cutnHandle));
372  std::cout << "Finalized the cuTensorNet library\n";
373
374  return 0;
375}