Sampling the Matrix Product State (circuit with Matrix Product Operators)

The following code example illustrates how to define a tensor network state by a circuit with Matrix Product Operators (MPO), then compute its Matrix Product State (MPS) factorization, and, finally, sample the MPS-factorized state. The full code can be found in the NVIDIA/cuQuantum repository.

Headers and error handling

 7#include <cstdlib>
 8#include <cstdio>
 9#include <cmath>
10#include <cassert>
11#include <complex>
12#include <random>
13#include <functional>
14#include <vector>
15#include <iostream>
16
17#include <cuda_runtime.h>
18#include <cutensornet.h>
19
20#define HANDLE_CUDA_ERROR(x) \
21{ const auto err = x; \
22  if( err != cudaSuccess ) \
23  { printf("CUDA error %s in line %d\n", cudaGetErrorString(err), __LINE__); fflush(stdout); std::abort(); } \
24};
25
26#define HANDLE_CUTN_ERROR(x) \
27{ const auto err = x; \
28  if( err != CUTENSORNET_STATUS_SUCCESS ) \
29  { printf("cuTensorNet error %s in line %d\n", cutensornetGetErrorString(err), __LINE__); fflush(stdout); std::abort(); } \
30};
31
32
33int main()
34{
35  static_assert(sizeof(size_t) == sizeof(int64_t), "Please build this sample on a 64-bit architecture!");
36
37  constexpr std::size_t fp64size = sizeof(double);
38  constexpr cudaDataType_t dataType = CUDA_C_64F;

Define the tensor network state and the desired number of output samples to generate

Let’s define all parameters of the tensor network state for a quantum circuit with the given number of qubits and request to produce a given number of output samples for the full qubit register. We also configure the maximal bond dimension for the MPS and MPO tensor network representations used in this example. For the MPO, we also define the number of sites (qubits) it acts on as well as the number of layers of the MPO.

42  // Quantum state configuration
43  const int64_t numSamples = 128; // number of samples to generate
44  const int32_t numQudits = 6;    // number of qudits
45  const int64_t quditDim = 2;     // dimension of all qudits
46  const std::vector<int64_t> quditDims(numQudits, quditDim); // qudit dimensions
47  const int64_t mpsBondDim = 8;   // maximum MPS bond dimension
48  const int64_t mpoBondDim = 2;   // maximum MPO bond dimension
49  const int32_t mpoNumLayers = 5; // number of MPO layers
50  constexpr int32_t mpoNumSites = 4;  // number of MPO sites
51  std::cout << "Quantum circuit: " << numQudits << " qudits; " << numSamples << " samples\n";
52
53  /* Action of five alternating four-site MPO gates (operators)
 54     on the 6-qudit quantum register (illustration):
55
56  Q----X---------X---------X----
57       |         |         |
58  Q----X---------X---------X----
59       |         |         |
60  Q----X----X----X----X----X----
61       |    |    |    |    |
62  Q----X----X----X----X----X----
63            |         |
64  Q---------X---------X---------
65            |         |
66  Q---------X---------X---------
67
68    |layer|
69  */
70  static_assert(mpoNumSites > 1, "Number of MPO sites must be larger than one!");
71
72  // Random number generator
73  std::default_random_engine generator;
74  std::uniform_real_distribution<double> distribution(-1.0, 1.0);
75  auto rnd = std::bind(distribution, generator);

Initialize the cuTensorNet library handle

79  // Initialize the cuTensorNet library
80  HANDLE_CUDA_ERROR(cudaSetDevice(0));
81  cutensornetHandle_t cutnHandle;
82  HANDLE_CUTN_ERROR(cutensornetCreate(&cutnHandle));
83  std::cout << "Initialized cuTensorNet library on GPU 0\n";

Define and allocate MPO tensors

Here we define the shapes of the MPO tensors, fill them in with random data on Host, and, finally, transfer them into Device memory.

 87  /* MPO tensor mode numeration (open boundary condition):
 88
 89     2            3                   2
 90     |            |                   |
 91     X--1------0--X--2---- ... ----0--X
 92     |            |                   |
 93     0            1                   1
 94
 95  */
 96  // Define shapes of the MPO tensors
 97  using GateData = std::vector<std::complex<double>>;
 98  using ModeExtents = std::vector<int64_t>;
 99  std::vector<GateData> mpoTensors(mpoNumSites);        // MPO tensors
100  std::vector<ModeExtents> mpoModeExtents(mpoNumSites); // mode extents for MPO tensors
101  int64_t upperBondDim = 1;
102  for(int tensId = 0; tensId < (mpoNumSites / 2); ++tensId) {
103    const auto leftBondDim = std::min(mpoBondDim, upperBondDim);
104    const auto rightBondDim = std::min(mpoBondDim, leftBondDim * (quditDim * quditDim));
105    if(tensId == 0) {
106      mpoModeExtents[tensId] = {quditDim, rightBondDim, quditDim};
107    }else{
108      mpoModeExtents[tensId] = {leftBondDim, quditDim, rightBondDim, quditDim};
109    }
110    upperBondDim = rightBondDim;
111  }
112  auto centralBondDim = upperBondDim;
113  if(mpoNumSites % 2 != 0) {
114    const int tensId = mpoNumSites / 2;
115    mpoModeExtents[tensId] = {centralBondDim, quditDim, centralBondDim, quditDim};
116  }
117  upperBondDim = 1;
118  for(int tensId = (mpoNumSites-1); tensId >= (mpoNumSites / 2) + (mpoNumSites % 2); --tensId) {
119    const auto rightBondDim = std::min(mpoBondDim, upperBondDim);
120    const auto leftBondDim = std::min(mpoBondDim, rightBondDim * (quditDim * quditDim));
121    if(tensId == (mpoNumSites-1)) {
122      mpoModeExtents[tensId] = {leftBondDim, quditDim, quditDim};
123    }else{
124      mpoModeExtents[tensId] = {leftBondDim, quditDim, rightBondDim, quditDim};
125    }
126    upperBondDim = leftBondDim;
127  }
128  // Fill in the MPO tensors with random data on Host
129  std::vector<const int64_t*> mpoModeExtentsPtr(mpoNumSites, nullptr);
130  for(int tensId = 0; tensId < mpoNumSites; ++tensId) {
131    mpoModeExtentsPtr[tensId] = mpoModeExtents[tensId].data();
132    const auto tensRank = mpoModeExtents[tensId].size();
133    int64_t tensVol = 1;
134    for(int i = 0; i < tensRank; ++i) {
135      tensVol *= mpoModeExtents[tensId][i];
136    }
137    mpoTensors[tensId].resize(tensVol);
138    for(int64_t i = 0; i < tensVol; ++i) {
139      mpoTensors[tensId][i] = std::complex<double>(rnd(), rnd());
140    }
141  }
142  // Allocate and copy MPO tensors into Device memory
143  std::vector<void*> d_mpoTensors(mpoNumSites, nullptr);
144  for(int tensId = 0; tensId < mpoNumSites; ++tensId) {
145    const auto tensRank = mpoModeExtents[tensId].size();
146    int64_t tensVol = 1;
147    for(int i = 0; i < tensRank; ++i) {
148      tensVol *= mpoModeExtents[tensId][i];
149    }
150    HANDLE_CUDA_ERROR(cudaMalloc(&(d_mpoTensors[tensId]), 
151                                 std::size_t(tensVol) * (2 * fp64size)));
152    HANDLE_CUDA_ERROR(cudaMemcpy(d_mpoTensors[tensId], mpoTensors[tensId].data(),
153                                 std::size_t(tensVol) * (2 * fp64size), cudaMemcpyHostToDevice));
154  }
155  std::cout << "Allocated and defined MPO tensors in GPU memory\n";

Define and allocate MPS tensors

Here we define the shapes of the MPS tensors and allocate Device memory for their storage.

159  // Define the MPS representation and allocate memory buffers for the MPS tensors
160  std::vector<ModeExtents> mpsModeExtents(numQudits);
161  std::vector<int64_t*> mpsModeExtentsPtr(numQudits, nullptr);
162  std::vector<void*> d_mpsTensors(numQudits, nullptr);
163  upperBondDim = 1;
164  for (int tensId = 0; tensId < (numQudits / 2); ++tensId) {
165    const auto leftBondDim = std::min(mpsBondDim, upperBondDim);
166    const auto rightBondDim = std::min(mpsBondDim, leftBondDim * quditDim);
167    if (tensId == 0) { // left boundary MPS tensor  
168      mpsModeExtents[tensId] = {quditDim, rightBondDim};
169      HANDLE_CUDA_ERROR(cudaMalloc(&(d_mpsTensors[tensId]),
170                                   std::size_t(quditDim * rightBondDim) * (2 * fp64size)));
171    } else { // middle MPS tensors
172      mpsModeExtents[tensId] = {leftBondDim, quditDim, rightBondDim};
173      HANDLE_CUDA_ERROR(cudaMalloc(&(d_mpsTensors[tensId]),
174                                   std::size_t(leftBondDim * quditDim * rightBondDim) * (2 * fp64size)));
175    }
176    upperBondDim = rightBondDim;
177    mpsModeExtentsPtr[tensId] = mpsModeExtents[tensId].data();
178  }
179  centralBondDim = upperBondDim;
180  if(numQudits % 2 != 0) {
181    const int tensId = numQudits / 2;
182    mpsModeExtents[tensId] = {centralBondDim, quditDim, centralBondDim};
183    mpsModeExtentsPtr[tensId] = mpsModeExtents[tensId].data();
184  }
185  upperBondDim = 1;
186  for (int tensId = (numQudits-1); tensId >= (numQudits / 2) + (numQudits % 2); --tensId) {
187    const auto rightBondDim = std::min(mpsBondDim, upperBondDim);
188    const auto leftBondDim = std::min(mpsBondDim, rightBondDim * quditDim);
189    if (tensId == (numQudits-1)) { // right boundary MPS tensor  
190      mpsModeExtents[tensId] = {leftBondDim, quditDim};
191      HANDLE_CUDA_ERROR(cudaMalloc(&(d_mpsTensors[tensId]),
192                                   std::size_t(leftBondDim * quditDim) * (2 * fp64size)));
193    } else { // middle MPS tensors
194      mpsModeExtents[tensId] = {leftBondDim, quditDim, rightBondDim};
195      HANDLE_CUDA_ERROR(cudaMalloc(&(d_mpsTensors[tensId]),
196                                   std::size_t(leftBondDim * quditDim * rightBondDim) * (2 * fp64size)));
197    }
198    upperBondDim = leftBondDim;
199    mpsModeExtentsPtr[tensId] = mpsModeExtents[tensId].data();
200  }
201  std::cout << "Allocated MPS tensors in GPU memory\n";

Allocate the scratch buffer on GPU

205  // Query free memory on Device and allocate a scratch buffer
206  std::size_t freeSize {0}, totalSize {0};
207  HANDLE_CUDA_ERROR(cudaMemGetInfo(&freeSize, &totalSize));
208  const std::size_t scratchSize = (freeSize - (freeSize % 4096)) / 2; // use half of available memory with alignment
209  void *d_scratch {nullptr};
210  HANDLE_CUDA_ERROR(cudaMalloc(&d_scratch, scratchSize));
211  std::cout << "Allocated " << scratchSize << " bytes of scratch memory on GPU: "
212            << "[" << d_scratch << ":" << (void*)(((char*)(d_scratch))  + scratchSize) << ")\n";

Create a pure tensor network state

Now let’s create a pure tensor network state for a quantum circuit with the given number of qubits (vacuum state).

216  // Create the initial quantum state
217  cutensornetState_t quantumState;
218  HANDLE_CUTN_ERROR(cutensornetCreateState(cutnHandle, CUTENSORNET_STATE_PURITY_PURE, numQudits, quditDims.data(),
219                    dataType, &quantumState));
220  std::cout << "Created the initial (vacuum) quantum state\n";

Construct necessary MPO tensor network operators

Let’s construct two MPO tensor network operators acting on different subsets of qubits.

224  // Construct the MPO tensor network operators
225  int64_t componentId;
226  cutensornetNetworkOperator_t tnOperator1;
227  HANDLE_CUTN_ERROR(cutensornetCreateNetworkOperator(cutnHandle, numQudits, quditDims.data(),
228                    dataType, &tnOperator1));
229  HANDLE_CUTN_ERROR(cutensornetNetworkOperatorAppendMPO(cutnHandle, tnOperator1, make_cuDoubleComplex(1.0, 0.0),
230                    mpoNumSites, std::vector<int32_t>({0,1,2,3}).data(),
231                    mpoModeExtentsPtr.data(), /*strides=*/nullptr,
232                    std::vector<const void*>(d_mpoTensors.cbegin(), d_mpoTensors.cend()).data(),
233                    CUTENSORNET_BOUNDARY_CONDITION_OPEN, &componentId));
234  cutensornetNetworkOperator_t tnOperator2;
235  HANDLE_CUTN_ERROR(cutensornetCreateNetworkOperator(cutnHandle, numQudits, quditDims.data(),
236                    dataType, &tnOperator2));
237  HANDLE_CUTN_ERROR(cutensornetNetworkOperatorAppendMPO(cutnHandle, tnOperator2, make_cuDoubleComplex(1.0, 0.0),
238                    mpoNumSites, std::vector<int32_t>({2,3,4,5}).data(),
239                    mpoModeExtentsPtr.data(), /*strides=*/nullptr,
240                    std::vector<const void*>(d_mpoTensors.cbegin(), d_mpoTensors.cend()).data(),
241                    CUTENSORNET_BOUNDARY_CONDITION_OPEN, &componentId));
242  std::cout << "Constructed two MPO tensor network operators\n";

Apply MPO tensor network operators to the quantum circuit state

Let’s construct the target quantum circuit by applying MPO tensor network operators in an alternating fashion.

246  // Apply the MPO tensor network operators to the quantum state
247  for(int layer = 0; layer < mpoNumLayers; ++layer) {
248    int64_t operatorId;
249    if(layer % 2 == 0) {
250      HANDLE_CUTN_ERROR(cutensornetStateApplyNetworkOperator(cutnHandle, quantumState, tnOperator1,
251                        1, 0, 0, &operatorId));
252    }else{
253      HANDLE_CUTN_ERROR(cutensornetStateApplyNetworkOperator(cutnHandle, quantumState, tnOperator2,
254        1, 0, 0, &operatorId));
255    }
256  }
257  std::cout << "Applied " << mpoNumLayers << " MPO gates to the quantum state\n";

Request MPS factorization for the final quantum circuit state

Here we express our intent to factorize the final state of the quantum circuit using MPS factorization. The provided shapes (mode extents) of the MPS tensors represent maximum size limits during renormalization. Computed mode extents for the final MPS tensors are less than or equal to these limits. Note that no computation is done here yet.

261  // Specify the target MPS representation (use default strides)
262  HANDLE_CUTN_ERROR(cutensornetStateFinalizeMPS(cutnHandle, quantumState, 
263                    CUTENSORNET_BOUNDARY_CONDITION_OPEN, mpsModeExtentsPtr.data(), /*strides=*/nullptr ));
264  std::cout << "Finalized the form of the desired MPS representation\n";

Configure MPS factorization procedure

After expressing our intent to perform MPS factorization of the final quantum circuit state, we can also configure the MPS factorization procedure by setting different options, for example, the SVD algorithm.

268  // Set up the SVD method for bonds truncation (optional)
269  cutensornetTensorSVDAlgo_t algo = CUTENSORNET_TENSOR_SVD_ALGO_GESVDJ; 
270  HANDLE_CUTN_ERROR(cutensornetStateConfigure(cutnHandle, quantumState, 
271                    CUTENSORNET_STATE_MPS_SVD_CONFIG_ALGO, &algo, sizeof(algo)));
272  cutensornetStateMPOApplication_t mpsApplication = CUTENSORNET_STATE_MPO_APPLICATION_EXACT; 
273  // Use exact MPS-MPO application as extent of 8 can faithfully represent a 6 qubit state (optional)
274  HANDLE_CUTN_ERROR(cutensornetStateConfigure(cutnHandle, quantumState, 
275                    CUTENSORNET_STATE_CONFIG_MPS_MPO_APPLICATION, &mpsApplication, sizeof(mpsApplication)));
276  std::cout << "Configured the MPS computation\n";

Prepare the computation of MPS factorization

Let’s create a workspace descriptor and prepare the computation of the MPS factorization of the final quantum circuit state.

280  // Prepare the MPS computation and attach a workspace buffer
281  cutensornetWorkspaceDescriptor_t workDesc;
282  HANDLE_CUTN_ERROR(cutensornetCreateWorkspaceDescriptor(cutnHandle, &workDesc));
283  std::cout << "Created the workspace descriptor\n";
284  HANDLE_CUTN_ERROR(cutensornetStatePrepare(cutnHandle, quantumState, scratchSize, workDesc, 0x0));
285  int64_t worksize {0};
286  HANDLE_CUTN_ERROR(cutensornetWorkspaceGetMemorySize(cutnHandle,
287                                                      workDesc,
288                                                      CUTENSORNET_WORKSIZE_PREF_RECOMMENDED,
289                                                      CUTENSORNET_MEMSPACE_DEVICE,
290                                                      CUTENSORNET_WORKSPACE_SCRATCH,
291                                                      &worksize));
292  std::cout << "Scratch GPU workspace size (bytes) for the MPS computation = " << worksize << std::endl;
293  if(worksize <= scratchSize) {
294    HANDLE_CUTN_ERROR(cutensornetWorkspaceSetMemory(cutnHandle, workDesc, CUTENSORNET_MEMSPACE_DEVICE,
295                      CUTENSORNET_WORKSPACE_SCRATCH, d_scratch, worksize));
296  }else{
297    std::cout << "ERROR: Insufficient workspace size on Device!\n";
298    std::abort();
299  }
300  std::cout << "Set the workspace buffer and prepared the MPS computation\n";

Compute MPS factorization

Once the MPS factorization procedure has been configured and prepared, let’s compute the MPS factorization of the final quantum circuit state.

304  // Compute the MPS factorization of the quantum circuit state
305  HANDLE_CUTN_ERROR(cutensornetStateCompute(cutnHandle, quantumState, 
306                    workDesc, mpsModeExtentsPtr.data(), /*strides=*/nullptr, d_mpsTensors.data(), 0));
307  std::cout << "Computed the MPS factorization of the quantum circuit state\n";
308  

Create the tensor network state sampler

Once the quantum circuit state has been constructed and factorized using the MPS representation, let’s create the tensor network state sampler for all qubits.

311  // Create the quantum circuit sampler
312  cutensornetStateSampler_t sampler;
313  HANDLE_CUTN_ERROR(cutensornetCreateSampler(cutnHandle, quantumState, numQudits, nullptr, &sampler));
314  std::cout << "Created the quantum circuit sampler\n";

Configure the tensor network state sampler

Optionally, we can configure the tensor network state sampler by setting the number of hyper-samples to be used by the tensor network contraction path finder.

318  // Configure the quantum circuit sampler
319  const int32_t numHyperSamples = 8; // desired number of hyper samples used in the tensor network contraction path finder
320  HANDLE_CUTN_ERROR(cutensornetSamplerConfigure(cutnHandle, sampler,
321                    CUTENSORNET_SAMPLER_OPT_NUM_HYPER_SAMPLES, &numHyperSamples, sizeof(numHyperSamples)));

Prepare the tensor network state sampler

Now let’s prepare the tensor network state sampler.

325  // Prepare the quantum circuit sampler
326  HANDLE_CUTN_ERROR(cutensornetSamplerPrepare(cutnHandle, sampler, scratchSize, workDesc, 0x0));
327  std::cout << "Prepared the quantum circuit state sampler\n";

Set up the workspace

Now we can set up the required workspace buffer.

331  // Attach the workspace buffer
332  HANDLE_CUTN_ERROR(cutensornetWorkspaceGetMemorySize(cutnHandle,
333                                                      workDesc,
334                                                      CUTENSORNET_WORKSIZE_PREF_RECOMMENDED,
335                                                      CUTENSORNET_MEMSPACE_DEVICE,
336                                                      CUTENSORNET_WORKSPACE_SCRATCH,
337                                                      &worksize));
338  std::cout << "Scratch GPU workspace size (bytes) for MPS Sampling = " << worksize << std::endl;
339  assert(worksize > 0);
340  if(worksize <= scratchSize) {
341    HANDLE_CUTN_ERROR(cutensornetWorkspaceSetMemory(cutnHandle, workDesc, CUTENSORNET_MEMSPACE_DEVICE,
342                      CUTENSORNET_WORKSPACE_SCRATCH, d_scratch, worksize));
343  }else{
344    std::cout << "ERROR: Insufficient workspace size on Device!\n";
345    std::abort();
346  }
347  std::cout << "Set the workspace buffer\n";

Perform sampling of the final quantum circuit state

Once everything has been set up, we perform sampling of the final MPS-factorized quantum circuit state and print the output samples.

351  // Sample the quantum circuit state
352  std::vector<int64_t> samples(numQudits * numSamples); // samples[SampleId][QuditId] reside in Host memory
353  HANDLE_CUTN_ERROR(cutensornetSamplerSample(cutnHandle, sampler, numSamples, workDesc, samples.data(), 0));
354  std::cout << "Performed quantum circuit state sampling\n";
355  std::cout << "Generated samples:\n";
356  for(int64_t i = 0; i < numSamples; ++i) {
357    for(int64_t j = 0; j < numQudits; ++j) std::cout << " " << samples[i * numQudits + j];
358    std::cout << std::endl;
359  }

Free resources

363  // Destroy the quantum circuit sampler
364  HANDLE_CUTN_ERROR(cutensornetDestroySampler(sampler));
365  std::cout << "Destroyed the quantum circuit state sampler\n";
366
367  // Destroy the workspace descriptor
368  HANDLE_CUTN_ERROR(cutensornetDestroyWorkspaceDescriptor(workDesc));
369  std::cout << "Destroyed the workspace descriptor\n";
370
371  // Destroy the MPO tensor network operators
372  HANDLE_CUTN_ERROR(cutensornetDestroyNetworkOperator(tnOperator2));
373  HANDLE_CUTN_ERROR(cutensornetDestroyNetworkOperator(tnOperator1));
374  std::cout << "Destroyed the MPO tensor network operators\n";
375
376  // Destroy the quantum circuit state
377  HANDLE_CUTN_ERROR(cutensornetDestroyState(quantumState));
378  std::cout << "Destroyed the quantum circuit state\n";
379
380  // Free GPU buffers
381  HANDLE_CUDA_ERROR(cudaFree(d_scratch));
382  for (int32_t i = 0; i < numQudits; ++i) {
383    HANDLE_CUDA_ERROR(cudaFree(d_mpsTensors[i]));
384  }
385  for (int32_t i = 0; i < mpoNumSites; ++i) {
386    HANDLE_CUDA_ERROR(cudaFree(d_mpoTensors[i]));
387  }
388  std::cout << "Freed memory on GPU\n";
389  
390  // Finalize the cuTensorNet library
391  HANDLE_CUTN_ERROR(cutensornetDestroy(cutnHandle));
392  std::cout << "Finalized the cuTensorNet library\n";
393
394  return 0;
395}