Computing Matrix Product State Marginal Distribution Tensor¶
The following code example illustrates how to define a tensor network state, compute its Matrix Product State (MPS) factorization, and then compute a marginal distribution tensor for the MPS-factorized state. The full code can be found in the NVIDIA/cuQuantum repository (here).
Headers and error handling¶
#include <cstdlib>
#include <cstdio>
#include <cassert>
#include <cmath>     // std::sqrt
#include <complex>
#include <vector>
#include <bitset>
#include <iostream>

#include <cuda_runtime.h>
#include <cutensornet.h>


#define HANDLE_CUDA_ERROR(x) \
{ const auto err = x; \
  if( err != cudaSuccess ) \
  { printf("CUDA error %s in line %d\n", cudaGetErrorString(err), __LINE__); fflush(stdout); std::abort(); } \
};

#define HANDLE_CUTN_ERROR(x) \
{ const auto err = x; \
  if( err != CUTENSORNET_STATUS_SUCCESS ) \
  { printf("cuTensorNet error %s in line %d\n", cutensornetGetErrorString(err), __LINE__); fflush(stdout); std::abort(); } \
};


int main(int argc, char **argv)
{
  static_assert(sizeof(size_t) == sizeof(int64_t), "Please build this sample on a 64-bit architecture!");

  constexpr std::size_t fp64size = sizeof(double);
Define the tensor network state and the desired marginal distribution tensor¶
Let’s define a tensor network state corresponding to a 16-qubit quantum circuit and request the marginal distribution tensor for qubits 0 and 1.
  // Quantum state configuration
  constexpr int32_t numQubits = 16;
  const std::vector<int64_t> qubitDims(numQubits, 2); // qubit dimensions
  constexpr int32_t numMarginalModes = 2; // rank of the marginal (reduced density matrix)
  const std::vector<int32_t> marginalModes({0,1}); // open qubits (must be in ascending order)
  std::cout << "Quantum circuit: " << numQubits << " qubits\n";
Initialize the cuTensorNet library handle¶
  // Initialize the cuTensorNet library
  HANDLE_CUDA_ERROR(cudaSetDevice(0));
  cutensornetHandle_t cutnHandle;
  HANDLE_CUTN_ERROR(cutensornetCreate(&cutnHandle));
  std::cout << "Initialized cuTensorNet library on GPU 0\n";
Define quantum gates on GPU¶
  // Define necessary quantum gate tensors in Host memory
  const double invsq2 = 1.0 / std::sqrt(2.0);
  // Hadamard gate
  const std::vector<std::complex<double>> h_gateH {{invsq2, 0.0}, {invsq2, 0.0},
                                                   {invsq2, 0.0}, {-invsq2, 0.0}};
  // CX gate
  const std::vector<std::complex<double>> h_gateCX {{1.0, 0.0}, {0.0, 0.0}, {0.0, 0.0}, {0.0, 0.0},
                                                    {0.0, 0.0}, {1.0, 0.0}, {0.0, 0.0}, {0.0, 0.0},
                                                    {0.0, 0.0}, {0.0, 0.0}, {0.0, 0.0}, {1.0, 0.0},
                                                    {0.0, 0.0}, {0.0, 0.0}, {1.0, 0.0}, {0.0, 0.0}};

  // Copy quantum gates to Device memory
  void *d_gateH{nullptr}, *d_gateCX{nullptr};
  HANDLE_CUDA_ERROR(cudaMalloc(&d_gateH, 4 * (2 * fp64size)));
  HANDLE_CUDA_ERROR(cudaMalloc(&d_gateCX, 16 * (2 * fp64size)));
  std::cout << "Allocated quantum gate memory on GPU\n";
  HANDLE_CUDA_ERROR(cudaMemcpy(d_gateH, h_gateH.data(), 4 * (2 * fp64size), cudaMemcpyHostToDevice));
  HANDLE_CUDA_ERROR(cudaMemcpy(d_gateCX, h_gateCX.data(), 16 * (2 * fp64size), cudaMemcpyHostToDevice));
  std::cout << "Copied quantum gates to GPU memory\n";
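The allocate-and-copy pattern above repeats for every gate tensor. For larger circuits one might fold it into a small helper; the following sketch (the uploadGate function is our illustration, not part of the sample) reuses the HANDLE_CUDA_ERROR macro defined earlier.

// Illustrative helper (not in the original sample): allocates Device memory
// for a host-side gate tensor and copies the data over
void* uploadGate(const std::vector<std::complex<double>> &hostGate)
{
  void *devPtr{nullptr};
  const std::size_t bytes = hostGate.size() * sizeof(std::complex<double>); // == count * 2 * fp64size
  HANDLE_CUDA_ERROR(cudaMalloc(&devPtr, bytes));
  HANDLE_CUDA_ERROR(cudaMemcpy(devPtr, hostGate.data(), bytes, cudaMemcpyHostToDevice));
  return devPtr;
}

With such a helper, d_gateH would simply be obtained as uploadGate(h_gateH).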
Allocate MPS tensors¶
Here we set the shapes of MPS tensors and allocate GPU memory for their storage.
  // Determine the MPS representation and allocate buffers for the MPS tensors
  const int64_t maxExtent = 2; // GHZ state can be exactly represented with max bond dimension of 2
  std::vector<std::vector<int64_t>> extents;
  std::vector<int64_t*> extentsPtr(numQubits);
  std::vector<void*> d_mpsTensors(numQubits, nullptr);
  for (int32_t i = 0; i < numQubits; i++) {
    if (i == 0) { // left boundary MPS tensor
      extents.push_back({2, maxExtent});
      HANDLE_CUDA_ERROR(cudaMalloc(&d_mpsTensors[i], 2 * maxExtent * 2 * fp64size));
    }
    else if (i == numQubits-1) { // right boundary MPS tensor
      extents.push_back({maxExtent, 2});
      HANDLE_CUDA_ERROR(cudaMalloc(&d_mpsTensors[i], 2 * maxExtent * 2 * fp64size));
    }
    else { // middle MPS tensors
      extents.push_back({maxExtent, 2, maxExtent});
      HANDLE_CUDA_ERROR(cudaMalloc(&d_mpsTensors[i], 2 * maxExtent * maxExtent * 2 * fp64size));
    }
    extentsPtr[i] = extents[i].data();
  }
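A fixed maxExtent of 2 suffices here because the GHZ state has Schmidt rank 2 across every bipartition. For a generic state, the exact bond dimension across the cut to the left of site i is min(2^i, 2^(numQubits-i)); a hedged sketch of how one might compute a capped value (the bondDim helper is illustrative and additionally needs <algorithm> for std::min):

// Illustrative helper: exact bond dimension across the cut left of site i,
// capped by a user-chosen maximum bond dimension
int64_t bondDim(int32_t i, int32_t numQubits, int64_t maxExtent)
{
  const int32_t exponent = std::min(i, numQubits - i);
  if (exponent >= 62) return maxExtent; // avoid shifting past the int64_t range
  return std::min<int64_t>(maxExtent, int64_t{1} << exponent);
}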
Allocate the marginal distribution tensor on GPU¶
Here we allocate the marginal distribution tensor, that is, the reduced density matrix for qubits 0 and 1, on GPU.
  // Allocate the specified quantum circuit reduced density matrix (marginal) in Device memory
  void *d_rdm{nullptr};
  std::size_t rdmDim = 1;
  for(const auto & mode: marginalModes) rdmDim *= qubitDims[mode];
  const std::size_t rdmSize = rdmDim * rdmDim;
  HANDLE_CUDA_ERROR(cudaMalloc(&d_rdm, rdmSize * (2 * fp64size)));
Allocate the scratch buffer on GPU¶
  // Query the free memory on Device
  std::size_t freeSize{0}, totalSize{0};
  HANDLE_CUDA_ERROR(cudaMemGetInfo(&freeSize, &totalSize));
  const std::size_t scratchSize = (freeSize - (freeSize % 4096)) / 2; // use half of available memory with alignment
  void *d_scratch{nullptr};
  HANDLE_CUDA_ERROR(cudaMalloc(&d_scratch, scratchSize));
  std::cout << "Allocated " << scratchSize << " bytes of scratch memory on GPU\n";
Create a pure tensor network state¶
Now let’s create a pure tensor network state for a 16-qubit quantum circuit.
  // Create the initial quantum state
  cutensornetState_t quantumState;
  HANDLE_CUTN_ERROR(cutensornetCreateState(cutnHandle, CUTENSORNET_STATE_PURITY_PURE, numQubits, qubitDims.data(),
                    CUDA_C_64F, &quantumState));
  std::cout << "Created the initial quantum state\n";
Apply quantum gates¶
Let’s construct the GHZ quantum circuit by applying the corresponding quantum gates.
  // Construct the final quantum circuit state (apply quantum gates) for the GHZ circuit
  int64_t id;
  HANDLE_CUTN_ERROR(cutensornetStateApplyTensor(cutnHandle, quantumState, 1, std::vector<int32_t>{{0}}.data(),
                    d_gateH, nullptr, 1, 0, 1, &id));
  for(int32_t i = 1; i < numQubits; ++i) {
    HANDLE_CUTN_ERROR(cutensornetStateApplyTensor(cutnHandle, quantumState, 2, std::vector<int32_t>{{i-1,i}}.data(),
                      d_gateCX, nullptr, 1, 0, 1, &id));
  }
  std::cout << "Applied quantum gates\n";
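The circuit prepares the 16-qubit GHZ state

\[ |\mathrm{GHZ}_{16}\rangle = \frac{1}{\sqrt{2}}\left(|0\rangle^{\otimes 16} + |1\rangle^{\otimes 16}\right), \]

so any marginal over a subset of qubits is an equal classical mixture of the all-zero and all-one bit strings on those qubits.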
Request MPS factorization for the final quantum circuit state¶
Here we express our intent to factorize the final quantum circuit state using MPS factorization. The provided MPS tensor shapes act as upper bounds during the MPS renormalization procedure; the shapes actually computed for the final MPS tensors may be smaller. No computation is performed at this point.
  // Specify the final target MPS representation (use default Fortran strides)
  HANDLE_CUTN_ERROR(cutensornetStateFinalizeMPS(cutnHandle, quantumState,
                    CUTENSORNET_BOUNDARY_CONDITION_OPEN, extentsPtr.data(), /*strides=*/nullptr));
  std::cout << "Requested the final MPS factorization of the quantum circuit state\n";
Configure MPS factorization procedure¶
After expressing our intent to perform MPS factorization of the final quantum circuit state, we can also configure the MPS factorization procedure by adjusting various options, for example, the SVD algorithm used for truncation.
  // Optional: set up the SVD method for MPS truncation
  cutensornetTensorSVDAlgo_t algo = CUTENSORNET_TENSOR_SVD_ALGO_GESVDJ;
  HANDLE_CUTN_ERROR(cutensornetStateConfigure(cutnHandle, quantumState,
                    CUTENSORNET_STATE_MPS_SVD_CONFIG_ALGO, &algo, sizeof(algo)));
  std::cout << "Configured the MPS factorization computation\n";
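Other truncation controls can be set the same way. As a sketch (assuming your cuTensorNet release exposes the CUTENSORNET_STATE_MPS_SVD_CONFIG_ABS_CUTOFF attribute; check the headers of your version), one could also discard singular values below an absolute cutoff:

  // Illustrative (verify attribute availability in your cuTensorNet release):
  // drop singular values below an absolute cutoff during MPS truncation
  const double absCutoff = 1e-8;
  HANDLE_CUTN_ERROR(cutensornetStateConfigure(cutnHandle, quantumState,
                    CUTENSORNET_STATE_MPS_SVD_CONFIG_ABS_CUTOFF, &absCutoff, sizeof(absCutoff)));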
Prepare the computation of MPS factorization¶
Let’s create a workspace descriptor and prepare the computation of MPS factorization.
  // Prepare the MPS computation and attach workspace
  cutensornetWorkspaceDescriptor_t workDesc;
  HANDLE_CUTN_ERROR(cutensornetCreateWorkspaceDescriptor(cutnHandle, &workDesc));
  std::cout << "Created the workspace descriptor\n";
  HANDLE_CUTN_ERROR(cutensornetStatePrepare(cutnHandle, quantumState, scratchSize, workDesc, 0x0));
  int64_t worksize{0};
  HANDLE_CUTN_ERROR(cutensornetWorkspaceGetMemorySize(cutnHandle,
                                                      workDesc,
                                                      CUTENSORNET_WORKSIZE_PREF_RECOMMENDED,
                                                      CUTENSORNET_MEMSPACE_DEVICE,
                                                      CUTENSORNET_WORKSPACE_SCRATCH,
                                                      &worksize));
  std::cout << "Scratch GPU workspace size (bytes) for MPS computation = " << worksize << std::endl;
  if(worksize <= scratchSize) {
    HANDLE_CUTN_ERROR(cutensornetWorkspaceSetMemory(cutnHandle, workDesc, CUTENSORNET_MEMSPACE_DEVICE,
                      CUTENSORNET_WORKSPACE_SCRATCH, d_scratch, worksize));
  } else {
    std::cout << "ERROR: Insufficient workspace size on Device!\n";
    std::abort();
  }
  std::cout << "Set the workspace buffer for the MPS factorization computation\n";
Compute MPS factorization¶
Once the MPS factorization procedure has been configured and prepared, let’s compute the MPS factorization of the final quantum circuit state.
  // Execute MPS computation
  HANDLE_CUTN_ERROR(cutensornetStateCompute(cutnHandle, quantumState,
                    workDesc, extentsPtr.data(), /*strides=*/nullptr, d_mpsTensors.data(), 0));
  std::cout << "Computed the MPS factorization\n";
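As noted above, the computed extents may be smaller than the requested upper bounds; since cutensornetStateCompute writes the final extents back through extentsPtr, one could inspect them right after the call (an illustrative addition, not part of the sample):

  // Illustrative: report the extents actually produced by the factorization
  for (int32_t i = 0; i < numQubits; ++i) {
    std::cout << "MPS tensor " << i << " extents:";
    for (const auto ext : extents[i]) std::cout << " " << ext;
    std::cout << "\n";
  }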
Create the marginal distribution object¶
Once the quantum circuit has been constructed, let’s create the marginal distribution object that will compute the marginal distribution tensor (reduced density matrix) for qubits 0 and 1.
  // Specify the desired reduced density matrix (marginal)
  cutensornetStateMarginal_t marginal;
  HANDLE_CUTN_ERROR(cutensornetCreateMarginal(cutnHandle, quantumState, numMarginalModes, marginalModes.data(),
                    0, nullptr, std::vector<int64_t>{{1,2,4,8}}.data(), &marginal)); // using explicit strides
  std::cout << "Created the specified quantum circuit reduced density matrix (marginal)\n";
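The explicit strides {1,2,4,8} describe a dense column-major layout for the rank-4 marginal tensor (two ket and two bra modes of extent 2 each), matching the default Fortran-order layout that passing nullptr would imply. A generic way to derive such strides (the colMajorStrides helper is our illustration, not part of the sample):

// Illustrative helper: dense column-major strides for a given extents list;
// the stride of mode m is the product of the extents of all preceding modes
std::vector<int64_t> colMajorStrides(const std::vector<int64_t> &tensorExtents)
{
  std::vector<int64_t> strides(tensorExtents.size());
  int64_t stride = 1;
  for (std::size_t m = 0; m < tensorExtents.size(); ++m) {
    strides[m] = stride;
    stride *= tensorExtents[m];
  }
  return strides;
}

For extents {2,2,2,2} this reproduces {1,2,4,8}.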
Configure the marginal distribution object¶
Now we can configure the marginal distribution object by setting the number of hyper-samples to be used by the tensor network contraction path finder.
  // Configure the computation of the specified quantum circuit reduced density matrix (marginal)
  const int32_t numHyperSamples = 8; // desired number of hyper-samples used in the tensor network contraction path finder
  HANDLE_CUTN_ERROR(cutensornetMarginalConfigure(cutnHandle, marginal,
                    CUTENSORNET_MARGINAL_OPT_NUM_HYPER_SAMPLES, &numHyperSamples, sizeof(numHyperSamples)));
  std::cout << "Configured the specified quantum circuit reduced density matrix (marginal) computation\n";
Prepare the computation of the marginal distribution tensor¶
Let’s prepare the computation of the marginal distribution tensor.
  // Prepare the specified quantum circuit reduced density matrix (marginal)
  HANDLE_CUTN_ERROR(cutensornetMarginalPrepare(cutnHandle, marginal, scratchSize, workDesc, 0x0));
  std::cout << "Prepared the specified quantum circuit reduced density matrix (marginal)\n";
Set up the workspace¶
Now we can set up the required workspace buffer.
  // Attach the workspace buffer
  HANDLE_CUTN_ERROR(cutensornetWorkspaceGetMemorySize(cutnHandle,
                                                      workDesc,
                                                      CUTENSORNET_WORKSIZE_PREF_RECOMMENDED,
                                                      CUTENSORNET_MEMSPACE_DEVICE,
                                                      CUTENSORNET_WORKSPACE_SCRATCH,
                                                      &worksize));
  std::cout << "Required scratch GPU workspace size (bytes) for marginal computation = " << worksize << std::endl;
  if(worksize <= scratchSize) {
    HANDLE_CUTN_ERROR(cutensornetWorkspaceSetMemory(cutnHandle, workDesc, CUTENSORNET_MEMSPACE_DEVICE,
                      CUTENSORNET_WORKSPACE_SCRATCH, d_scratch, worksize));
  } else {
    std::cout << "ERROR: Insufficient workspace size on Device!\n";
    std::abort();
  }
  std::cout << "Set the workspace buffer\n";
Compute the marginal distribution tensor¶
Once everything has been set up, we compute the requested marginal distribution tensor (reduced density matrix) for qubits 0 and 1, copy it back to Host memory, and print it.
  // Compute the specified quantum circuit reduced density matrix (marginal)
  HANDLE_CUTN_ERROR(cutensornetMarginalCompute(cutnHandle, marginal, nullptr, workDesc, d_rdm, 0));
  std::cout << "Computed the specified quantum circuit reduced density matrix (marginal)\n";
  std::vector<std::complex<double>> h_rdm(rdmSize);
  HANDLE_CUDA_ERROR(cudaMemcpy(h_rdm.data(), d_rdm, rdmSize * (2 * fp64size), cudaMemcpyDeviceToHost));
  std::cout << "Reduced density matrix for " << numMarginalModes << " qubits:\n";
  for(std::size_t i = 0; i < rdmDim; ++i) {
    for(std::size_t j = 0; j < rdmDim; ++j) {
      std::cout << " " << h_rdm[i + j * rdmDim];
    }
    std::cout << std::endl;
  }
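For the GHZ circuit constructed above, the exact marginal over qubits 0 and 1 is \(\rho = \tfrac{1}{2}\left(|00\rangle\langle 00| + |11\rangle\langle 11|\right)\), so the printout should show 0.5 in the first and last diagonal entries and zeros elsewhere. An illustrative sanity check one could append (not part of the sample):

  // Illustrative check: compare the computed marginal against the exact
  // GHZ result 0.5 * (|00><00| + |11><11|)
  bool ok = true;
  for(std::size_t i = 0; i < rdmDim; ++i) {
    for(std::size_t j = 0; j < rdmDim; ++j) {
      const double expected = (i == j && (i == 0 || i == rdmDim - 1)) ? 0.5 : 0.0;
      ok = ok && (std::abs(h_rdm[i + j * rdmDim] - expected) < 1e-10);
    }
  }
  std::cout << "GHZ marginal check: " << (ok ? "PASSED" : "FAILED") << std::endl;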
Free resources¶
  // Destroy the workspace descriptor
  HANDLE_CUTN_ERROR(cutensornetDestroyWorkspaceDescriptor(workDesc));
  std::cout << "Destroyed the workspace descriptor\n";

  // Destroy the quantum circuit reduced density matrix
  HANDLE_CUTN_ERROR(cutensornetDestroyMarginal(marginal));
  std::cout << "Destroyed the quantum circuit state reduced density matrix (marginal)\n";

  // Destroy the quantum circuit state
  HANDLE_CUTN_ERROR(cutensornetDestroyState(quantumState));
  std::cout << "Destroyed the quantum circuit state\n";

  for (int32_t i = 0; i < numQubits; i++) {
    HANDLE_CUDA_ERROR(cudaFree(d_mpsTensors[i]));
  }
  HANDLE_CUDA_ERROR(cudaFree(d_scratch));
  HANDLE_CUDA_ERROR(cudaFree(d_rdm));
  HANDLE_CUDA_ERROR(cudaFree(d_gateCX));
  HANDLE_CUDA_ERROR(cudaFree(d_gateH));
  std::cout << "Freed memory on GPU\n";

  // Finalize the cuTensorNet library
  HANDLE_CUTN_ERROR(cutensornetDestroy(cutnHandle));
  std::cout << "Finalized the cuTensorNet library\n";

  return 0;
}