Sampling the Matrix Product State (circuit with Matrix Product Operators)¶
The following code example illustrates how to define a tensor network state by a circuit with Matrix Product Operators (MPO), then compute its Matrix Product State (MPS) factorization, and, finally, sample the MPS-factorized state. The full code can be found in the NVIDIA/cuQuantum repository.
Headers and error handling¶
7#include <cstdlib>
8#include <cstdio>
9#include <cmath>
10#include <cassert>
11#include <complex>
12#include <random>
13#include <functional>
14#include <vector>
15#include <iostream>
16
17#include <cuda_runtime.h>
18#include <cutensornet.h>
19
// Error-checking helpers: each macro evaluates its argument once and, on
// failure, prints the error string plus the source line and terminates the
// process via std::abort().
// NOTE(review): the trailing ';' after each macro body and the absence of a
// do { } while(0) wrapper make these macros unsafe inside unbraced
// if/else statements -- confirm before reusing them outside this sample.
20#define HANDLE_CUDA_ERROR(x) \
21{ const auto err = x; \
22 if( err != cudaSuccess ) \
23 { printf("CUDA error %s in line %d\n", cudaGetErrorString(err), __LINE__); fflush(stdout); std::abort(); } \
24};
25
26#define HANDLE_CUTN_ERROR(x) \
27{ const auto err = x; \
28 if( err != CUTENSORNET_STATUS_SUCCESS ) \
29 { printf("cuTensorNet error %s in line %d\n", cutensornetGetErrorString(err), __LINE__); fflush(stdout); std::abort(); } \
30};
31
32
33int main()
34{
 // The sample indexes buffers with int64_t while CUDA sizes use size_t;
 // this guard rejects 32-bit builds where the two types differ in width.
35 static_assert(sizeof(size_t) == sizeof(int64_t), "Please build this sample on a 64-bit architecture!");
36
 // fp64size = bytes per double; a complex<double> element occupies 2 * fp64size bytes
37 constexpr std::size_t fp64size = sizeof(double);
38 constexpr cudaDataType_t dataType = CUDA_C_64F;
Define the tensor network state and the desired number of output samples to generate¶
Let’s define all parameters of the tensor network state for a quantum circuit with the given number of qubits and request to produce a given number of output samples for the full qubit register. We also configure the maximal bond dimension for the MPS and MPO tensor network representations used in this example. For the MPO, we also define the number of sites (qubits) it acts on as well as the number of layers of the MPO.
42 // Quantum state configuration
43 const int64_t numSamples = 128; // number of samples to generate
44 const int32_t numQudits = 6; // number of qudits
45 const int64_t quditDim = 2; // dimension of all qudits (2 = qubits)
46 const std::vector<int64_t> quditDims(numQudits, quditDim); // qudit dimensions
47 const int64_t mpsBondDim = 8; // maximum MPS bond dimension
48 const int64_t mpoBondDim = 2; // maximum MPO bond dimension
49 const int32_t mpoNumLayers = 5; // number of MPO layers
50 constexpr int32_t mpoNumSites = 4; // number of MPO sites
51 std::cout << "Quantum circuit: " << numQudits << " qudits; " << numSamples << " samples\n";
52
53 /* Action of five alternating four-site MPO gates (operators)
54 on the 6-qubit quantum register (illustration):
55
56 Q----X---------X---------X----
57 | | |
58 Q----X---------X---------X----
59 | | |
60 Q----X----X----X----X----X----
61 | | | | |
62 Q----X----X----X----X----X----
63 | |
64 Q---------X---------X---------
65 | |
66 Q---------X---------X---------
67
68 |layer|
69 */
 // Compile-time guard: the MPO shape logic below assumes at least two sites.
70 static_assert(mpoNumSites > 1, "Number of MPO sites must be larger than one!");
71
72 // Random number generator
 // Default-constructed engine => fixed default seed, so the random tensor
 // data (and hence the sampled distribution) is reproducible across runs.
73 std::default_random_engine generator;
74 std::uniform_real_distribution<double> distribution(-1.0, 1.0);
75 auto rnd = std::bind(distribution, generator);
Initialize the cuTensorNet library handle¶
79 // Initialize the cuTensorNet library
 // Bind all subsequent work to GPU 0; the handle owns library-side resources
 // and must outlive every cuTensorNet object created from it.
80 HANDLE_CUDA_ERROR(cudaSetDevice(0));
81 cutensornetHandle_t cutnHandle;
82 HANDLE_CUTN_ERROR(cutensornetCreate(&cutnHandle));
83 std::cout << "Initialized cuTensorNet library on GPU 0\n";
Define and allocate MPO tensors¶
Here we define the shapes of the MPO tensors, fill them in with random data on Host, and, finally, transfer them into Device memory.
87 /* MPO tensor mode numeration (open boundary condition):
88
89 2 3 2
90 | | |
91 X--1------0--X--2---- ... ----0--X
92 | | |
93 0 1 1
94
95 */
96 // Define shapes of the MPO tensors
97 using GateData = std::vector<std::complex<double>>;
98 using ModeExtents = std::vector<int64_t>;
99 std::vector<GateData> mpoTensors(mpoNumSites); // MPO tensors
100 std::vector<ModeExtents> mpoModeExtents(mpoNumSites); // mode extents for MPO tensors
 // Left half: bond dimension grows from the boundary by a factor of
 // quditDim^2 per site, capped at mpoBondDim.
101 int64_t upperBondDim = 1;
102 for(int tensId = 0; tensId < (mpoNumSites / 2); ++tensId) {
103 const auto leftBondDim = std::min(mpoBondDim, upperBondDim);
104 const auto rightBondDim = std::min(mpoBondDim, leftBondDim * (quditDim * quditDim));
105 if(tensId == 0) {
106 mpoModeExtents[tensId] = {quditDim, rightBondDim, quditDim};
107 }else{
108 mpoModeExtents[tensId] = {leftBondDim, quditDim, rightBondDim, quditDim};
109 }
110 upperBondDim = rightBondDim;
111 }
 // Odd site count: the central tensor keeps equal left/right bond extents.
112 auto centralBondDim = upperBondDim;
113 if(mpoNumSites % 2 != 0) {
114 const int tensId = mpoNumSites / 2;
115 mpoModeExtents[tensId] = {centralBondDim, quditDim, centralBondDim, quditDim};
116 }
 // Right half: mirror of the left half, built from the right boundary inward.
117 upperBondDim = 1;
118 for(int tensId = (mpoNumSites-1); tensId >= (mpoNumSites / 2) + (mpoNumSites % 2); --tensId) {
119 const auto rightBondDim = std::min(mpoBondDim, upperBondDim);
120 const auto leftBondDim = std::min(mpoBondDim, rightBondDim * (quditDim * quditDim));
121 if(tensId == (mpoNumSites-1)) {
122 mpoModeExtents[tensId] = {leftBondDim, quditDim, quditDim};
123 }else{
124 mpoModeExtents[tensId] = {leftBondDim, quditDim, rightBondDim, quditDim};
125 }
126 upperBondDim = leftBondDim;
127 }
128 // Fill in the MPO tensors with random data on Host
129 std::vector<const int64_t*> mpoModeExtentsPtr(mpoNumSites, nullptr);
130 for(int tensId = 0; tensId < mpoNumSites; ++tensId) {
131 mpoModeExtentsPtr[tensId] = mpoModeExtents[tensId].data();
132 const auto tensRank = mpoModeExtents[tensId].size();
 // tensVol = product of mode extents = number of complex elements
133 int64_t tensVol = 1;
134 for(int i = 0; i < tensRank; ++i) {
135 tensVol *= mpoModeExtents[tensId][i];
136 }
137 mpoTensors[tensId].resize(tensVol);
138 for(int64_t i = 0; i < tensVol; ++i) {
139 mpoTensors[tensId][i] = std::complex<double>(rnd(), rnd());
140 }
141 }
142 // Allocate and copy MPO tensors into Device memory
143 std::vector<void*> d_mpoTensors(mpoNumSites, nullptr);
144 for(int tensId = 0; tensId < mpoNumSites; ++tensId) {
145 const auto tensRank = mpoModeExtents[tensId].size();
146 int64_t tensVol = 1;
147 for(int i = 0; i < tensRank; ++i) {
148 tensVol *= mpoModeExtents[tensId][i];
149 }
 // 2 * fp64size = bytes per complex<double> element
150 HANDLE_CUDA_ERROR(cudaMalloc(&(d_mpoTensors[tensId]),
151 std::size_t(tensVol) * (2 * fp64size)));
152 HANDLE_CUDA_ERROR(cudaMemcpy(d_mpoTensors[tensId], mpoTensors[tensId].data(),
153 std::size_t(tensVol) * (2 * fp64size), cudaMemcpyHostToDevice));
154 }
155 std::cout << "Allocated and defined MPO tensors in GPU memory\n";
Define and allocate MPS tensors¶
Here we define the shapes of the MPS tensors and allocate Device memory for their storage.
159 // Define the MPS representation and allocate memory buffers for the MPS tensors
 // Same bond-growth scheme as for the MPO, but with per-site factor quditDim
 // and cap mpsBondDim; buffers are sized for the maximal (un-truncated) extents.
160 std::vector<ModeExtents> mpsModeExtents(numQudits);
161 std::vector<int64_t*> mpsModeExtentsPtr(numQudits, nullptr);
162 std::vector<void*> d_mpsTensors(numQudits, nullptr);
163 upperBondDim = 1;
164 for (int tensId = 0; tensId < (numQudits / 2); ++tensId) {
165 const auto leftBondDim = std::min(mpsBondDim, upperBondDim);
166 const auto rightBondDim = std::min(mpsBondDim, leftBondDim * quditDim);
167 if (tensId == 0) { // left boundary MPS tensor
168 mpsModeExtents[tensId] = {quditDim, rightBondDim};
169 HANDLE_CUDA_ERROR(cudaMalloc(&(d_mpsTensors[tensId]),
170 std::size_t(quditDim * rightBondDim) * (2 * fp64size)));
171 } else { // middle MPS tensors
172 mpsModeExtents[tensId] = {leftBondDim, quditDim, rightBondDim};
173 HANDLE_CUDA_ERROR(cudaMalloc(&(d_mpsTensors[tensId]),
174 std::size_t(leftBondDim * quditDim * rightBondDim) * (2 * fp64size)));
175 }
176 upperBondDim = rightBondDim;
177 mpsModeExtentsPtr[tensId] = mpsModeExtents[tensId].data();
178 }
 // Odd qudit count: central tensor with symmetric bond extents.
 // NOTE(review): no cudaMalloc happens on this branch; presumably numQudits
 // is always even in this sample (it is 6 here) -- confirm before reusing
 // with an odd qudit count.
179 centralBondDim = upperBondDim;
180 if(numQudits % 2 != 0) {
181 const int tensId = numQudits / 2;
182 mpsModeExtents[tensId] = {centralBondDim, quditDim, centralBondDim};
183 mpsModeExtentsPtr[tensId] = mpsModeExtents[tensId].data();
184 }
185 upperBondDim = 1;
186 for (int tensId = (numQudits-1); tensId >= (numQudits / 2) + (numQudits % 2); --tensId) {
187 const auto rightBondDim = std::min(mpsBondDim, upperBondDim);
188 const auto leftBondDim = std::min(mpsBondDim, rightBondDim * quditDim);
189 if (tensId == (numQudits-1)) { // right boundary MPS tensor
190 mpsModeExtents[tensId] = {leftBondDim, quditDim};
191 HANDLE_CUDA_ERROR(cudaMalloc(&(d_mpsTensors[tensId]),
192 std::size_t(leftBondDim * quditDim) * (2 * fp64size)));
193 } else { // middle MPS tensors
194 mpsModeExtents[tensId] = {leftBondDim, quditDim, rightBondDim};
195 HANDLE_CUDA_ERROR(cudaMalloc(&(d_mpsTensors[tensId]),
196 std::size_t(leftBondDim * quditDim * rightBondDim) * (2 * fp64size)));
197 }
198 upperBondDim = leftBondDim;
199 mpsModeExtentsPtr[tensId] = mpsModeExtents[tensId].data();
200 }
201 std::cout << "Allocated MPS tensors in GPU memory\n";
Allocate the scratch buffer on GPU¶
205 // Query free memory on Device and allocate a scratch buffer
206 std::size_t freeSize {0}, totalSize {0};
207 HANDLE_CUDA_ERROR(cudaMemGetInfo(&freeSize, &totalSize));
 // Round free memory down to a 4096-byte multiple, then take half of it;
 // the same buffer is later reused for both MPS computation and sampling.
208 const std::size_t scratchSize = (freeSize - (freeSize % 4096)) / 2; // use half of available memory with alignment
209 void *d_scratch {nullptr};
210 HANDLE_CUDA_ERROR(cudaMalloc(&d_scratch, scratchSize));
211 std::cout << "Allocated " << scratchSize << " bytes of scratch memory on GPU: "
212 << "[" << d_scratch << ":" << (void*)(((char*)(d_scratch)) + scratchSize) << ")\n";
Create a pure tensor network state¶
Now let’s create a pure tensor network state for a quantum circuit with the given number of qubits (vacuum state).
216 // Create the initial quantum state
 // Declares a pure state of numQudits qudits in complex double precision;
 // the state starts in the vacuum (all-zero) configuration.
217 cutensornetState_t quantumState;
218 HANDLE_CUTN_ERROR(cutensornetCreateState(cutnHandle, CUTENSORNET_STATE_PURITY_PURE, numQudits, quditDims.data(),
219 dataType, &quantumState));
220 std::cout << "Created the initial (vacuum) quantum state\n";
Construct necessary MPO tensor network operators¶
Let’s construct two MPO tensor network operators acting on different subsets of qubits.
224 // Construct the MPO tensor network operators
 // Two operators reuse the same device-side MPO tensors but act on different
 // qudit subsets: {0,1,2,3} and {2,3,4,5}. The temporary vectors built inline
 // below live for the duration of each full call expression, so passing
 // .data() on them is safe here.
225 int64_t componentId;
226 cutensornetNetworkOperator_t tnOperator1;
227 HANDLE_CUTN_ERROR(cutensornetCreateNetworkOperator(cutnHandle, numQudits, quditDims.data(),
228 dataType, &tnOperator1));
229 HANDLE_CUTN_ERROR(cutensornetNetworkOperatorAppendMPO(cutnHandle, tnOperator1, make_cuDoubleComplex(1.0, 0.0),
230 mpoNumSites, std::vector<int32_t>({0,1,2,3}).data(),
231 mpoModeExtentsPtr.data(), /*strides=*/nullptr,
232 std::vector<const void*>(d_mpoTensors.cbegin(), d_mpoTensors.cend()).data(),
233 CUTENSORNET_BOUNDARY_CONDITION_OPEN, &componentId));
234 cutensornetNetworkOperator_t tnOperator2;
235 HANDLE_CUTN_ERROR(cutensornetCreateNetworkOperator(cutnHandle, numQudits, quditDims.data(),
236 dataType, &tnOperator2));
237 HANDLE_CUTN_ERROR(cutensornetNetworkOperatorAppendMPO(cutnHandle, tnOperator2, make_cuDoubleComplex(1.0, 0.0),
238 mpoNumSites, std::vector<int32_t>({2,3,4,5}).data(),
239 mpoModeExtentsPtr.data(), /*strides=*/nullptr,
240 std::vector<const void*>(d_mpoTensors.cbegin(), d_mpoTensors.cend()).data(),
241 CUTENSORNET_BOUNDARY_CONDITION_OPEN, &componentId));
242 std::cout << "Constructed two MPO tensor network operators\n";
Apply MPO tensor network operators to the quantum circuit state¶
Let’s construct the target quantum circuit by applying MPO tensor network operators in an alternating fashion.
246 // Apply the MPO tensor network operators to the quantum state
 // Alternate the two operators across layers (even layer -> qudits {0..3},
 // odd layer -> qudits {2..5}), matching the illustration above.
 // NOTE(review): the literal (1, 0, 0) arguments presumably encode
 // immutable/adjoint-style flags -- confirm against the
 // cutensornetStateApplyNetworkOperator API documentation.
247 for(int layer = 0; layer < mpoNumLayers; ++layer) {
248 int64_t operatorId;
249 if(layer % 2 == 0) {
250 HANDLE_CUTN_ERROR(cutensornetStateApplyNetworkOperator(cutnHandle, quantumState, tnOperator1,
251 1, 0, 0, &operatorId));
252 }else{
253 HANDLE_CUTN_ERROR(cutensornetStateApplyNetworkOperator(cutnHandle, quantumState, tnOperator2,
254 1, 0, 0, &operatorId));
255 }
256 }
257 std::cout << "Applied " << mpoNumLayers << " MPO gates to the quantum state\n";
Request MPS factorization for the final quantum circuit state¶
Here we express our intent to factorize the final state of the quantum circuit using MPS factorization. The provided shapes (mode extents) of the MPS tensors represent maximum size limits during renormalization. Computed mode extents for the final MPS tensors are less than or equal to these limits. Note that no computation is done here yet.
261 // Specify the target MPS representation (use default strides)
 // Only registers the requested factorized form; the actual computation
 // happens later in cutensornetStateCompute.
262 HANDLE_CUTN_ERROR(cutensornetStateFinalizeMPS(cutnHandle, quantumState,
263 CUTENSORNET_BOUNDARY_CONDITION_OPEN, mpsModeExtentsPtr.data(), /*strides=*/nullptr ));
264 std::cout << "Finalized the form of the desired MPS representation\n";
Configure MPS factorization procedure¶
After expressing our intent to perform MPS factorization of the final quantum circuit state, we can also configure the MPS factorization procedure by setting different options, for example, the SVD algorithm.
268 // Set up the SVD method for bonds truncation (optional)
 // Select the Jacobi SVD variant for bond truncation during MPS renormalization.
269 cutensornetTensorSVDAlgo_t algo = CUTENSORNET_TENSOR_SVD_ALGO_GESVDJ;
270 HANDLE_CUTN_ERROR(cutensornetStateConfigure(cutnHandle, quantumState,
271 CUTENSORNET_STATE_MPS_SVD_CONFIG_ALGO, &algo, sizeof(algo)));
272 cutensornetStateMPOApplication_t mpsApplication = CUTENSORNET_STATE_MPO_APPLICATION_EXACT;
273 // Use exact MPS-MPO application as extent of 8 can faithfully represent a 6 qubit state (optional)
274 HANDLE_CUTN_ERROR(cutensornetStateConfigure(cutnHandle, quantumState,
275 CUTENSORNET_STATE_CONFIG_MPS_MPO_APPLICATION, &mpsApplication, sizeof(mpsApplication)));
276 std::cout << "Configured the MPS computation\n";
Prepare the computation of MPS factorization¶
Let’s create a workspace descriptor and prepare the computation of the MPS factorization of the final quantum circuit state.
280 // Prepare the MPS computation and attach a workspace buffer
281 cutensornetWorkspaceDescriptor_t workDesc;
282 HANDLE_CUTN_ERROR(cutensornetCreateWorkspaceDescriptor(cutnHandle, &workDesc));
283 std::cout << "Created the workspace descriptor\n";
 // Planning step: fills workDesc with the memory requirements of the MPS
 // computation, bounded by scratchSize.
284 HANDLE_CUTN_ERROR(cutensornetStatePrepare(cutnHandle, quantumState, scratchSize, workDesc, 0x0));
285 int64_t worksize {0};
286 HANDLE_CUTN_ERROR(cutensornetWorkspaceGetMemorySize(cutnHandle,
287 workDesc,
288 CUTENSORNET_WORKSIZE_PREF_RECOMMENDED,
289 CUTENSORNET_MEMSPACE_DEVICE,
290 CUTENSORNET_WORKSPACE_SCRATCH,
291 &worksize));
292 std::cout << "Scratch GPU workspace size (bytes) for the MPS computation = " << worksize << std::endl;
 // NOTE(review): int64_t worksize is compared against size_t scratchSize, so
 // worksize is implicitly converted to unsigned here; this is safe only while
 // worksize is non-negative -- presumably guaranteed by the API, confirm.
293 if(worksize <= scratchSize) {
294 HANDLE_CUTN_ERROR(cutensornetWorkspaceSetMemory(cutnHandle, workDesc, CUTENSORNET_MEMSPACE_DEVICE,
295 CUTENSORNET_WORKSPACE_SCRATCH, d_scratch, worksize));
296 }else{
297 std::cout << "ERROR: Insufficient workspace size on Device!\n";
298 std::abort();
299 }
300 std::cout << "Set the workspace buffer and prepared the MPS computation\n";
Compute MPS factorization¶
Once the MPS factorization procedure has been configured and prepared, let’s compute the MPS factorization of the final quantum circuit state.
304 // Compute the MPS factorization of the quantum circuit state
 // Writes the factorized tensors into d_mpsTensors and updates
 // mpsModeExtents in place with the actually computed (possibly smaller) extents.
305 HANDLE_CUTN_ERROR(cutensornetStateCompute(cutnHandle, quantumState,
306 workDesc, mpsModeExtentsPtr.data(), /*strides=*/nullptr, d_mpsTensors.data(), 0));
307 std::cout << "Computed the MPS factorization of the quantum circuit state\n";
308
Create the tensor network state sampler¶
Once the quantum circuit state has been constructed and factorized using the MPS representation, let’s create the tensor network state sampler for all qubits.
311 // Create the quantum circuit sampler
 // nullptr mode list => sample the full qudit register (all numQudits modes).
312 cutensornetStateSampler_t sampler;
313 HANDLE_CUTN_ERROR(cutensornetCreateSampler(cutnHandle, quantumState, numQudits, nullptr, &sampler));
314 std::cout << "Created the quantum circuit sampler\n";
Configure the tensor network state sampler¶
Optionally, we can configure the tensor network state sampler by setting the number of hyper-samples to be used by the tensor network contraction path finder.
318 // Configure the quantum circuit sampler
319 const int32_t numHyperSamples = 8; // desired number of hyper samples used in the tensor network contraction path finder
320 HANDLE_CUTN_ERROR(cutensornetSamplerConfigure(cutnHandle, sampler,
321 CUTENSORNET_SAMPLER_OPT_NUM_HYPER_SAMPLES, &numHyperSamples, sizeof(numHyperSamples)));
Prepare the tensor network state sampler¶
Now let’s prepare the tensor network state sampler.
325 // Prepare the quantum circuit sampler
 // Reuses workDesc; the sampler's own memory requirement is queried next.
326 HANDLE_CUTN_ERROR(cutensornetSamplerPrepare(cutnHandle, sampler, scratchSize, workDesc, 0x0));
327 std::cout << "Prepared the quantum circuit state sampler\n";
Set up the workspace¶
Now we can set up the required workspace buffer.
331 // Attach the workspace buffer
 // Re-query the scratch requirement: workDesc now reflects the sampling task,
 // not the earlier MPS computation. The same d_scratch buffer is reattached.
332 HANDLE_CUTN_ERROR(cutensornetWorkspaceGetMemorySize(cutnHandle,
333 workDesc,
334 CUTENSORNET_WORKSIZE_PREF_RECOMMENDED,
335 CUTENSORNET_MEMSPACE_DEVICE,
336 CUTENSORNET_WORKSPACE_SCRATCH,
337 &worksize));
338 std::cout << "Scratch GPU workspace size (bytes) for MPS Sampling = " << worksize << std::endl;
339 assert(worksize > 0);
340 if(worksize <= scratchSize) {
341 HANDLE_CUTN_ERROR(cutensornetWorkspaceSetMemory(cutnHandle, workDesc, CUTENSORNET_MEMSPACE_DEVICE,
342 CUTENSORNET_WORKSPACE_SCRATCH, d_scratch, worksize));
343 }else{
344 std::cout << "ERROR: Insufficient workspace size on Device!\n";
345 std::abort();
346 }
347 std::cout << "Set the workspace buffer\n";
Perform sampling of the final quantum circuit state¶
Once everything has been set up, we perform sampling of the final MPS-factorized quantum circuit state and print the output samples.
351 // Sample the quantum circuit state
 // Output layout is row-major per sample: samples[i * numQudits + j] holds
 // the measured value of qudit j in sample i (host memory).
352 std::vector<int64_t> samples(numQudits * numSamples); // samples[SampleId][QuditId] reside in Host memory
353 HANDLE_CUTN_ERROR(cutensornetSamplerSample(cutnHandle, sampler, numSamples, workDesc, samples.data(), 0));
354 std::cout << "Performed quantum circuit state sampling\n";
355 std::cout << "Generated samples:\n";
356 for(int64_t i = 0; i < numSamples; ++i) {
357 for(int64_t j = 0; j < numQudits; ++j) std::cout << " " << samples[i * numQudits + j];
358 std::cout << std::endl;
359 }
Free resources¶
363 // Destroy the quantum circuit sampler
 // Teardown proceeds in reverse order of creation: sampler, workspace
 // descriptor, network operators, state, device buffers, then the handle.
364 HANDLE_CUTN_ERROR(cutensornetDestroySampler(sampler));
365 std::cout << "Destroyed the quantum circuit state sampler\n";
366
367 // Destroy the workspace descriptor
368 HANDLE_CUTN_ERROR(cutensornetDestroyWorkspaceDescriptor(workDesc));
369 std::cout << "Destroyed the workspace descriptor\n";
370
371 // Destroy the MPO tensor network operators
372 HANDLE_CUTN_ERROR(cutensornetDestroyNetworkOperator(tnOperator2));
373 HANDLE_CUTN_ERROR(cutensornetDestroyNetworkOperator(tnOperator1));
374 std::cout << "Destroyed the MPO tensor network operators\n";
375
376 // Destroy the quantum circuit state
377 HANDLE_CUTN_ERROR(cutensornetDestroyState(quantumState));
378 std::cout << "Destroyed the quantum circuit state\n";
379
380 // Free GPU buffers
381 HANDLE_CUDA_ERROR(cudaFree(d_scratch));
382 for (int32_t i = 0; i < numQudits; ++i) {
383 HANDLE_CUDA_ERROR(cudaFree(d_mpsTensors[i]));
384 }
385 for (int32_t i = 0; i < mpoNumSites; ++i) {
386 HANDLE_CUDA_ERROR(cudaFree(d_mpoTensors[i]));
387 }
388 std::cout << "Freed memory on GPU\n";
389
390 // Finalize the cuTensorNet library
391 HANDLE_CUTN_ERROR(cutensornetDestroy(cutnHandle));
392 std::cout << "Finalized the cuTensorNet library\n";
393
394 return 0;
395}