Computing Tensor Network State Marginal Distribution Tensor¶
The following code example illustrates how to define a tensor network state and then compute the tensor network state marginal distribution tensor. The full code can be found in the NVIDIA/cuQuantum repository (here).
Headers and error handling¶
7#include <cstdlib>
8#include <cstdio>
9#include <cassert>
10#include <complex>
11#include <vector>
12#include <bitset>
13#include <iostream>
14
15#include <cuda_runtime.h>
16#include <cutensornet.h>
17
18
19#define HANDLE_CUDA_ERROR(x) \
20{ const auto err = x; \
21 if( err != cudaSuccess ) \
22 { printf("CUDA error %s in line %d\n", cudaGetErrorString(err), __LINE__); fflush(stdout); std::abort(); } \
23};
24
25#define HANDLE_CUTN_ERROR(x) \
26{ const auto err = x; \
27 if( err != CUTENSORNET_STATUS_SUCCESS ) \
28 { printf("cuTensorNet error %s in line %d\n", cutensornetGetErrorString(err), __LINE__); fflush(stdout); std::abort(); } \
29};
30
31
32int main(int argc, char **argv)
33{
34 constexpr std::size_t fp64size = sizeof(double);
Define the tensor network state and the desired marginal distribution tensor¶
Let’s define a tensor network state corresponding to a 16-qubit quantum circuit and request the marginal distribution tensor for qubits 0 and 1.
38 // Quantum state configuration
39 constexpr int32_t numQubits = 16;
40 const std::vector<int64_t> qubitDims(numQubits,2); // qubit dimensions
41 constexpr int32_t numMarginalModes = 2; // rank of the marginal (reduced density matrix)
42 const std::vector<int32_t> marginalModes({0,1}); // open qubits (must be in acsending order)
43 std::cout << "Quantum circuit: " << numQubits << " qubits\n";
Initialize the cuTensorNet library handle¶
47 // Initialize the cuTensorNet library
48 HANDLE_CUDA_ERROR(cudaSetDevice(0));
49 cutensornetHandle_t cutnHandle;
50 HANDLE_CUTN_ERROR(cutensornetCreate(&cutnHandle));
51 std::cout << "Initialized cuTensorNet library on GPU 0\n";
Define quantum gates on GPU¶
55 // Define necessary quantum gate tensors in Host memory
56 const double invsq2 = 1.0 / std::sqrt(2.0);
57 // Hadamard gate
58 const std::vector<std::complex<double>> h_gateH {{invsq2, 0.0}, {invsq2, 0.0},
59 {invsq2, 0.0}, {-invsq2, 0.0}};
60 // CX gate
61 const std::vector<std::complex<double>> h_gateCX {{1.0, 0.0}, {0.0, 0.0}, {0.0, 0.0}, {0.0, 0.0},
62 {0.0, 0.0}, {1.0, 0.0}, {0.0, 0.0}, {0.0, 0.0},
63 {0.0, 0.0}, {0.0, 0.0}, {0.0, 0.0}, {1.0, 0.0},
64 {0.0, 0.0}, {0.0, 0.0}, {1.0, 0.0}, {0.0, 0.0}};
65
66 // Copy quantum gates to Device memory
67 void *d_gateH{nullptr}, *d_gateCX{nullptr};
68 HANDLE_CUDA_ERROR(cudaMalloc(&d_gateH, 4 * (2 * fp64size)));
69 HANDLE_CUDA_ERROR(cudaMalloc(&d_gateCX, 16 * (2 * fp64size)));
70 std::cout << "Allocated quantum gate memory on GPU\n";
71 HANDLE_CUDA_ERROR(cudaMemcpy(d_gateH, h_gateH.data(), 4 * (2 * fp64size), cudaMemcpyHostToDevice));
72 HANDLE_CUDA_ERROR(cudaMemcpy(d_gateCX, h_gateCX.data(), 16 * (2 * fp64size), cudaMemcpyHostToDevice));
73 std::cout << "Copied quantum gates to GPU memory\n";
Allocate the marginal distribution tensor on GPU¶
Here we allocate the marginal distribution tensor, that is, the reduced density matrix for qubits 0 and 1 on GPU.
77 // Allocate the specified quantum circuit reduced density matrix (marginal) in Device memory
78 void *d_rdm{nullptr};
79 std::size_t rdmDim = 1;
80 for(const auto & mode: marginalModes) rdmDim *= qubitDims[mode];
81 const std::size_t rdmSize = rdmDim * rdmDim;
82 HANDLE_CUDA_ERROR(cudaMalloc(&d_rdm, rdmSize * (2 * fp64size)));
Allocate the scratch buffer on GPU¶
86 // Query the free memory on Device
87 std::size_t freeSize{0}, totalSize{0};
88 HANDLE_CUDA_ERROR(cudaMemGetInfo(&freeSize, &totalSize));
89 const std::size_t scratchSize = (freeSize - (freeSize % 4096)) / 2; // use half of available memory with alignment
90 void *d_scratch{nullptr};
91 HANDLE_CUDA_ERROR(cudaMalloc(&d_scratch, scratchSize));
92 std::cout << "Allocated " << scratchSize << " bytes of scratch memory on GPU\n";
Create a pure tensor network state¶
Now let’s create a pure tensor network state for a 16-qubit quantum circuit.
96 // Create the initial quantum state
97 cutensornetState_t quantumState;
98 HANDLE_CUTN_ERROR(cutensornetCreateState(cutnHandle, CUTENSORNET_STATE_PURITY_PURE, numQubits, qubitDims.data(),
99 CUDA_C_64F, &quantumState));
100 std::cout << "Created the initial quantum state\n";
Apply quantum gates¶
Let’s construct the GHZ quantum circuit by applying the corresponding quantum gates.
104 // Construct the final quantum circuit state (apply quantum gates) for the GHZ circuit
105 int64_t id;
106 HANDLE_CUTN_ERROR(cutensornetStateApplyTensor(cutnHandle, quantumState, 1, std::vector<int32_t>{{0}}.data(),
107 d_gateH, nullptr, 1, 0, 1, &id));
108 for(int32_t i = 1; i < numQubits; ++i) {
109 HANDLE_CUTN_ERROR(cutensornetStateApplyTensor(cutnHandle, quantumState, 2, std::vector<int32_t>{{i-1,i}}.data(),
110 d_gateCX, nullptr, 1, 0, 1, &id));
111 }
112 std::cout << "Applied quantum gates\n";
Create the marginal distribution object¶
Once the quantum circuit has been constructed, let’s create the marginal distribution object that will compute the marginal distribution tensor (reduced density matrix) for qubits 0 and 1.
116 // Specify the desired reduced density matrix (marginal)
117 cutensornetStateMarginal_t marginal;
118 HANDLE_CUTN_ERROR(cutensornetCreateMarginal(cutnHandle, quantumState, numMarginalModes, marginalModes.data(),
119 0, nullptr, std::vector<int64_t>{{1,2,4,8}}.data(), &marginal)); // using explicit strides
120 std::cout << "Created the specified quantum circuit reduced densitry matrix (marginal)\n";
Configure the marginal distribution object¶
Now we can configure the marginal distribution object by setting the number of hyper-samples to be used by the tensor network contraction path finder.
124 // Configure the computation of the specified quantum circuit reduced density matrix (marginal)
125 const int32_t numHyperSamples = 8; // desired number of hyper samples used in the tensor network contraction path finder
126 HANDLE_CUTN_ERROR(cutensornetMarginalConfigure(cutnHandle, marginal,
127 CUTENSORNET_MARGINAL_OPT_NUM_HYPER_SAMPLES, &numHyperSamples, sizeof(numHyperSamples)));
Prepare the computation of the marginal distribution tensor¶
Let’s create a workspace descriptor and prepare the computation of the marginal distribution tensor.
131 // Prepare the specified quantum circuit reduced densitry matrix (marginal)
132 cutensornetWorkspaceDescriptor_t workDesc;
133 HANDLE_CUTN_ERROR(cutensornetCreateWorkspaceDescriptor(cutnHandle, &workDesc));
134 std::cout << "Created the workspace descriptor\n";
135 HANDLE_CUTN_ERROR(cutensornetMarginalPrepare(cutnHandle, marginal, scratchSize, workDesc, 0x0));
136 std::cout << "Prepared the specified quantum circuit reduced density matrix (marginal)\n";
Set up the workspace¶
Now we can set up the required workspace buffer.
140 // Attach the workspace buffer
141 int64_t worksize {0};
142 HANDLE_CUTN_ERROR(cutensornetWorkspaceGetMemorySize(cutnHandle,
143 workDesc,
144 CUTENSORNET_WORKSIZE_PREF_RECOMMENDED,
145 CUTENSORNET_MEMSPACE_DEVICE,
146 CUTENSORNET_WORKSPACE_SCRATCH,
147 &worksize));
148 std::cout << "Required scratch GPU workspace size (bytes) = " << worksize << std::endl;
149 if(worksize <= scratchSize) {
150 HANDLE_CUTN_ERROR(cutensornetWorkspaceSetMemory(cutnHandle, workDesc, CUTENSORNET_MEMSPACE_DEVICE,
151 CUTENSORNET_WORKSPACE_SCRATCH, d_scratch, worksize));
152 }else{
153 std::cout << "ERROR: Insufficient workspace size on Device!\n";
154 std::abort();
155 }
156 std::cout << "Set the workspace buffer\n";
Compute the marginal distribution tensor¶
Once everything had been set up, we compute the requested marginal distribution tensor (reduced density matrix) for qubits 0 and 1, copy it back to Host memory, and print it.
160 // Compute the specified quantum circuit reduced densitry matrix (marginal)
161 HANDLE_CUTN_ERROR(cutensornetMarginalCompute(cutnHandle, marginal, nullptr, workDesc, d_rdm, 0));
162 std::cout << "Computed the specified quantum circuit reduced density matrix (marginal)\n";
163 std::vector<std::complex<double>> h_rdm(rdmSize);
164 HANDLE_CUDA_ERROR(cudaMemcpy(h_rdm.data(), d_rdm, rdmSize * (2 * fp64size), cudaMemcpyDeviceToHost));
165 std::cout << "Reduced density matrix for " << numMarginalModes << " qubits:\n";
166 for(std::size_t i = 0; i < rdmDim; ++i) {
167 for(std::size_t j = 0; j < rdmDim; ++j) {
168 std::cout << " " << h_rdm[i + j * rdmDim];
169 }
170 std::cout << std::endl;
171 }
Free resources¶
175 // Destroy the workspace descriptor
176 HANDLE_CUTN_ERROR(cutensornetDestroyWorkspaceDescriptor(workDesc));
177 std::cout << "Destroyed the workspace descriptor\n";
178
179 // Destroy the quantum circuit reduced density matrix
180 HANDLE_CUTN_ERROR(cutensornetDestroyMarginal(marginal));
181 std::cout << "Destroyed the quantum circuit state reduced density matrix (marginal)\n";
182
183 // Destroy the quantum circuit state
184 HANDLE_CUTN_ERROR(cutensornetDestroyState(quantumState));
185 std::cout << "Destroyed the quantum circuit state\n";
186
187 HANDLE_CUDA_ERROR(cudaFree(d_scratch));
188 HANDLE_CUDA_ERROR(cudaFree(d_rdm));
189 HANDLE_CUDA_ERROR(cudaFree(d_gateCX));
190 HANDLE_CUDA_ERROR(cudaFree(d_gateH));
191 std::cout << "Freed memory on GPU\n";
192
193 // Finalize the cuTensorNet library
194 HANDLE_CUTN_ERROR(cutensornetDestroy(cutnHandle));
195 std::cout << "Finalized the cuTensorNet library\n";
196
197 return 0;
198}