Code example (mixed state)#

Simulating a noisy circuit with a mixed (density-matrix) state#

The following code example illustrates how to define a mixed tensor network state (density matrix), apply a noisy quantum channel to it, and then compute both the reduced density matrix and its diagonal (the marginal probability distribution). The full code can be found in the NVIDIA/cuQuantum repository (here).

Headers and error handling#

 7#include <cstdlib>
 8#include <cstdio>
 9#include <cassert>
10#include <complex>
11#include <vector>
12#include <iostream>
13
14#include <cuda_runtime.h>
15#include <cutensornet.h>
16
17
18#define HANDLE_CUDA_ERROR(x) \
19{ const auto err = x; \
20  if( err != cudaSuccess ) \
21  { printf("CUDA error %s in line %d\n", cudaGetErrorString(err), __LINE__); fflush(stdout); std::abort(); } \
22};
23
24#define HANDLE_CUTN_ERROR(x) \
25{ const auto err = x; \
26  if( err != CUTENSORNET_STATUS_SUCCESS ) \
27  { printf("cuTensorNet error %s in line %d\n", cutensornetGetErrorString(err), __LINE__); fflush(stdout); std::abort(); } \
28};
29
30
31int main()
32{
33  static_assert(sizeof(size_t) == sizeof(int64_t), "Please build this sample on a 64-bit architecture!");
34
35  constexpr std::size_t fp64size = sizeof(double);

Define the mixed-state geometry#

Let’s define a mixed state corresponding to a 6-qubit quantum circuit and request the reduced density matrix (and its diagonal) for qubits 0 and 1.

39  // Quantum state configuration: build a mixed (density-matrix) state of a noisy circuit
40  constexpr int32_t numQubits = 6; // number of qubits
41  const std::vector<int64_t> qubitDims(numQubits, 2); // qubit dimensions
42  const std::vector<int32_t> marginalModes({0, 1}); // open qubits defining the marginal (ascending order)
43  const int32_t numMarginalModes = marginalModes.size();
44  std::cout << "Mixed-state (density-matrix) simulation of a " << numQubits << "-qubit noisy circuit\n";

Initialize the cuTensorNet library handle#

48  // Initialize the cuTensorNet library
49  HANDLE_CUDA_ERROR(cudaSetDevice(0));
50  cutensornetHandle_t cutnHandle;
51  HANDLE_CUTN_ERROR(cutensornetCreate(&cutnHandle));
52  std::cout << "Initialized cuTensorNet library on GPU 0\n";

Define quantum gates and Kraus operators#

In addition to the gate tensors, we define the Kraus operators of an amplitude-damping channel. They satisfy the completeness relation \(K_0^\dagger K_0 + K_1^\dagger K_1 = I\), making the channel trace preserving.

56  // Define the quantum gate tensors (Host memory)
57  const double invsq2 = 1.0 / std::sqrt(2.0);
58  //  Hadamard gate
59  const std::vector<std::complex<double>> h_gateH {{invsq2, 0.0},  {invsq2, 0.0},
60                                                   {invsq2, 0.0}, {-invsq2, 0.0}};
61  //  CX gate
62  const std::vector<std::complex<double>> h_gateCX {{1.0, 0.0}, {0.0, 0.0}, {0.0, 0.0}, {0.0, 0.0},
63                                                    {0.0, 0.0}, {1.0, 0.0}, {0.0, 0.0}, {0.0, 0.0},
64                                                    {0.0, 0.0}, {0.0, 0.0}, {0.0, 0.0}, {1.0, 0.0},
65                                                    {0.0, 0.0}, {0.0, 0.0}, {1.0, 0.0}, {0.0, 0.0}};
66
67  // Define the Kraus operators of an amplitude-damping channel (rate gamma), stored
68  // column-major like the gate tensors. K0 = diag(1, sqrt(1-gamma)); K1 has a single
69  // entry sqrt(gamma) in the |0><1| position. They satisfy K0^dag K0 + K1^dag K1 = I.
70  const double gamma = 0.2;
71  const double s = std::sqrt(1.0 - gamma);
72  const double g = std::sqrt(gamma);
73  const std::vector<std::complex<double>> h_kraus0 {{1.0, 0.0}, {0.0, 0.0},
74                                                    {0.0, 0.0}, {s,   0.0}};
75  const std::vector<std::complex<double>> h_kraus1 {{0.0, 0.0}, {0.0, 0.0},
76                                                    {g,   0.0}, {0.0, 0.0}};
77
78  // Copy the gate and Kraus tensors to Device memory
79  void *d_gateH{nullptr}, *d_gateCX{nullptr}, *d_kraus0{nullptr}, *d_kraus1{nullptr};
80  HANDLE_CUDA_ERROR(cudaMalloc(&d_gateH, 4 * (2 * fp64size)));
81  HANDLE_CUDA_ERROR(cudaMalloc(&d_gateCX, 16 * (2 * fp64size)));
82  HANDLE_CUDA_ERROR(cudaMalloc(&d_kraus0, 4 * (2 * fp64size)));
83  HANDLE_CUDA_ERROR(cudaMalloc(&d_kraus1, 4 * (2 * fp64size)));
84  HANDLE_CUDA_ERROR(cudaMemcpy(d_gateH, h_gateH.data(), 4 * (2 * fp64size), cudaMemcpyHostToDevice));
85  HANDLE_CUDA_ERROR(cudaMemcpy(d_gateCX, h_gateCX.data(), 16 * (2 * fp64size), cudaMemcpyHostToDevice));
86  HANDLE_CUDA_ERROR(cudaMemcpy(d_kraus0, h_kraus0.data(), 4 * (2 * fp64size), cudaMemcpyHostToDevice));
87  HANDLE_CUDA_ERROR(cudaMemcpy(d_kraus1, h_kraus1.data(), 4 * (2 * fp64size), cudaMemcpyHostToDevice));
88  std::cout << "Copied quantum gates and Kraus operators to GPU memory\n";

Allocate the reduced density matrix and its diagonal#

The full reduced density matrix is a rank-\(2M\) tensor (here \(M = 2\) marginal modes), while its diagonal – the marginal probability distribution – is a rank-\(M\) tensor.

92  // Allocate Device memory for the full reduced density matrix (marginal) and its diagonal
93  std::size_t rdmDim = 1;
94  for(const auto & mode: marginalModes) rdmDim *= qubitDims[mode];
95  const std::size_t rdmSize = rdmDim * rdmDim;           // full RDM has rank 2*numMarginalModes
96  void *d_rdm{nullptr}, *d_rdmDiag{nullptr};
97  HANDLE_CUDA_ERROR(cudaMalloc(&d_rdm, rdmSize * (2 * fp64size)));
98  HANDLE_CUDA_ERROR(cudaMalloc(&d_rdmDiag, rdmDim * (2 * fp64size))); // diagonal has rank numMarginalModes
99  std::cout << "Allocated memory for the reduced density matrix and its diagonal\n";

Allocate the scratch buffer on GPU#

103  // Query the free memory on Device and allocate scratch
104  std::size_t freeSize{0}, totalSize{0};
105  HANDLE_CUDA_ERROR(cudaMemGetInfo(&freeSize, &totalSize));
106  const std::size_t scratchSize = (freeSize - (freeSize % 4096)) / 2; // use half of available memory with alignment
107  void *d_scratch{nullptr};
108  HANDLE_CUDA_ERROR(cudaMalloc(&d_scratch, scratchSize));
109  std::cout << "Allocated " << scratchSize << " bytes of scratch memory on GPU\n";

Create a mixed tensor network state#

Now let’s create a mixed tensor network state by passing CUTENSORNET_STATE_PURITY_MIXED, whose fundamental object is the density matrix \(\rho\).

113  // Create the initial MIXED (density-matrix) quantum state
114  cutensornetState_t quantumState;
115  HANDLE_CUTN_ERROR(cutensornetCreateState(cutnHandle, CUTENSORNET_STATE_PURITY_MIXED, numQubits, qubitDims.data(),
116                    CUDA_C_64F, &quantumState));
117  std::cout << "Created the initial mixed quantum state\n";

Apply quantum gates and a noisy channel#

We construct a GHZ circuit and then apply a general (non-unitary) quantum channel with cutensornetStateApplyGeneralChannel(). Applying a general channel is only valid for a mixed state; it evolves \(\rho\) deterministically.

121  // Construct the GHZ circuit, then apply an amplitude-damping channel to qubit 0
122  int64_t id;
123  HANDLE_CUTN_ERROR(cutensornetStateApplyTensorOperator(cutnHandle, quantumState, 1, std::vector<int32_t>{{0}}.data(),
124                    d_gateH, nullptr, 1, 0, 1, &id));
125  for(int32_t i = 1; i < numQubits; ++i) {
126    HANDLE_CUTN_ERROR(cutensornetStateApplyTensorOperator(cutnHandle, quantumState, 2, std::vector<int32_t>{{i-1,i}}.data(),
127                      d_gateCX, nullptr, 1, 0, 1, &id));
128  }
129  std::cout << "Applied quantum gates (GHZ circuit)\n";
130
131  // Apply a general (non-unitary) quantum channel; this is only valid for a mixed state.
132  void *channelKraus[] = {d_kraus0, d_kraus1};
133  int64_t channelId;
134  HANDLE_CUTN_ERROR(cutensornetStateApplyGeneralChannel(cutnHandle, quantumState, 1, std::vector<int32_t>{{0}}.data(),
135                    2, channelKraus, nullptr, &channelId));
136  std::cout << "Applied an amplitude-damping channel to qubit 0\n";

Create the marginal and marginal-diagonal objects#

cutensornetCreateMarginal() defines the full reduced density matrix, while cutensornetCreateMarginalDiagonal() defines only its diagonal. The two factories return the same opaque handle type and share the same lifecycle/configure/prepare/compute APIs.

140  // Create both the full reduced density matrix and its diagonal (marginal probabilities).
141  // The two factories share the same opaque handle type and lifecycle APIs.
142  cutensornetStateMarginal_t marginal, marginalDiag;
143  HANDLE_CUTN_ERROR(cutensornetCreateMarginal(cutnHandle, quantumState, numMarginalModes, marginalModes.data(),
144                    0, nullptr, nullptr, &marginal));
145  HANDLE_CUTN_ERROR(cutensornetCreateMarginalDiagonal(cutnHandle, quantumState, numMarginalModes, marginalModes.data(),
146                    0, nullptr, nullptr, &marginalDiag));
147  std::cout << "Created the reduced density matrix (full) and its diagonal\n";

Configure the marginal objects#

151  // Configure both computations with the same number of hyper samples for the path finder
152  const int32_t numHyperSamples = 8;
153  HANDLE_CUTN_ERROR(cutensornetMarginalConfigure(cutnHandle, marginal,
154                    CUTENSORNET_MARGINAL_CONFIG_NUM_HYPER_SAMPLES, &numHyperSamples, sizeof(numHyperSamples)));
155  HANDLE_CUTN_ERROR(cutensornetMarginalConfigure(cutnHandle, marginalDiag,
156                    CUTENSORNET_MARGINAL_CONFIG_NUM_HYPER_SAMPLES, &numHyperSamples, sizeof(numHyperSamples)));

Create the workspace and a prepare/compute helper#

We create a workspace descriptor and a small helper that prepares, attaches the workspace, and computes a given marginal handle (reused for both the full RDM and its diagonal).

160  // Create a workspace descriptor (reused for both computations)
161  cutensornetWorkspaceDescriptor_t workDesc;
162  HANDLE_CUTN_ERROR(cutensornetCreateWorkspaceDescriptor(cutnHandle, &workDesc));
163
164  // Helper lambda: prepare, attach workspace, and compute a marginal handle into d_out
165  auto prepareAndCompute = [&](cutensornetStateMarginal_t handle, void *d_out, const char *label) {
166    HANDLE_CUTN_ERROR(cutensornetMarginalPrepare(cutnHandle, handle, scratchSize, workDesc, 0x0));
167    int64_t worksize {0};
168    HANDLE_CUTN_ERROR(cutensornetWorkspaceGetMemorySize(cutnHandle, workDesc,
169                      CUTENSORNET_WORKSIZE_PREF_RECOMMENDED, CUTENSORNET_MEMSPACE_DEVICE,
170                      CUTENSORNET_WORKSPACE_SCRATCH, &worksize));
171    if(worksize > static_cast<int64_t>(scratchSize)) {
172      std::cout << "ERROR: Insufficient workspace size on Device for " << label << "!\n";
173      std::abort();
174    }
175    HANDLE_CUTN_ERROR(cutensornetWorkspaceSetMemory(cutnHandle, workDesc, CUTENSORNET_MEMSPACE_DEVICE,
176                      CUTENSORNET_WORKSPACE_SCRATCH, d_scratch, worksize));
177    HANDLE_CUTN_ERROR(cutensornetMarginalCompute(cutnHandle, handle, nullptr, workDesc, d_out, 0x0));
178  };

Compute the reduced density matrix#

182  // Compute the full reduced density matrix
183  prepareAndCompute(marginal, d_rdm, "reduced density matrix");
184  std::vector<std::complex<double>> h_rdm(rdmSize);
185  HANDLE_CUDA_ERROR(cudaMemcpy(h_rdm.data(), d_rdm, rdmSize * (2 * fp64size), cudaMemcpyDeviceToHost));
186  std::cout << "\nReduced density matrix on qubits {0,1}:\n";
187  std::complex<double> trace{0.0, 0.0};
188  for(std::size_t i = 0; i < rdmDim; ++i) {
189    for(std::size_t j = 0; j < rdmDim; ++j) {
190      std::cout << " " << h_rdm[i + j * rdmDim];
191    }
192    trace += h_rdm[i + i * rdmDim];
193    std::cout << std::endl;
194  }
195  std::cout << "Tr(RDM) = " << trace.real() << " (should be ~1.0)\n";

Compute the marginal probability distribution#

The diagonal of the reduced density matrix is the marginal probability distribution over the requested modes; its entries are real and sum to one.

199  // Compute the diagonal of the reduced density matrix (marginal probabilities)
200  prepareAndCompute(marginalDiag, d_rdmDiag, "marginal diagonal");
201  std::vector<std::complex<double>> h_rdmDiag(rdmDim);
202  HANDLE_CUDA_ERROR(cudaMemcpy(h_rdmDiag.data(), d_rdmDiag, rdmDim * (2 * fp64size), cudaMemcpyDeviceToHost));
203  std::cout << "\nMarginal probability distribution over qubits {0,1}:\n";
204  double probSum = 0.0;
205  for(std::size_t i = 0; i < rdmDim; ++i) {
206    std::cout << " p[" << i << "] = " << h_rdmDiag[i].real() << std::endl;
207    probSum += h_rdmDiag[i].real();
208  }
209  std::cout << "Sum of probabilities = " << probSum << " (should be ~1.0)\n";

Free resources#

213  // Clean up
214  HANDLE_CUTN_ERROR(cutensornetDestroyWorkspaceDescriptor(workDesc));
215  HANDLE_CUTN_ERROR(cutensornetDestroyMarginal(marginalDiag));
216  HANDLE_CUTN_ERROR(cutensornetDestroyMarginal(marginal));
217  HANDLE_CUTN_ERROR(cutensornetDestroyState(quantumState));
218  std::cout << "\nDestroyed the cuTensorNet objects\n";
219
220  HANDLE_CUDA_ERROR(cudaFree(d_scratch));
221  HANDLE_CUDA_ERROR(cudaFree(d_rdmDiag));
222  HANDLE_CUDA_ERROR(cudaFree(d_rdm));
223  HANDLE_CUDA_ERROR(cudaFree(d_kraus1));
224  HANDLE_CUDA_ERROR(cudaFree(d_kraus0));
225  HANDLE_CUDA_ERROR(cudaFree(d_gateCX));
226  HANDLE_CUDA_ERROR(cudaFree(d_gateH));
227  std::cout << "Freed memory on GPU\n";
228
229  HANDLE_CUTN_ERROR(cutensornetDestroy(cutnHandle));
230  std::cout << "Finalized the cuTensorNet library\n";
231
232  return 0;
233}