The following code example illustrates how to integrate cuTensorNet functionalities to perform a basic MPS simulation.
The workflow is encapsulated in an MPSHelper class. The full code can be found in the NVIDIA/cuQuantum repository.
Define MPSHelper class¶
We first define an MPSHelper class to keep track of the modes and extents of all physical and virtual bonds.
The simulation settings are also stored in this class. Once the object goes out of scope, all resources owned by it are freed.
class MPSHelper
{
   public:
      /**
       * \brief Construct an MPSHelper object for the gate splitting algorithm.
       *            i       j       k
       *     -------A-------B-------                        i        j        k
       *           p|       |q           ------->    -------A`-------B`-------
       *         GGGGGGGGG                                 r|        |s
       *           r|       |s
       * \param[in] numSites The number of sites in the MPS.
       * \param[in] physExtent The extent of the physical mode that the gate tensors act on.
       * \param[in] maxVirtualExtent The maximal extent allowed for the virtual mode shared between adjacent MPS tensors.
       * \param[in] initialVirtualExtents A vector of size \p numSites-1 where the i-th element denotes the extent of the mode shared by site i and site i+1 at the beginning of the simulation.
       * \param[in] typeData The data type for all tensors and gates.
       * \param[in] typeCompute The compute type for the gate splitting process.
       */
      MPSHelper(int32_t numSites,
                int64_t physExtent,
                int64_t maxVirtualExtent,
                const std::vector<int64_t>& initialVirtualExtents,
                cudaDataType_t typeData,
                cutensornetComputeType_t typeCompute);

      /**
       * \brief Initialize the MPS metadata and the cuTensorNet library.
       */
      cutensornetStatus_t initialize();

      /**
       * \brief Compute the maximal number of elements for each site.
       */
      std::vector<size_t> getMaxTensorElements() const;

      /**
       * \brief Update the SVD truncation settings.
       * \param[in] absCutoff The cutoff value for absolute singular value truncation.
       * \param[in] relCutoff The cutoff value for relative singular value truncation.
       * \param[in] renorm The option for renormalization of the truncated singular values.
       * \param[in] partition The option for partitioning of the singular values.
       */
      cutensornetStatus_t setSVDConfig(double absCutoff,
                                       double relCutoff,
                                       cutensornetTensorSVDNormalization_t renorm,
                                       cutensornetTensorSVDPartition_t partition);

      /**
       * \brief Update the algorithm to use for the gating process.
       * \param[in] gateAlgo The gate algorithm to use for the MPS simulation.
       */
      void setGateAlgorithm(cutensornetGateSplitAlgo_t gateAlgo) {gateAlgo_ = gateAlgo;}

      /**
       * \brief Compute the maximal workspace needed for the MPS gating algorithm.
       * \param[out] workspaceSize The required workspace size on the device.
       */
      cutensornetStatus_t computeMaxWorkspaceSizes(int64_t* workspaceSize);

      /**
       * \brief Set the device workspace to be used by the MPS gating algorithm.
       * \param[in] work Pointer to the allocated workspace.
       * \param[in] workspaceSize The size of the allocated workspace.
       */
      cutensornetStatus_t setWorkspace(void* work, int64_t workspaceSize);

      /**
       * \brief In-place execution of the apply-gate algorithm on \p siteA and \p siteB.
       * \param[in] siteA The first site the gate is applied to.
       * \param[in] siteB The second site the gate is applied to. Must be adjacent to \p siteA.
       * \param[in,out] dataInA The data for the MPS tensor at \p siteA. The input will be overwritten with the output MPS tensor data.
       * \param[in,out] dataInB The data for the MPS tensor at \p siteB. The input will be overwritten with the output MPS tensor data.
       * \param[in] dataInG The input data for the gate tensor.
       * \param[in] verbose Whether to print out runtime information regarding truncation.
       * \param[in] stream The CUDA stream on which the computation is performed.
       */
      cutensornetStatus_t applyGate(uint32_t siteA,
                                    uint32_t siteB,
                                    void* dataInA,
                                    void* dataInB,
                                    const void* dataInG,
                                    bool verbose,
                                    cudaStream_t stream);

      /**
       * \brief Free all the tensor descriptors in the MPSHelper.
       */
      ~MPSHelper()
      {
         if (inited_)
         {
            for (auto& descTensor: descTensors_)
            {
               cutensornetDestroyTensorDescriptor(descTensor);
            }
            cutensornetDestroy(handle_);
            cutensornetDestroyWorkspaceDescriptor(workDesc_);
         }
         if (svdConfig_ != nullptr)
         {
            cutensornetDestroyTensorSVDConfig(svdConfig_);
         }
         if (svdInfo_ != nullptr)
         {
            cutensornetDestroyTensorSVDInfo(svdInfo_);
         }
      }

   private:
      int32_t numSites_; ///< Number of sites in the MPS
      int64_t physExtent_; ///< Extent of the physical mode
      int64_t maxVirtualExtent_{0}; ///< The maximal extent allowed for the virtual dimension
      cudaDataType_t typeData_;
      cutensornetComputeType_t typeCompute_;

      bool inited_{false};
      std::vector<int32_t> physModes_; ///< A vector of length \p numSites_ storing the physical mode of each site
      std::vector<int32_t> virtualModes_; ///< A vector of length \p numSites_+1; for site i, virtualModes_[i] and virtualModes_[i+1] represent the left and right virtual modes
      std::vector<int64_t> extentsPerSite_; ///< A vector of length \p numSites_+1; for site i, extentsPerSite_[i] and extentsPerSite_[i+1] represent the left and right virtual extents

      cutensornetHandle_t handle_{nullptr};
      std::vector<cutensornetTensorDescriptor_t> descTensors_; ///< A vector of length \p numSites_ storing the cutensornetTensorDescriptor_t for each site
      cutensornetWorkspaceDescriptor_t workDesc_{nullptr};
      cutensornetTensorSVDConfig_t svdConfig_{nullptr};
      cutensornetTensorSVDInfo_t svdInfo_{nullptr};
      cutensornetGateSplitAlgo_t gateAlgo_{CUTENSORNET_GATE_SPLIT_ALGO_DIRECT};
      int32_t nextMode_{0}; ///< The next mode label to use for labelling site tensors and gates
};
Note
For the full definition of all the methods, please refer to the sample in the NVIDIA/cuQuantum repository.
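The central call inside applyGate is cutensornetGateSplit, which contracts the two site tensors with the gate and factorizes the result back into two site tensors in a single shot. Below is a minimal sketch of that call; the descriptor bookkeeping (creating descTensorInA/B/G and the output descriptors with updated extents) is omitted, the surrounding variable names are illustrative rather than copied from the sample, and the sample's HANDLE_ERROR macro is assumed.
// Sketch of the core of MPSHelper::applyGate() (descriptor setup omitted).
// Since each site buffer is allocated at its maximal size (see Step 2 below),
// dataInA/dataInB can be reused as the output buffers for U and V.
HANDLE_ERROR( cutensornetGateSplit(handle_,
                 descTensorInA, dataInA,    // input MPS tensor at siteA
                 descTensorInB, dataInB,    // input MPS tensor at siteB
                 descTensorInG, dataInG,    // the 2-qubit gate tensor
                 descTensorOutA, dataInA,   // output "U" tensor, overwrites dataInA
                 /*s=*/nullptr,             // singular values are partitioned onto U and V, so not returned
                 descTensorOutB, dataInB,   // output "V" tensor, overwrites dataInB
                 gateAlgo_, svdConfig_, typeCompute_, svdInfo_,
                 workDesc_, stream) );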
Set up MPS simulation settings¶
Next, in the main function, we need to choose the settings for the MPS simulation (i.e., the number of sites, the physical and initial virtual extents, and the data and compute types).
   /***********************************
   * Step 1: basic MPS setup
   ************************************/

   // set up the simulation settings for the MPS
   typedef std::complex<double> complexType;
   cudaDataType_t typeData = CUDA_C_64F;
   cutensornetComputeType_t typeCompute = CUTENSORNET_COMPUTE_64F;
   int32_t numSites = 16;
   int64_t physExtent = 2;
   int64_t maxVirtualExtent = 12;
   const std::vector<int64_t> initialVirtualExtents(numSites-1, 1); // start the MPS with shared extents of 1

   // initialize an MPSHelper to dynamically update the tensor metadata
   MPSHelper mpsHelper(numSites, physExtent, maxVirtualExtent, initialVirtualExtents, typeData, typeCompute);
   HANDLE_ERROR( mpsHelper.initialize() );
The MPS metadata and all cuTensorNet library objects are managed by the MPSHelper, while the data pointers are explicitly managed in the main function.
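For reference, the following is a minimal sketch of what initialize() may do internally, assuming the member layout shown above; the mode-labelling scheme is illustrative and the shipped sample may differ in detail.
// Hypothetical sketch of MPSHelper::initialize().
cutensornetStatus_t MPSHelper::initialize()
{
   // create the library handle and an (empty) workspace descriptor
   HANDLE_ERROR( cutensornetCreate(&handle_) );
   HANDLE_ERROR( cutensornetCreateWorkspaceDescriptor(handle_, &workDesc_) );
   // assign fresh mode labels: one virtual mode per bond (numSites_+1 in total,
   // including the trivial boundary bonds) and one physical mode per site
   for (int32_t i=0; i<numSites_+1; i++)
   {
      virtualModes_.push_back(nextMode_++);
      if (i < numSites_) physModes_.push_back(nextMode_++);
   }
   // create one tensor descriptor per site with modes (left virtual, physical, right virtual);
   // extentsPerSite_ is assumed to have been filled by the constructor (boundary extents of 1)
   for (int32_t i=0; i<numSites_; i++)
   {
      const int32_t modes[3] = {virtualModes_[i], physModes_[i], virtualModes_[i+1]};
      const int64_t extents[3] = {extentsPerSite_[i], physExtent_, extentsPerSite_[i+1]};
      cutensornetTensorDescriptor_t descTensor;
      HANDLE_ERROR( cutensornetCreateTensorDescriptor(handle_, 3, extents, /*strides=*/nullptr,
                                                      modes, typeData_, &descTensor) );
      descTensors_.push_back(descTensor);
   }
   inited_ = true;
   return CUTENSORNET_STATUS_SUCCESS;
}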
Allocate memory and initialize data¶
Next, we allocate memory for the MPS operands and for four 2-qubit gate tensors.
The maximal number of elements each MPS tensor can reach is queried through the MPSHelper class, and each site buffer is allocated at that maximal size so it can hold the tensor as the virtual extents grow during the simulation.
The MPS tensors are initialized to the state corresponding to |00..000>: since all initial virtual extents are 1, setting the first element of each site tensor to 1 suffices. The gate tensors are filled with random values.
   /***********************************
   * Step 2: data allocation
   ************************************/

   // query largest tensor sizes for the MPS
   const std::vector<size_t> maxElementsPerSite = mpsHelper.getMaxTensorElements();
   std::vector<void*> tensors_h;
   std::vector<void*> tensors_d;
   for (int32_t i=0; i<numSites; i++)
   {
      size_t maxSize = sizeof(complexType) * maxElementsPerSite.at(i);
      void* data_h = malloc(maxSize);
      memset(data_h, 0, maxSize);
      // initialize state to |0000..0000>
      *(complexType*)(data_h) = complexType(1,0);
      void* data_d;
      HANDLE_CUDA_ERROR( cudaMalloc(&data_d, maxSize) );
      // data transfer from host to device
      HANDLE_CUDA_ERROR( cudaMemcpy(data_d, data_h, maxSize, cudaMemcpyHostToDevice) );
      tensors_h.push_back(data_h);
      tensors_d.push_back(data_d);
   }

   // initialize 4 random gate tensors on host and copy them to device
   const int32_t numRandomGates = 4;
   const int64_t numGateElements = physExtent * physExtent * physExtent * physExtent; // shape (2, 2, 2, 2)
   size_t gateSize = sizeof(complexType) * numGateElements;
   complexType* gates_h[numRandomGates];
   void* gates_d[numRandomGates];

   for (int i=0; i<numRandomGates; i++)
   {
      gates_h[i] = (complexType*) malloc(gateSize);
      HANDLE_CUDA_ERROR( cudaMalloc((void**) &gates_d[i], gateSize) );
      for (int j=0; j<numGateElements; j++)
      {
         gates_h[i][j] = complexType(((float) rand())/RAND_MAX, ((float) rand())/RAND_MAX);
      }
      HANDLE_CUDA_ERROR( cudaMemcpy(gates_d[i], gates_h[i], gateSize, cudaMemcpyHostToDevice) );
   }
Set up gate split options¶
Then we set up the SVD truncation parameters and choose the algorithm (cutensornetGateSplitAlgo_t) to use for the gate split process.
   /*****************************************
   * Step 3: setup options for gate operation
   ******************************************/

   double absCutoff = 1e-2;
   double relCutoff = 1e-2;
   cutensornetTensorSVDNormalization_t renorm = CUTENSORNET_TENSOR_SVD_NORMALIZATION_L2; // renormalize the L2 norm of truncated singular values to 1
   cutensornetTensorSVDPartition_t partition = CUTENSORNET_TENSOR_SVD_PARTITION_UV_EQUAL; // equally partition the singular values onto U and V
   HANDLE_ERROR( mpsHelper.setSVDConfig(absCutoff, relCutoff, renorm, partition));

   cutensornetGateSplitAlgo_t gateAlgo = CUTENSORNET_GATE_SPLIT_ALGO_REDUCED;
   mpsHelper.setGateAlgorithm(gateAlgo);
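Internally, setSVDConfig forwards these values to the cutensornetTensorSVDConfig_t object via attribute setters. A minimal sketch, assuming the member names from the class above and the sample's HANDLE_ERROR macro:
// Hypothetical sketch of MPSHelper::setSVDConfig().
cutensornetStatus_t MPSHelper::setSVDConfig(double absCutoff, double relCutoff,
                                            cutensornetTensorSVDNormalization_t renorm,
                                            cutensornetTensorSVDPartition_t partition)
{
   HANDLE_ERROR( cutensornetCreateTensorSVDConfig(handle_, &svdConfig_) );
   HANDLE_ERROR( cutensornetTensorSVDConfigSetAttribute(handle_, svdConfig_,
                     CUTENSORNET_TENSOR_SVD_CONFIG_ABS_CUTOFF, &absCutoff, sizeof(absCutoff)) );
   HANDLE_ERROR( cutensornetTensorSVDConfigSetAttribute(handle_, svdConfig_,
                     CUTENSORNET_TENSOR_SVD_CONFIG_REL_CUTOFF, &relCutoff, sizeof(relCutoff)) );
   HANDLE_ERROR( cutensornetTensorSVDConfigSetAttribute(handle_, svdConfig_,
                     CUTENSORNET_TENSOR_SVD_CONFIG_S_NORMALIZATION, &renorm, sizeof(renorm)) );
   HANDLE_ERROR( cutensornetTensorSVDConfigSetAttribute(handle_, svdConfig_,
                     CUTENSORNET_TENSOR_SVD_CONFIG_S_PARTITION, &partition, sizeof(partition)) );
   // the SVD info object collects truncation statistics for each gate split
   HANDLE_ERROR( cutensornetCreateTensorSVDInfo(handle_, &svdInfo_) );
   return CUTENSORNET_STATUS_SUCCESS;
}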
Query and allocate required workspace¶
Once all simulation settings are in place, we can query the required workspace size. Inside the MPSHelper, the required workspace size is estimated based on the largest tensor sizes involved in the simulation.
   /********************************************
   * Step 4: workspace size query and allocation
   *********************************************/

   int64_t workspaceSize;
   HANDLE_ERROR( mpsHelper.computeMaxWorkspaceSizes(&workspaceSize) );

   void *work = nullptr;
   std::cout << "Maximal workspace size required: " << workspaceSize << std::endl;
   HANDLE_CUDA_ERROR( cudaMalloc(&work, workspaceSize) );

   HANDLE_ERROR( mpsHelper.setWorkspace(work, workspaceSize));
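Inside computeMaxWorkspaceSizes, the helper can construct temporary tensor descriptors with the largest possible extents and query the gate-split workspace requirement through the workspace descriptor. The sketch below is an assumption: the entry points cutensornetWorkspaceComputeGateSplitSizes / cutensornetWorkspaceGetMemorySize / cutensornetWorkspaceSetMemory and their enums follow recent cuTensorNet releases and may differ in older versions.
// Hypothetical sketch of the queries inside computeMaxWorkspaceSizes()/setWorkspace().
// descMaxA, descMaxB, descMaxG, descMaxU, descMaxV are temporary descriptors
// built with maximal extents (construction not shown).
HANDLE_ERROR( cutensornetWorkspaceComputeGateSplitSizes(handle_,
                 descMaxA, descMaxB, descMaxG, descMaxU, descMaxV,
                 gateAlgo_, svdConfig_, typeCompute_, workDesc_) );
int64_t requiredSize{0};
HANDLE_ERROR( cutensornetWorkspaceGetMemorySize(handle_, workDesc_,
                 CUTENSORNET_WORKSIZE_PREF_MIN, CUTENSORNET_MEMSPACE_DEVICE,
                 CUTENSORNET_WORKSPACE_SCRATCH, &requiredSize) );
*workspaceSize = requiredSize;

// setWorkspace() can later hand the user-allocated buffer back to the descriptor:
HANDLE_ERROR( cutensornetWorkspaceSetMemory(handle_, workDesc_,
                 CUTENSORNET_MEMSPACE_DEVICE, CUTENSORNET_WORKSPACE_SCRATCH,
                 work, workspaceSize) );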
Execution¶
At this stage, we can execute the simulation by iterating over all the gate tensors. All the metadata of the MPS will be managed and updated inside the MPSHelper.
   /***********************************
   * Step 5: execution
   ************************************/

   cudaStream_t stream;
   HANDLE_CUDA_ERROR( cudaStreamCreate(&stream) );
   uint32_t numLayers = 10; // 10 layers of gates
   for (uint32_t i=0; i<numLayers; i++)
   {
      uint32_t start_site = i % 2;
      std::cout << "Cycle " << i << ":" << std::endl;
      bool verbose = (i == numLayers - 1);
      for (uint32_t j=start_site; j<numSites-1; j=j+2)
      {
         uint32_t gateIdx = rand() % numRandomGates; // pick a random gate tensor
         std::cout << "apply gate " << gateIdx << " on " << j << " and " << j+1 << std::endl;
         void *dataA = tensors_d[j];
         void *dataB = tensors_d[j+1];
         void *dataG = gates_d[gateIdx];
         HANDLE_ERROR( mpsHelper.applyGate(j, j+1, dataA, dataB, dataG, verbose, stream) );
      }
   }

   HANDLE_CUDA_ERROR( cudaStreamSynchronize(stream) );
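When verbose is true, the truncation information printed for the last layer comes from the svdInfo_ object owned by the helper. A minimal sketch of such a query, as it might appear inside applyGate after the split completes (the stream must be synchronized before the info is read on the host):
// Hypothetical sketch: query truncation statistics after a gate split.
HANDLE_CUDA_ERROR( cudaStreamSynchronize(stream) );
int64_t fullExtent, reducedExtent;
double discardedWeight;
HANDLE_ERROR( cutensornetTensorSVDInfoGetAttribute(handle_, svdInfo_,
                  CUTENSORNET_TENSOR_SVD_INFO_FULL_EXTENT, &fullExtent, sizeof(fullExtent)) );
HANDLE_ERROR( cutensornetTensorSVDInfoGetAttribute(handle_, svdInfo_,
                  CUTENSORNET_TENSOR_SVD_INFO_REDUCED_EXTENT, &reducedExtent, sizeof(reducedExtent)) );
HANDLE_ERROR( cutensornetTensorSVDInfoGetAttribute(handle_, svdInfo_,
                  CUTENSORNET_TENSOR_SVD_INFO_DISCARDED_WEIGHT, &discardedWeight, sizeof(discardedWeight)) );
std::cout << "reduced extent: " << reducedExtent << " / " << fullExtent
          << ", discarded weight: " << discardedWeight << std::endl;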
Free resources¶
After the simulation, we free up all the data pointers allocated in the main function.
   /***********************************
   * Step 6: free resources
   ************************************/

   std::cout << "Free all resources" << std::endl;

   for (int i=0; i<numRandomGates; i++)
   {
      free(gates_h[i]);
      HANDLE_CUDA_ERROR( cudaFree(gates_d[i]) );
   }

   for (int32_t i=0; i<numSites; i++)
   {
      free(tensors_h.at(i));
      HANDLE_CUDA_ERROR( cudaFree(tensors_d.at(i)) );
   }

   HANDLE_CUDA_ERROR( cudaFree(work) );
   // The MPSHelper destructor will free all internal resources when out of scope
   return 0;
}
All cuTensorNet library objects owned by the MPSHelper will be freed once it goes out of scope.