The following code example illustrates how to integrate cuTensorNet functionalities to perform basic MPS simulation.
The workflow is encapsulated in an MPSHelper
class. The full code can be found in
the NVIDIA/cuQuantum repository (here).
Define MPSHelper class¶
We first define an MPSHelper
class to keep track of the modes and extents of all physical and virtual bonds.
The simulation settings are also stored in this class. Once out of scope, all resource owned by this class will be freed.
81class MPSHelper
82{
83 public:
84 /**
85 * \brief Construct an MPSHelper object for gate splitting algorithm.
86 * i j k
87 * -------A-------B------- i j k
88 * p| |q -------> -------A`-------B`-------
89 * GGGGGGGGG r| |s
90 * r| |s
91 * \param[in] numSites The number of sites in the MPS
92 * \param[in] physExtent The extent for the physical mode where the gate tensors are acted on.
93 * \param[in] maxVirtualExtent The maximal extent allowed for the virtual mode shared between adjacent MPS tensors.
94 * \param[in] initialVirtualExtents A vector of size \p numSites-1 where the ith element denotes the extent of the shared mode for site i and site i+1 in the beginning of the simulation.
95 * \param[in] typeData The data type for all tensors and gates
96 * \param[in] typeCompute The compute type for all gate splitting process
97 */
98 MPSHelper(int32_t numSites,
99 int64_t physExtent,
100 int64_t maxVirtualExtent,
101 const std::vector<int64_t>& initialVirtualExtents,
102 cudaDataType_t typeData,
103 cutensornetComputeType_t typeCompute);
104
105 /**
106 * \brief Initialize the MPS metadata and cutensornet library.
107 */
108 cutensornetStatus_t initialize();
109
110 /**
111 * \brief Compute the maximal number of elements for each site.
112 */
113 std::vector<size_t> getMaxTensorElements() const;
114
115 /**
116 * \brief Update the SVD truncation setting.
117 * \param[in] absCutoff The cutoff value for absolute singular value truncation.
118 * \param[in] relCutoff The cutoff value for relative singular value truncation.
119 * \param[in] renorm The option for renormalization of the truncated singular values.
120 * \param[in] partition The option for partitioning of the singular values.
121 */
122 cutensornetStatus_t setSVDConfig(double absCutoff,
123 double relCutoff,
124 cutensornetTensorSVDNormalization_t renorm,
125 cutensornetTensorSVDPartition_t partition);
126
127 /**
128 * \brief Update the algorithm to use for the gating process.
129 * \param[in] gateAlgo The gate algorithm to use for MPS simulation.
130 */
131 void setGateAlgorithm(cutensornetGateSplitAlgo_t gateAlgo) {gateAlgo_ = gateAlgo;}
132
133 /**
134 * \brief Compute the maximal workspace needed for MPS gating algorithm.
135 * \param[out] workspaceSize The required workspace size on the device.
136 */
137 cutensornetStatus_t computeMaxWorkspaceSizes(int64_t* workspaceSize);
138
139 /**
140 * \brief Compute the maximal workspace needed for MPS gating algorithm.
141 * \param[in] work Pointer to the allocated workspace.
142 * \param[in] workspaceSize The required workspace size on the device.
143 */
144 cutensornetStatus_t setWorkspace(void* work, int64_t workspaceSize);
145
146 /**
147 * \brief In-place execution of the apply gate algorithm on \p siteA and \p siteB.
148 * \param[in] siteA The first site where the gate is applied to.
149 * \param[in] siteB The second site where the gate is applied to. Must be adjacent to \p siteA.
150 * \param[in,out] dataInA The data for the MPS tensor at \p siteA. The input will be overwritten with output mps tensor data.
151 * \param[in,out] dataInB The data for the MPS tensor at \p siteB. The input will be overwritten with output mps tensor data.
152 * \param[in] dataInG The input data for the gate tensor.
153 * \param[in] verbose Whether to print out the runtime information regarding truncation.
154 * \param[in] stream The CUDA stream on which the computation is performed.
155 */
156 cutensornetStatus_t applyGate(uint32_t siteA,
157 uint32_t siteB,
158 void* dataInA,
159 void* dataInB,
160 const void* dataInG,
161 bool verbose,
162 cudaStream_t stream);
163
164 /**
165 * \brief Free all the tensor descriptors in mpsHelper.
166 */
167 ~MPSHelper()
168 {
169 if (inited_)
170 {
171 for (auto& descTensor: descTensors_)
172 {
173 cutensornetDestroyTensorDescriptor(descTensor);
174 }
175 cutensornetDestroy(handle_);
176 cutensornetDestroyWorkspaceDescriptor(workDesc_);
177 }
178 if (svdConfig_ != nullptr)
179 {
180 cutensornetDestroyTensorSVDConfig(svdConfig_);
181 }
182 if (svdInfo_ != nullptr)
183 {
184 cutensornetDestroyTensorSVDInfo(svdInfo_);
185 }
186 }
187
188 private:
189 int32_t numSites_; ///< Number of sites in the MPS
190 int64_t physExtent_; ///< Extent for the physical index
191 int64_t maxVirtualExtent_{0}; ///< The maximal extent allowed for the virtual dimension
192 cudaDataType_t typeData_;
193 cutensornetComputeType_t typeCompute_;
194
195 bool inited_{false};
196 std::vector<int32_t> physModes_; ///< A vector of length \p numSites_ storing the physical mode of each site.
197 std::vector<int32_t> virtualModes_; ///< A vector of length \p numSites_+1; For site i, virtualModes_[i] and virtualModes_[i+1] represents the left and right virtual mode.
198 std::vector<int64_t> extentsPerSite_; ///< A vector of length \p numSites_+1; For site i, extentsPerSite_[i] and extentsPerSite_[i+1] represents the left and right virtual extent.
199
200 cutensornetHandle_t handle_{nullptr};
201 std::vector<cutensornetTensorDescriptor_t> descTensors_; /// A vector of length \p numSites_ storing the cutensornetTensorDescriptor_t for each site
202 cutensornetWorkspaceDescriptor_t workDesc_{nullptr};
203 cutensornetTensorSVDConfig_t svdConfig_{nullptr};
204 cutensornetTensorSVDInfo_t svdInfo_{nullptr};
205 cutensornetGateSplitAlgo_t gateAlgo_{CUTENSORNET_GATE_SPLIT_ALGO_DIRECT};
206 int32_t nextMode_{0}; /// The next mode label to use for labelling site tensors and gates.
207};
Note
For full definition of all the methods, please refer to the sample here.
Setup MPS simulation setting¶
Next, in the main function, we need to choose the simulation setting for the MPS simulation (i.e., the number of sites, the initial extents, and the data type).
564 /***********************************
565 * Step 1: basic MPS setup
566 ************************************/
567
568 // setup the simulation setting for the MPS
569 typedef std::complex<double> complexType;
570 cudaDataType_t typeData = CUDA_C_64F;
571 cutensornetComputeType_t typeCompute = CUTENSORNET_COMPUTE_64F;
572 int32_t numSites = 16;
573 int64_t physExtent = 2;
574 int64_t maxVirtualExtent = 12;
575 const std::vector<int64_t> initialVirtualExtents(numSites-1, 1); // starting MPS with shared extent of 1;
576
577 // initialize an MPSHelper to dynamically update tensor metadats
578 MPSHelper mpsHelper(numSites, physExtent, maxVirtualExtent, initialVirtualExtents, typeData, typeCompute);
579 HANDLE_ERROR( mpsHelper.initialize() );
The MPS metadata and all cuTensorNet library objects will be managed by the MPSHelper
while the data pointers are explicitly
managed in the main function.
Allocate memory and initialize data¶
Next, we allocate memory for the MPS operands and four 2-qubit gate tensors.
The largest tensor size for each MPS tensor can be queried through the MPSHelper
class.
The MPS tensors are initialized to a state corresponding to |00..000>
and the gate tensors are filled with random values.
582 /***********************************
583 * Step 2: data allocation
584 ************************************/
585
586 // query largest tensor sizes for the MPS
587 const std::vector<size_t> maxElementsPerSite = mpsHelper.getMaxTensorElements();
588 std::vector<void*> tensors_h;
589 std::vector<void*> tensors_d;
590 for (int32_t i=0; i<numSites; i++)
591 {
592 size_t maxSize = sizeof(complexType) * maxElementsPerSite.at(i);
593 void* data_h = malloc(maxSize);
594 memset(data_h, 0, maxSize);
595 // initialize state to |0000..0000>
596 *(complexType*)(data_h) = complexType(1,0);
597 void* data_d;
598 HANDLE_CUDA_ERROR( cudaMalloc(&data_d, maxSize) );
599 // data transfer from host to device
600 HANDLE_CUDA_ERROR( cudaMemcpy(data_d, data_h, maxSize, cudaMemcpyHostToDevice) );
601 tensors_h.push_back(data_h);
602 tensors_d.push_back(data_d);
603 }
604
605 // initialize 4 random gate tensors on host and copy them to device
606 const int32_t numRandomGates = 4;
607 const int64_t numGateElements = physExtent * physExtent * physExtent * physExtent; // shape (2, 2, 2, 2)
608 size_t gateSize = sizeof(complexType) * numGateElements;
609 complexType* gates_h[numRandomGates];
610 void* gates_d[numRandomGates];
611
612 for (int i=0; i<numRandomGates; i++)
613 {
614 gates_h[i] = (complexType*) malloc(gateSize);
615 HANDLE_CUDA_ERROR( cudaMalloc((void**) &gates_d[i], gateSize) );
616 for (int j=0; j<numGateElements; j++)
617 {
618 gates_h[i][j] = complexType(((float) rand())/RAND_MAX, ((float) rand())/RAND_MAX);
619 }
620 HANDLE_CUDA_ERROR( cudaMemcpy(gates_d[i], gates_h[i], gateSize, cudaMemcpyHostToDevice) );
621 }
622
Setup gate split options¶
Then we setup the SVD truncation parameters and the algorithm cutensornetGateSplitAlgo_t
to use for the gate split process.
624 /*****************************************
625 * Step 3: setup options for gate operation
626 ******************************************/
627
628 double absCutoff = 1e-2;
629 double relCutoff = 1e-2;
630 cutensornetTensorSVDNormalization_t renorm = CUTENSORNET_TENSOR_SVD_NORMALIZATION_L2; // renormalize the L2 norm of truncated singular values to 1.
631 cutensornetTensorSVDPartition_t partition = CUTENSORNET_TENSOR_SVD_PARTITION_UV_EQUAL; // equally partition the singular values onto U and V;
632 HANDLE_ERROR( mpsHelper.setSVDConfig(absCutoff, relCutoff, renorm, partition));
633
634 cutensornetGateSplitAlgo_t gateAlgo = CUTENSORNET_GATE_SPLIT_ALGO_REDUCED;
635 mpsHelper.setGateAlgorithm(gateAlgo);
Query and allocate required workspace¶
Once all simulation settings are set, we can query the required workspace size. Inside the MPSHelper, the required workspace size is estimated on the largest tensor sizes involved in the simulation.
638 /********************************************
639 * Step 4: workspace size query and allocation
640 *********************************************/
641
642 int64_t workspaceSize;
643 HANDLE_ERROR( mpsHelper.computeMaxWorkspaceSizes(&workspaceSize) );
644
645 void *work = nullptr;
646 std::cout << "Maximal workspace size required: " << workspaceSize << std::endl;
647 HANDLE_CUDA_ERROR( cudaMalloc(&work, workspaceSize) );
648
649 HANDLE_ERROR( mpsHelper.setWorkspace(work, workspaceSize));
650
Execution¶
At this stage, we can execute the simulation by iterating over all the gate tensors.
All the metadata of the MPS will be managed and updated inside the MPSHelper
.
652 /***********************************
653 * Step 5: execution
654 ************************************/
655
656 cudaStream_t stream;
657 HANDLE_CUDA_ERROR( cudaStreamCreate(&stream) );
658 uint32_t numLayers = 10; // 10 layers of gate
659 for (uint32_t i=0; i<numLayers; i++)
660 {
661 uint32_t start_site = i % 2;
662 std::cout << "Cycle " << i << ":" << std::endl;
663 bool verbose = (i == numLayers - 1);
664 for (uint32_t j=start_site; j<numSites-1; j=j+2)
665 {
666 uint32_t gateIdx = rand() % numRandomGates; // pick a random gate tensor
667 std::cout << "apply gate " << gateIdx << " on " << j << " and " << j+1<< std::endl;
668 void *dataA = tensors_d[j];
669 void *dataB = tensors_d[j+1];
670 void *dataG = gates_d[gateIdx];
671 HANDLE_ERROR( mpsHelper.applyGate(j, j+1, dataA, dataB, dataG, verbose, stream) );
672 }
673 }
674
675 HANDLE_CUDA_ERROR( cudaStreamSynchronize(stream) );
Free resources¶
After the simulation, we free up all the data pointers allocated in the main function.
678 /***********************************
679 * Step 6: free resources
680 ************************************/
681
682 std::cout << "Free all resources" << std::endl;
683
684 for (int i=0; i<numRandomGates; i++)
685 {
686 free(gates_h[i]);
687 HANDLE_CUDA_ERROR( cudaFree(gates_d[i]) );
688 }
689
690 for (int32_t i=0; i<numSites; i++)
691 {
692 free(tensors_h.at(i));
693 HANDLE_CUDA_ERROR( cudaFree(tensors_d.at(i)) );
694 }
695
696 HANDLE_CUDA_ERROR( cudaFree(work) );
697 // The MPSHelper destructor will free all internal resources when out of scope
698 return 0;
699}
All cuTensorNet library objects owned by the MPSHelper will be freed once out of scope.