The following code example illustrates how to integrate cuTensorNet functionalities to perform basic MPS simulation. The workflow is encapsulated in an MPSHelper class. The full code can be found in the NVIDIA/cuQuantum repository (here).

Define MPSHelper class

We first define an MPSHelper class to keep track of the modes and extents of all physical and virtual bonds. The simulation settings are also stored in this class. Once out of scope, all resource owned by this class will be freed.

 81class MPSHelper
 82{
 83   public:
 84      /**
 85       * \brief Construct an MPSHelper object for gate splitting algorithm.
 86       *        i       j       k
 87       *     -------A-------B-------                      i        j        k
 88       *           p|       |q            ------->     -------A`-------B`-------
 89       *            GGGGGGGGG                                r|        |s
 90       *           r|       |s
 91       * \param[in] numSites The number of sites in the MPS
 92       * \param[in] physExtent The extent for the physical mode where the gate tensors are acted on. 
 93       * \param[in] maxVirtualExtent The maximal extent allowed for the virtual mode shared between adjacent MPS tensors. 
 94       * \param[in] initialVirtualExtents A vector of size \p numSites-1 where the ith element denotes the extent of the shared mode for site i and site i+1 in the beginning of the simulation.
 95       * \param[in] typeData The data type for all tensors and gates
 96       * \param[in] typeCompute The compute type for all gate splitting process
 97       */
 98      MPSHelper(int32_t numSites, 
 99                int64_t physExtent,
100                int64_t maxVirtualExtent,
101                const std::vector<int64_t>& initialVirtualExtents,
102                cudaDataType_t typeData, 
103                cutensornetComputeType_t typeCompute);
104      
105      /**
106       * \brief Initialize the MPS metadata and cutensornet library.
107       */
108      cutensornetStatus_t initialize();
109
110      /**
111       * \brief Compute the maximal number of elements for each site.
112       */
113      std::vector<size_t> getMaxTensorElements() const;
114
115      /**
116       * \brief Update the SVD truncation setting.
117       * \param[in] absCutoff The cutoff value for absolute singular value truncation.
118       * \param[in] relCutoff The cutoff value for relative singular value truncation.
119       * \param[in] renorm The option for renormalization of the truncated singular values.
120       * \param[in] partition The option for partitioning of the singular values. 
121       */
122      cutensornetStatus_t setSVDConfig(double absCutoff, 
123                                       double relCutoff, 
124                                       cutensornetTensorSVDNormalization_t renorm,
125                                       cutensornetTensorSVDPartition_t partition);
126
127      /**
128       * \brief Update the algorithm to use for the gating process.
129       * \param[in] gateAlgo The gate algorithm to use for MPS simulation.
130       */
131      void setGateAlgorithm(cutensornetGateSplitAlgo_t gateAlgo) {gateAlgo_ = gateAlgo;}
132
133      /**
134       * \brief Compute the maximal workspace needed for MPS gating algorithm.
135       * \param[out] workspaceSize The required workspace size on the device. 
136       */
137      cutensornetStatus_t computeMaxWorkspaceSizes(int64_t* workspaceSize);
138
139      /**
140       * \brief Compute the maximal workspace needed for MPS gating algorithm.
141       * \param[in] work Pointer to the allocated workspace.
142       * \param[in] workspaceSize The required workspace size on the device. 
143       */
144      cutensornetStatus_t setWorkspace(void* work, int64_t workspaceSize);
145
146      /**
147       * \brief In-place execution of the apply gate algorithm on \p siteA and \p siteB.
148       * \param[in] siteA The first site where the gate is applied to.
149       * \param[in] siteB The second site where the gate is applied to. Must be adjacent to \p siteA.
150       * \param[in,out] dataInA The data for the MPS tensor at \p siteA. The input will be overwritten with output mps tensor data.
151       * \param[in,out] dataInB The data for the MPS tensor at \p siteB. The input will be overwritten with output mps tensor data.
152       * \param[in] dataInG The input data for the gate tensor. 
153       * \param[in] verbose Whether to print out the runtime information regarding truncation. 
154       * \param[in] stream The CUDA stream on which the computation is performed.
155       */
156      cutensornetStatus_t applyGate(uint32_t siteA, 
157                                    uint32_t siteB, 
158                                    void* dataInA, 
159                                    void* dataInB, 
160                                    const void* dataInG, 
161                                    bool verbose,
162                                    cudaStream_t stream);
163      
164      /**
165       * \brief Free all the tensor descriptors in mpsHelper.
166       */
167      ~MPSHelper()
168      {
169         if (inited_)
170         {
171            for (auto& descTensor: descTensors_)
172            {
173               cutensornetDestroyTensorDescriptor(descTensor);
174            }
175            cutensornetDestroy(handle_);
176            cutensornetDestroyWorkspaceDescriptor(workDesc_);
177         }
178         if (svdConfig_ != nullptr)
179         {
180            cutensornetDestroyTensorSVDConfig(svdConfig_);
181         }
182         if (svdInfo_ != nullptr)
183         {
184            cutensornetDestroyTensorSVDInfo(svdInfo_);
185         }
186      }
187
188   private:
189      int32_t numSites_; ///< Number of sites in the MPS
190      int64_t physExtent_; ///< Extent for the physical index 
191      int64_t maxVirtualExtent_{0}; ///< The maximal extent allowed for the virtual dimension
192      cudaDataType_t typeData_; 
193      cutensornetComputeType_t typeCompute_;
194      
195      bool inited_{false};
196      std::vector<int32_t> physModes_; ///< A vector of length \p numSites_ storing the physical mode of each site.
197      std::vector<int32_t> virtualModes_; ///< A vector of length \p numSites_+1; For site i, virtualModes_[i] and virtualModes_[i+1] represents the left and right virtual mode.
198      std::vector<int64_t> extentsPerSite_; ///< A vector of length \p numSites_+1; For site i, extentsPerSite_[i] and extentsPerSite_[i+1] represents the left and right virtual extent. 
199
200      cutensornetHandle_t handle_{nullptr};
201      std::vector<cutensornetTensorDescriptor_t> descTensors_; /// A vector of length \p numSites_ storing the cutensornetTensorDescriptor_t for each site
202      cutensornetWorkspaceDescriptor_t workDesc_{nullptr};
203      cutensornetTensorSVDConfig_t svdConfig_{nullptr};
204      cutensornetTensorSVDInfo_t svdInfo_{nullptr};
205      cutensornetGateSplitAlgo_t gateAlgo_{CUTENSORNET_GATE_SPLIT_ALGO_DIRECT};
206      int32_t nextMode_{0}; /// The next mode label to use for labelling site tensors and gates.
207};

Note

For full definition of all the methods, please refer to the sample here.

Setup MPS simulation setting

Next, in the main function, we need to choose the simulation setting for the MPS simulation (i.e., the number of sites, the initial extents, and the data type).

564   /***********************************
565   * Step 1: basic MPS setup
566   ************************************/
567
568   // setup the simulation setting for the MPS
569   typedef std::complex<double> complexType;
570   cudaDataType_t typeData = CUDA_C_64F;
571   cutensornetComputeType_t typeCompute = CUTENSORNET_COMPUTE_64F;
572   int32_t numSites = 16;
573   int64_t physExtent = 2;
574   int64_t maxVirtualExtent = 12;
575   const std::vector<int64_t> initialVirtualExtents(numSites-1, 1);  // starting MPS with shared extent of 1;
576
577   // initialize an MPSHelper to dynamically update tensor metadats   
578   MPSHelper mpsHelper(numSites, physExtent, maxVirtualExtent, initialVirtualExtents, typeData, typeCompute);
579   HANDLE_ERROR( mpsHelper.initialize() );

The MPS metadata and all cuTensorNet library objects will be managed by the MPSHelper while the data pointers are explicitly managed in the main function.

Allocate memory and initialize data

Next, we allocate memory for the MPS operands and four 2-qubit gate tensors. The largest tensor size for each MPS tensor can be queried through the MPSHelper class. The MPS tensors are initialized to a state corresponding to |00..000> and the gate tensors are filled with random values.

582   /***********************************
583   * Step 2: data allocation 
584   ************************************/
585
586   // query largest tensor sizes for the MPS
587   const std::vector<size_t> maxElementsPerSite = mpsHelper.getMaxTensorElements();
588   std::vector<void*> tensors_h;
589   std::vector<void*> tensors_d;
590   for (int32_t i=0; i<numSites; i++)
591   {
592      size_t maxSize = sizeof(complexType) * maxElementsPerSite.at(i);
593      void* data_h = malloc(maxSize);
594      memset(data_h, 0, maxSize);
595      // initialize state to |0000..0000>
596      *(complexType*)(data_h) = complexType(1,0);  
597      void* data_d;
598      HANDLE_CUDA_ERROR( cudaMalloc(&data_d, maxSize) );
599      // data transfer from host to device
600      HANDLE_CUDA_ERROR( cudaMemcpy(data_d, data_h, maxSize, cudaMemcpyHostToDevice) );
601      tensors_h.push_back(data_h);
602      tensors_d.push_back(data_d);
603   }
604
605   // initialize 4 random gate tensors on host and copy them to device
606   const int32_t numRandomGates = 4;
607   const int64_t numGateElements = physExtent * physExtent * physExtent * physExtent;  // shape (2, 2, 2, 2)
608   size_t gateSize = sizeof(complexType) * numGateElements;
609   complexType* gates_h[numRandomGates];
610   void* gates_d[numRandomGates];
611   
612   for (int i=0; i<numRandomGates; i++)
613   {
614      gates_h[i] = (complexType*) malloc(gateSize);
615      HANDLE_CUDA_ERROR( cudaMalloc((void**) &gates_d[i], gateSize) );
616      for (int j=0; j<numGateElements; j++)
617      {
618         gates_h[i][j] = complexType(((float) rand())/RAND_MAX, ((float) rand())/RAND_MAX);
619      }
620      HANDLE_CUDA_ERROR( cudaMemcpy(gates_d[i], gates_h[i], gateSize, cudaMemcpyHostToDevice) );
621   }
622   

Setup gate split options

Then we setup the SVD truncation parameters and the algorithm cutensornetGateSplitAlgo_t to use for the gate split process.

624   /*****************************************
625   * Step 3: setup options for gate operation
626   ******************************************/
627
628   double absCutoff = 1e-2;
629   double relCutoff = 1e-2;
630   cutensornetTensorSVDNormalization_t renorm = CUTENSORNET_TENSOR_SVD_NORMALIZATION_L2; // renormalize the L2 norm of truncated singular values to 1. 
631   cutensornetTensorSVDPartition_t partition = CUTENSORNET_TENSOR_SVD_PARTITION_UV_EQUAL; // equally partition the singular values onto U and V;
632   HANDLE_ERROR( mpsHelper.setSVDConfig(absCutoff, relCutoff, renorm, partition));
633
634   cutensornetGateSplitAlgo_t gateAlgo = CUTENSORNET_GATE_SPLIT_ALGO_REDUCED;
635   mpsHelper.setGateAlgorithm(gateAlgo);

Query and allocate required workspace

Once all simulation settings are set, we can query the required workspace size. Inside the MPSHelper, the required workspace size is estimated on the largest tensor sizes involved in the simulation.

638   /********************************************
639   * Step 4: workspace size query and allocation
640   *********************************************/
641
642   int64_t workspaceSize;
643   HANDLE_ERROR( mpsHelper.computeMaxWorkspaceSizes(&workspaceSize) );
644
645   void *work = nullptr;
646   std::cout << "Maximal workspace size required: " << workspaceSize << std::endl;
647   HANDLE_CUDA_ERROR( cudaMalloc(&work, workspaceSize) );
648
649   HANDLE_ERROR( mpsHelper.setWorkspace(work, workspaceSize));
650   

Execution

At this stage, we can execute the simulation by iterating over all the gate tensors. All the metadata of the MPS will be managed and updated inside the MPSHelper.

652   /***********************************
653   * Step 5: execution
654   ************************************/
655
656   cudaStream_t stream;
657   HANDLE_CUDA_ERROR( cudaStreamCreate(&stream) );
658   uint32_t numLayers = 10; // 10 layers of gate
659   for (uint32_t i=0; i<numLayers; i++)
660   {
661      uint32_t start_site = i % 2;
662      std::cout << "Cycle " << i << ":" << std::endl;
663      bool verbose = (i == numLayers - 1);
664      for (uint32_t j=start_site; j<numSites-1; j=j+2)
665      {
666         uint32_t gateIdx = rand() % numRandomGates; // pick a random gate tensor
667         std::cout << "apply gate " << gateIdx << " on " << j << " and " << j+1<< std::endl;
668         void *dataA = tensors_d[j];
669         void *dataB = tensors_d[j+1];
670         void *dataG = gates_d[gateIdx];
671         HANDLE_ERROR( mpsHelper.applyGate(j, j+1, dataA, dataB, dataG, verbose, stream) );
672      }
673   }
674
675   HANDLE_CUDA_ERROR( cudaStreamSynchronize(stream) );

Free resources

After the simulation, we free up all the data pointers allocated in the main function.

678   /***********************************
679   * Step 6: free resources
680   ************************************/
681   
682   std::cout << "Free all resources" << std::endl;
683
684   for (int i=0; i<numRandomGates; i++)
685   {
686      free(gates_h[i]);
687      HANDLE_CUDA_ERROR( cudaFree(gates_d[i]) );
688   }
689
690   for (int32_t i=0; i<numSites; i++)
691   {
692      free(tensors_h.at(i));
693      HANDLE_CUDA_ERROR( cudaFree(tensors_d.at(i)) );
694   }
695
696   HANDLE_CUDA_ERROR( cudaFree(work) );
697   // The MPSHelper destructor will free all internal resources when out of scope
698   return 0;   
699}

All cuTensorNet library objects owned by the MPSHelper will be freed once out of scope.