The following code example illustrates how to integrate cuTensorNet functionalities to perform basic MPS simulation. The workflow is encapsulated in an MPSHelper class. The full code can be found in the NVIDIA/cuQuantum repository (here).

Define MPSHelper class

We first define an MPSHelper class to keep track of the modes and extents of all physical and virtual bonds. The simulation settings are also stored in this class. Once out of scope, all resource owned by this class will be freed.

 75class MPSHelper
 76{
 77   public:
 78      /**
 79       * \brief Construct an MPSHelper object for gate splitting algorithm.
 80       *        i       j       k
 81       *     -------A-------B-------                      i        j        k
 82       *           p|       |q            ------->     -------A`-------B`-------
 83       *            GGGGGGGGG                                r|        |s
 84       *           r|       |s
 85       * \param[in] numSites The number of sites in the MPS
 86       * \param[in] physExtent The extent for the physical mode where the gate tensors are acted on. 
 87       * \param[in] maxVirtualExtent The maximal extent allowed for the virtual mode shared between adjacent MPS tensors. 
 88       * \param[in] initialVirtualExtents A vector of size \p numSites-1 where the ith element denotes the extent of the shared mode for site i and site i+1 in the beginning of the simulation.
 89       * \param[in] typeData The data type for all tensors and gates
 90       * \param[in] typeCompute The compute type for all gate splitting process
 91       */
 92      MPSHelper(int32_t numSites, 
 93                int64_t physExtent,
 94                int64_t maxVirtualExtent,
 95                const std::vector<int64_t>& initialVirtualExtents,
 96                cudaDataType_t typeData, 
 97                cutensornetComputeType_t typeCompute);
 98      
 99      /**
100       * \brief Initialize the MPS metadata and cutensornet library.
101       */
102      cutensornetStatus_t initialize();
103
104      /**
105       * \brief Compute the maximal number of elements for each site.
106       */
107      std::vector<size_t> getMaxTensorElements() const;
108
109      /**
110       * \brief Update the SVD truncation setting.
111       * \param[in] absCutoff The cutoff value for absolute singular value truncation.
112       * \param[in] relCutoff The cutoff value for relative singular value truncation.
113       * \param[in] renorm The option for renormalization of the truncated singular values.
114       * \param[in] partition The option for partitioning of the singular values. 
115       */
116      cutensornetStatus_t setSVDConfig(double absCutoff, 
117                                       double relCutoff, 
118                                       cutensornetTensorSVDNormalization_t renorm,
119                                       cutensornetTensorSVDPartition_t partition);
120
121      /**
122       * \brief Update the algorithm to use for the gating process.
123       * \param[in] gateAlgo The gate algorithm to use for MPS simulation.
124       */
125      void setGateAlgorithm(cutensornetGateSplitAlgo_t gateAlgo) {gateAlgo_ = gateAlgo;}
126
127      /**
128       * \brief Compute the maximal workspace needed for MPS gating algorithm.
129       * \param[out] workspaceSize The required workspace size on the device. 
130       */
131      cutensornetStatus_t computeMaxWorkspaceSizes(int64_t* workspaceSize);
132
133      /**
134       * \brief Compute the maximal workspace needed for MPS gating algorithm.
135       * \param[in] work Pointer to the allocated workspace.
136       * \param[in] workspaceSize The required workspace size on the device. 
137       */
138      cutensornetStatus_t setWorkspace(void* work, int64_t workspaceSize);
139
140      /**
141       * \brief In-place execution of the apply gate algorithm on \p siteA and \p siteB.
142       * \param[in] siteA The first site where the gate is applied to.
143       * \param[in] siteB The second site where the gate is applied to. Must be adjacent to \p siteA.
144       * \param[in,out] dataInA The data for the MPS tensor at \p siteA. The input will be overwritten with output mps tensor data.
145       * \param[in,out] dataInB The data for the MPS tensor at \p siteB. The input will be overwritten with output mps tensor data.
146       * \param[in] dataInG The input data for the gate tensor. 
147       * \param[in] verbose Whether to print out the runtime information regarding truncation. 
148       * \param[in] stream The CUDA stream on which the computation is performed.
149       */
150      cutensornetStatus_t applyGate(uint32_t siteA, 
151                                    uint32_t siteB, 
152                                    void* dataInA, 
153                                    void* dataInB, 
154                                    const void* dataInG, 
155                                    bool verbose,
156                                    cudaStream_t stream);
157      
158      /**
159       * \brief Free all the tensor descriptors in mpsHelper.
160       */
161      ~MPSHelper()
162      {
163         if (inited_)
164         {
165            for (auto& descTensor: descTensors_)
166            {
167               cutensornetDestroyTensorDescriptor(descTensor);
168            }
169            cutensornetDestroy(handle_);
170            cutensornetDestroyWorkspaceDescriptor(workDesc_);
171         }
172         if (svdConfig_ != nullptr)
173         {
174            cutensornetDestroyTensorSVDConfig(svdConfig_);
175         }
176         if (svdInfo_ != nullptr)
177         {
178            cutensornetDestroyTensorSVDInfo(svdInfo_);
179         }
180      }
181
182   private:
183      int32_t numSites_; ///< Number of sites in the MPS
184      int64_t physExtent_; ///< Extent for the physical index 
185      int64_t maxVirtualExtent_{0}; ///< The maximal extent allowed for the virtual dimension
186      cudaDataType_t typeData_; 
187      cutensornetComputeType_t typeCompute_;
188      
189      bool inited_{false};
190      std::vector<int32_t> physModes_; ///< A vector of length \p numSites_ storing the physical mode of each site.
191      std::vector<int32_t> virtualModes_; ///< A vector of length \p numSites_+1; For site i, virtualModes_[i] and virtualModes_[i+1] represents the left and right virtual mode.
192      std::vector<int64_t> extentsPerSite_; ///< A vector of length \p numSites_+1; For site i, extentsPerSite_[i] and extentsPerSite_[i+1] represents the left and right virtual extent. 
193
194      cutensornetHandle_t handle_{nullptr};
195      std::vector<cutensornetTensorDescriptor_t> descTensors_; /// A vector of length \p numSites_ storing the cutensornetTensorDescriptor_t for each site
196      cutensornetWorkspaceDescriptor_t workDesc_{nullptr};
197      cutensornetTensorSVDConfig_t svdConfig_{nullptr};
198      cutensornetTensorSVDInfo_t svdInfo_{nullptr};
199      cutensornetGateSplitAlgo_t gateAlgo_{CUTENSORNET_GATE_SPLIT_ALGO_DIRECT};
200      int32_t nextMode_{0}; /// The next mode label to use for labelling site tensors and gates.
201};

Note

For full definition of all the methods, please refer to the sample here.

Setup MPS simulation setting

Next, in the main function, we need to choose the simulation setting for the MPS simulation (i.e., the number of sites, the initial extents, and the data type).

557   /***********************************
558   * Step 1: basic MPS setup
559   ************************************/
560
561   // setup the simulation setting for the MPS
562   typedef std::complex<double> complexType;
563   cudaDataType_t typeData = CUDA_C_64F;
564   cutensornetComputeType_t typeCompute = CUTENSORNET_COMPUTE_64F;
565   int32_t numSites = 16;
566   int64_t physExtent = 2;
567   int64_t maxVirtualExtent = 12;
568   const std::vector<int64_t> initialVirtualExtents(numSites-1, 1);  // starting MPS with shared extent of 1;
569
570   // initialize an MPSHelper to dynamically update tensor metadats   
571   MPSHelper mpsHelper(numSites, physExtent, maxVirtualExtent, initialVirtualExtents, typeData, typeCompute);
572   HANDLE_ERROR( mpsHelper.initialize() );

The MPS metadata and all cuTensorNet library objects will be managed by the MPSHelper while the data pointers are explicitly managed in the main function.

Allocate memory and initialize data

Next, we allocate memory for the MPS operands and four 2-qubit gate tensors. The largest tensor size for each MPS tensor can be queried through the MPSHelper class. The MPS tensors are initialized to a state corresponding to |00..000> and the gate tensors are filled with random values.

575   /***********************************
576   * Step 2: data allocation 
577   ************************************/
578
579   // query largest tensor sizes for the MPS
580   const std::vector<size_t> maxElementsPerSite = mpsHelper.getMaxTensorElements();
581   std::vector<void*> tensors_h;
582   std::vector<void*> tensors_d;
583   for (int32_t i=0; i<numSites; i++)
584   {
585      size_t maxSize = sizeof(complexType) * maxElementsPerSite.at(i);
586      void* data_h = malloc(maxSize);
587      memset(data_h, 0, maxSize);
588      // initialize state to |0000..0000>
589      *(complexType*)(data_h) = complexType(1,0);  
590      void* data_d;
591      HANDLE_CUDA_ERROR( cudaMalloc(&data_d, maxSize) );
592      // data transfer from host to device
593      HANDLE_CUDA_ERROR( cudaMemcpy(data_d, data_h, maxSize, cudaMemcpyHostToDevice) );
594      tensors_h.push_back(data_h);
595      tensors_d.push_back(data_d);
596   }
597
598   // initialize 4 random gate tensors on host and copy them to device
599   const int32_t numRandomGates = 4;
600   const int64_t numGateElements = physExtent * physExtent * physExtent * physExtent;  // shape (2, 2, 2, 2)
601   size_t gateSize = sizeof(complexType) * numGateElements;
602   complexType* gates_h[numRandomGates];
603   void* gates_d[numRandomGates];
604   
605   for (int i=0; i<numRandomGates; i++)
606   {
607      gates_h[i] = (complexType*) malloc(gateSize);
608      HANDLE_CUDA_ERROR( cudaMalloc((void**) &gates_d[i], gateSize) );
609      for (int j=0; j<numGateElements; j++)
610      {
611         gates_h[i][j] = complexType(((float) rand())/RAND_MAX, ((float) rand())/RAND_MAX);
612      }
613      HANDLE_CUDA_ERROR( cudaMemcpy(gates_d[i], gates_h[i], gateSize, cudaMemcpyHostToDevice) );
614   }
615   

Setup gate split options

Then we setup the SVD truncation parameters and the algorithm cutensornetGateSplitAlgo_t to use for the gate split process.

617   /*****************************************
618   * Step 3: setup options for gate operation
619   ******************************************/
620
621   double absCutoff = 1e-2;
622   double relCutoff = 1e-2;
623   cutensornetTensorSVDNormalization_t renorm = CUTENSORNET_TENSOR_SVD_NORMALIZATION_L2; // renormalize the L2 norm of truncated singular values to 1. 
624   cutensornetTensorSVDPartition_t partition = CUTENSORNET_TENSOR_SVD_PARTITION_UV_EQUAL; // equally partition the singular values onto U and V;
625   HANDLE_ERROR( mpsHelper.setSVDConfig(absCutoff, relCutoff, renorm, partition));
626
627   cutensornetGateSplitAlgo_t gateAlgo = CUTENSORNET_GATE_SPLIT_ALGO_REDUCED;
628   mpsHelper.setGateAlgorithm(gateAlgo);

Query and allocate required workspace

Once all simulation settings are set, we can query the required workspace size. Inside the MPSHelper, the required workspace size is estimated on the largest tensor sizes involved in the simulation.

631   /********************************************
632   * Step 4: workspace size query and allocation
633   *********************************************/
634
635   int64_t workspaceSize;
636   HANDLE_ERROR( mpsHelper.computeMaxWorkspaceSizes(&workspaceSize) );
637
638   void *work = nullptr;
639   std::cout << "Maximal workspace size required: " << workspaceSize << std::endl;
640   HANDLE_CUDA_ERROR( cudaMalloc(&work, workspaceSize) );
641
642   HANDLE_ERROR( mpsHelper.setWorkspace(work, workspaceSize));
643   

Execution

At this stage, we can execute the simulation by iterating over all the gate tensors. All the metadata of the MPS will be managed and updated inside the MPSHelper.

645   /***********************************
646   * Step 5: execution
647   ************************************/
648
649   cudaStream_t stream;
650   HANDLE_CUDA_ERROR( cudaStreamCreate(&stream) );
651   uint32_t numLayers = 10; // 10 layers of gate
652   for (uint32_t i=0; i<numLayers; i++)
653   {
654      uint32_t start_site = i % 2;
655      std::cout << "Cycle " << i << ":" << std::endl;
656      bool verbose = (i == numLayers - 1);
657      for (uint32_t j=start_site; j<numSites-1; j=j+2)
658      {
659         uint32_t gateIdx = rand() % numRandomGates; // pick a random gate tensor
660         std::cout << "apply gate " << gateIdx << " on " << j << " and " << j+1<< std::endl;
661         void *dataA = tensors_d[j];
662         void *dataB = tensors_d[j+1];
663         void *dataG = gates_d[gateIdx];
664         HANDLE_ERROR( mpsHelper.applyGate(j, j+1, dataA, dataB, dataG, verbose, stream) );
665      }
666   }
667
668   HANDLE_CUDA_ERROR( cudaStreamSynchronize(stream) );

Free resources

After the simulation, we free up all the data pointers allocated in the main function.

671   /***********************************
672   * Step 6: free resources
673   ************************************/
674   
675   std::cout << "Free all resources" << std::endl;
676
677   for (int i=0; i<numRandomGates; i++)
678   {
679      free(gates_h[i]);
680      HANDLE_CUDA_ERROR( cudaFree(gates_d[i]) );
681   }
682
683   for (int32_t i=0; i<numSites; i++)
684   {
685      free(tensors_h.at(i));
686      HANDLE_CUDA_ERROR( cudaFree(tensors_d.at(i)) );
687   }
688
689   HANDLE_CUDA_ERROR( cudaFree(work) );
690   // The MPSHelper destructor will free all internal resources when out of scope
691   return 0;   
692}

All cuTensorNet library objects owned by the MPSHelper will be freed once out of scope.