cuTensorNet Functions ********************* .. role:: raw-html(raw) :format: html Handle Management API ===================== .. _cutensornetCreate-label: :code:`cutensornetCreate` ------------------------- .. doxygenfunction:: cutensornetCreate ---- .. _cutensornetDestroy-label: :code:`cutensornetDestroy` -------------------------- .. doxygenfunction:: cutensornetDestroy ---- Network Descriptor API ====================== .. _cutensornetCreateNetworkDescriptor-label: :code:`cutensornetCreateNetworkDescriptor` ------------------------------------------ .. doxygenfunction:: cutensornetCreateNetworkDescriptor ---- .. _cutensornetDestroyNetworkDescriptor-label: :code:`cutensornetDestroyNetworkDescriptor` ------------------------------------------- .. doxygenfunction:: cutensornetDestroyNetworkDescriptor ---- .. _cutensornetGetOutputTensorDetails-label: :code:`cutensornetGetOutputTensorDetails` ------------------------------------------- .. doxygenfunction:: cutensornetGetOutputTensorDetails ---- Contraction Optimizer API ========================= .. _cutensornetCreateContractionOptimizerConfig-label: :code:`cutensornetCreateContractionOptimizerConfig` --------------------------------------------------- .. doxygenfunction:: cutensornetCreateContractionOptimizerConfig ---- .. _cutensornetDestroyContractionOptimizerConfig-label: :code:`cutensornetDestroyContractionOptimizerConfig` ---------------------------------------------------- .. doxygenfunction:: cutensornetDestroyContractionOptimizerConfig ---- .. _cutensornetContractionOptimizerConfigGetAttribute-label: :code:`cutensornetContractionOptimizerConfigGetAttribute` --------------------------------------------------------- .. doxygenfunction:: cutensornetContractionOptimizerConfigGetAttribute ---- .. _cutensornetContractionOptimizerConfigSetAttribute-label: :code:`cutensornetContractionOptimizerConfigSetAttribute` --------------------------------------------------------- .. 
doxygenfunction:: cutensornetContractionOptimizerConfigSetAttribute ---- .. _cutensornetCreateContractionOptimizerInfo-label: :code:`cutensornetCreateContractionOptimizerInfo` ------------------------------------------------- .. doxygenfunction:: cutensornetCreateContractionOptimizerInfo ---- .. _cutensornetDestroyContractionOptimizerInfo-label: :code:`cutensornetDestroyContractionOptimizerInfo` -------------------------------------------------- .. doxygenfunction:: cutensornetDestroyContractionOptimizerInfo ---- .. _cutensornetContractionOptimize-label: :code:`cutensornetContractionOptimize` -------------------------------------- .. doxygenfunction:: cutensornetContractionOptimize ---- .. _cutensornetContractionOptimizerInfoGetAttribute-label: :code:`cutensornetContractionOptimizerInfoGetAttribute` ------------------------------------------------------- .. doxygenfunction:: cutensornetContractionOptimizerInfoGetAttribute ---- .. _cutensornetContractionOptimizerInfoSetAttribute-label: :code:`cutensornetContractionOptimizerInfoSetAttribute` ------------------------------------------------------- .. doxygenfunction:: cutensornetContractionOptimizerInfoSetAttribute ---- Contraction Plan API ==================== .. _cutensornetCreateContractionPlan-label: :code:`cutensornetCreateContractionPlan` ---------------------------------------- .. doxygenfunction:: cutensornetCreateContractionPlan ---- .. _cutensornetDestroyContractionPlan-label: :code:`cutensornetDestroyContractionPlan` ----------------------------------------- .. doxygenfunction:: cutensornetDestroyContractionPlan ---- .. _cutensornetContractionAutotune-label: :code:`cutensornetContractionAutotune` -------------------------------------- .. doxygenfunction:: cutensornetContractionAutotune ---- .. _cutensornetCreateContractionAutotunePreference-label: :code:`cutensornetCreateContractionAutotunePreference` ------------------------------------------------------ .. 
doxygenfunction:: cutensornetCreateContractionAutotunePreference ---- .. _cutensornetContractionAutotunePreferenceGetAttribute-label: :code:`cutensornetContractionAutotunePreferenceGetAttribute` ------------------------------------------------------------ .. doxygenfunction:: cutensornetContractionAutotunePreferenceGetAttribute ---- .. _cutensornetContractionAutotunePreferenceSetAttribute-label: :code:`cutensornetContractionAutotunePreferenceSetAttribute` ------------------------------------------------------------ .. doxygenfunction:: cutensornetContractionAutotunePreferenceSetAttribute ---- .. _cutensornetDestroyContractionAutotunePreference-label: :code:`cutensornetDestroyContractionAutotunePreference` ------------------------------------------------------- .. doxygenfunction:: cutensornetDestroyContractionAutotunePreference ---- .. _cuTensorNet workspace management API: Workspace Management API ======================== .. _cutensornetCreateWorkspaceDescriptor-label: :code:`cutensornetCreateWorkspaceDescriptor` -------------------------------------------- .. doxygenfunction:: cutensornetCreateWorkspaceDescriptor ---- .. _cutensornetWorkspaceComputeSizes-label: :code:`cutensornetWorkspaceComputeSizes` ---------------------------------------- .. doxygenfunction:: cutensornetWorkspaceComputeSizes ---- .. _cutensornetWorkspaceGetSize-label: :code:`cutensornetWorkspaceGetSize` ----------------------------------- .. doxygenfunction:: cutensornetWorkspaceGetSize ---- .. _cutensornetWorkspaceSet-label: :code:`cutensornetWorkspaceSet` ------------------------------- .. doxygenfunction:: cutensornetWorkspaceSet ---- .. _cutensornetWorkspaceGet-label: :code:`cutensornetWorkspaceGet` ------------------------------- .. doxygenfunction:: cutensornetWorkspaceGet ---- .. _cutensornetDestroyWorkspaceDescriptor-label: :code:`cutensornetDestroyWorkspaceDescriptor` --------------------------------------------- .. 
doxygenfunction:: cutensornetDestroyWorkspaceDescriptor ---- Network Contraction API ======================= .. _cutensornetContraction-label: :code:`cutensornetContraction` ------------------------------ .. doxygenfunction:: cutensornetContraction .. _cuTensorNet memory management API: Memory Management API ===================== A *stream-ordered* memory allocator (or mempool for short) allocates/deallocates memory *asynchronously* from/to a mempool in a stream-ordered fashion, meaning memory operations and computations enqueued on the streams have a well-defined inter- and intra-stream dependency. There are several well-implemented stream-ordered mempools available, such as ``cudaMemPool_t`` that is built-in at the CUDA driver level since CUDA 11.2 (so that all CUDA applications in the same process can easily share the same pool, see `here <https://docs.nvidia.com/cuda/cuda-runtime-api/group__CUDART__MEMORY__POOLS.html>`_) and the RAPIDS Memory Manager (`RMM`_). For a detailed introduction, see the `NVIDIA Developer Blog`_. .. _RMM: https://github.com/rapidsai/rmm .. _NVIDIA Developer Blog: https://developer.nvidia.com/blog/using-cuda-stream-ordered-memory-allocator-part-1/ The new device memory handler APIs allow users to bind a stream-ordered mempool to the library handle, such that cuTensorNet can take care of most of the memory management for users. Below is an illustration of what can be done: .. 
code-block:: c++ MyMemPool pool = MyMemPool(); // kept alive for the entire process in real apps int my_alloc(void* ctx, void** ptr, size_t size, cudaStream_t stream) { // assuming this is the memory allocation routine provided by my mempool return reinterpret_cast<MyMemPool*>(ctx)->alloc(ptr, size, stream); } int my_dealloc(void* ctx, void* ptr, size_t size, cudaStream_t stream) { // assuming this is the memory deallocation routine provided by my mempool return reinterpret_cast<MyMemPool*>(ctx)->dealloc(ptr, size, stream); } // create a mem handler and fill in the required members for the library to use cutensornetDeviceMemHandler_t handler; handler.ctx = reinterpret_cast<void*>(&pool); handler.device_alloc = my_alloc; handler.device_free = my_dealloc; memcpy(handler.name, std::string("my pool").c_str(), CUTENSORNET_ALLOCATOR_NAME_LEN); // bind the handler to the library handle cutensornetSetDeviceMemHandler(handle, &handler); /* ... perform the network creation & optimization as usual ... */ // create a workspace descriptor cutensornetWorkspaceDescriptor_t workDesc; // (this step is optional and workDesc can be set to NULL if one just wants // to use the "recommended" workspace size) cutensornetCreateWorkspaceDescriptor(handle, &workDesc); // User doesn’t compute the required sizes // User doesn’t query the workspace size (but one can if desired) // User doesn’t allocate memory! // User sets workspacePtr=NULL for the corresponding memory space (device, in this case) to indicate the library should // draw memory (of the "recommended" size, if the workspace size is set to 0 as shown below) from the user's pool; // if a nonzero size is set, we would use the given size instead of the recommended one. 
// (this step is also optional if workDesc has been set to NULL) cutensornetWorkspaceSet(handle, workDesc, CUTENSORNET_MEMSPACE_DEVICE, NULL, 0); // create a contraction plan cutensornetContractionPlan_t plan; cutensornetCreateContractionPlan(handle, descNet, optimizerInfo, workDesc, &plan); // autotune the plan with the workspace cutensornetContractionAutotune(handle, plan, rawDataIn, rawDataOut, workDesc, pref, stream); // perform actual contraction with the workspace for (int sliceId=0; sliceId