5.29. Graph Management

This section describes the graph management functions of CUDA runtime application programming interface.

Functions

__host__cudaError_t cudaGraphAddChildGraphNode ( cudaGraphNode_t* pGraphNode, cudaGraph_t graph, const cudaGraphNode_t* pDependencies, size_t numDependencies, cudaGraph_t childGraph )
Creates a child graph node and adds it to a graph.
__host__cudaError_t cudaGraphAddDependencies ( cudaGraph_t graph, const cudaGraphNode_t* from, const cudaGraphNode_t* to, size_t numDependencies )
Adds dependency edges to a graph.
__host__cudaError_t cudaGraphAddEmptyNode ( cudaGraphNode_t* pGraphNode, cudaGraph_t graph, const cudaGraphNode_t* pDependencies, size_t numDependencies )
Creates an empty node and adds it to a graph.
__host__cudaError_t cudaGraphAddHostNode ( cudaGraphNode_t* pGraphNode, cudaGraph_t graph, const cudaGraphNode_t* pDependencies, size_t numDependencies, const cudaHostNodeParams* pNodeParams )
Creates a host execution node and adds it to a graph.
__host__cudaError_t cudaGraphAddKernelNode ( cudaGraphNode_t* pGraphNode, cudaGraph_t graph, const cudaGraphNode_t* pDependencies, size_t numDependencies, const cudaKernelNodeParams* pNodeParams )
Creates a kernel execution node and adds it to a graph.
__host__cudaError_t cudaGraphAddMemcpyNode ( cudaGraphNode_t* pGraphNode, cudaGraph_t graph, const cudaGraphNode_t* pDependencies, size_t numDependencies, const cudaMemcpy3DParms* pCopyParams )
Creates a memcpy node and adds it to a graph.
__host__cudaError_t cudaGraphAddMemsetNode ( cudaGraphNode_t* pGraphNode, cudaGraph_t graph, const cudaGraphNode_t* pDependencies, size_t numDependencies, const cudaMemsetParams* pMemsetParams )
Creates a memset node and adds it to a graph.
__host__cudaError_t cudaGraphChildGraphNodeGetGraph ( cudaGraphNode_t node, cudaGraph_t* pGraph )
Gets a handle to the embedded graph of a child graph node.
__host__cudaError_t cudaGraphClone ( cudaGraph_t* pGraphClone, cudaGraph_t originalGraph )
Clones a graph.
__host__cudaError_t cudaGraphCreate ( cudaGraph_t* pGraph, unsigned int  flags )
Creates a graph.
__host__cudaError_t cudaGraphDestroy ( cudaGraph_t graph )
Destroys a graph.
__host__cudaError_t cudaGraphDestroyNode ( cudaGraphNode_t node )
Remove a node from the graph.
__host__cudaError_t cudaGraphExecDestroy ( cudaGraphExec_t graphExec )
Destroys an executable graph.
__host__cudaError_t cudaGraphExecHostNodeSetParams ( cudaGraphExec_t hGraphExec, cudaGraphNode_t node, const cudaHostNodeParams* pNodeParams )
Sets the parameters for a host node in the given graphExec.
__host__cudaError_t cudaGraphExecKernelNodeSetParams ( cudaGraphExec_t hGraphExec, cudaGraphNode_t node, const cudaKernelNodeParams* pNodeParams )
Sets the parameters for a kernel node in the given graphExec.
__host__cudaError_t cudaGraphExecMemcpyNodeSetParams ( cudaGraphExec_t hGraphExec, cudaGraphNode_t node, const cudaMemcpy3DParms* pNodeParams )
Sets the parameters for a memcpy node in the given graphExec.
__host__cudaError_t cudaGraphExecMemsetNodeSetParams ( cudaGraphExec_t hGraphExec, cudaGraphNode_t node, const cudaMemsetParams* pNodeParams )
Sets the parameters for a memset node in the given graphExec.
__host__cudaError_t cudaGraphExecUpdate ( cudaGraphExec_t hGraphExec, cudaGraph_t hGraph, cudaGraphNode_t* hErrorNode_out, cudaGraphExecUpdateResult ** updateResult_out )
Check whether an executable graph can be updated with a graph and perform the update if possible.
__host__cudaError_t cudaGraphGetEdges ( cudaGraph_t graph, cudaGraphNode_t* from, cudaGraphNode_t* to, size_t* numEdges )
Returns a graph's dependency edges.
__host__cudaError_t cudaGraphGetNodes ( cudaGraph_t graph, cudaGraphNode_t* nodes, size_t* numNodes )
Returns a graph's nodes.
__host__cudaError_t cudaGraphGetRootNodes ( cudaGraph_t graph, cudaGraphNode_t* pRootNodes, size_t* pNumRootNodes )
Returns a graph's root nodes.
__host__cudaError_t cudaGraphHostNodeGetParams ( cudaGraphNode_t node, cudaHostNodeParams* pNodeParams )
Returns a host node's parameters.
__host__cudaError_t cudaGraphHostNodeSetParams ( cudaGraphNode_t node, const cudaHostNodeParams* pNodeParams )
Sets a host node's parameters.
__host__cudaError_t cudaGraphInstantiate ( cudaGraphExec_t* pGraphExec, cudaGraph_t graph, cudaGraphNode_t* pErrorNode, char* pLogBuffer, size_t bufferSize )
Creates an executable graph from a graph.
__host__cudaError_t cudaGraphKernelNodeCopyAttributes ( cudaGraphNode_t hSrc, cudaGraphNode_t hDst )
Copies attributes from source node to destination node.
__host__cudaError_t cudaGraphKernelNodeGetAttribute ( cudaGraphNode_t hNode, cudaKernelNodeAttrID attr, cudaKernelNodeAttrValue* value_out )
Queries node attribute.
__host__cudaError_t cudaGraphKernelNodeGetParams ( cudaGraphNode_t node, cudaKernelNodeParams* pNodeParams )
Returns a kernel node's parameters.
__host__cudaError_t cudaGraphKernelNodeSetAttribute ( cudaGraphNode_t hNode, cudaKernelNodeAttrID attr, const cudaKernelNodeAttrValue* value )
Sets node attribute.
__host__cudaError_t cudaGraphKernelNodeSetParams ( cudaGraphNode_t node, const cudaKernelNodeParams* pNodeParams )
Sets a kernel node's parameters.
__host__cudaError_t cudaGraphLaunch ( cudaGraphExec_t graphExec, cudaStream_t stream )
Launches an executable graph in a stream.
__host__cudaError_t cudaGraphMemcpyNodeGetParams ( cudaGraphNode_t node, cudaMemcpy3DParms* pNodeParams )
Returns a memcpy node's parameters.
__host__cudaError_t cudaGraphMemcpyNodeSetParams ( cudaGraphNode_t node, const cudaMemcpy3DParms* pNodeParams )
Sets a memcpy node's parameters.
__host__cudaError_t cudaGraphMemsetNodeGetParams ( cudaGraphNode_t node, cudaMemsetParams* pNodeParams )
Returns a memset node's parameters.
__host__cudaError_t cudaGraphMemsetNodeSetParams ( cudaGraphNode_t node, const cudaMemsetParams* pNodeParams )
Sets a memset node's parameters.
__host__cudaError_t cudaGraphNodeFindInClone ( cudaGraphNode_t* pNode, cudaGraphNode_t originalNode, cudaGraph_t clonedGraph )
Finds a cloned version of a node.
__host__cudaError_t cudaGraphNodeGetDependencies ( cudaGraphNode_t node, cudaGraphNode_t* pDependencies, size_t* pNumDependencies )
Returns a node's dependencies.
__host__cudaError_t cudaGraphNodeGetDependentNodes ( cudaGraphNode_t node, cudaGraphNode_t* pDependentNodes, size_t* pNumDependentNodes )
Returns a node's dependent nodes.
__host__cudaError_t cudaGraphNodeGetType ( cudaGraphNode_t node, cudaGraphNodeType ** pType )
Returns a node's type.
__host__cudaError_t cudaGraphRemoveDependencies ( cudaGraph_t graph, const cudaGraphNode_t* from, const cudaGraphNode_t* to, size_t numDependencies )
Removes dependency edges from a graph.

Functions

__host__cudaError_t cudaGraphAddChildGraphNode ( cudaGraphNode_t* pGraphNode, cudaGraph_t graph, const cudaGraphNode_t* pDependencies, size_t numDependencies, cudaGraph_t childGraph )
Creates a child graph node and adds it to a graph.
Parameters
pGraphNode
- Returns newly created node
graph
- Graph to which to add the node
pDependencies
- Dependencies of the node
numDependencies
- Number of dependencies
childGraph
- The graph to clone into this node
Description

Creates a new node which executes an embedded graph, and adds it to graph with numDependencies dependencies specified via pDependencies. It is possible for numDependencies to be 0, in which case the node will be placed at the root of the graph. pDependencies may not have any duplicate entries. A handle to the new node will be returned in pGraphNode.

The node executes an embedded child graph. The child graph is cloned in this call.

Note:

See also:

cudaGraphChildGraphNodeGetGraph, cudaGraphCreate, cudaGraphDestroyNode, cudaGraphAddEmptyNode, cudaGraphAddKernelNode, cudaGraphAddHostNode, cudaGraphAddMemcpyNode, cudaGraphAddMemsetNode, cudaGraphClone

__host__cudaError_t cudaGraphAddDependencies ( cudaGraph_t graph, const cudaGraphNode_t* from, const cudaGraphNode_t* to, size_t numDependencies )
Adds dependency edges to a graph.
Parameters
graph
- Graph to which dependencies are added
from
- Array of nodes that provide the dependencies
to
- Array of dependent nodes
numDependencies
- Number of dependencies to be added
Description

The number of dependencies to be added is defined by numDependencies Elements in pFrom and pTo at corresponding indices define a dependency. Each node in pFrom and pTo must belong to graph.

If numDependencies is 0, elements in pFrom and pTo will be ignored. Specifying an existing dependency will return an error.

Note:

See also:

cudaGraphRemoveDependencies, cudaGraphGetEdges, cudaGraphNodeGetDependencies, cudaGraphNodeGetDependentNodes

__host__cudaError_t cudaGraphAddEmptyNode ( cudaGraphNode_t* pGraphNode, cudaGraph_t graph, const cudaGraphNode_t* pDependencies, size_t numDependencies )
Creates an empty node and adds it to a graph.
Parameters
pGraphNode
- Returns newly created node
graph
- Graph to which to add the node
pDependencies
- Dependencies of the node
numDependencies
- Number of dependencies
Description

Creates a new node which performs no operation, and adds it to graph with numDependencies dependencies specified via pDependencies. It is possible for numDependencies to be 0, in which case the node will be placed at the root of the graph. pDependencies may not have any duplicate entries. A handle to the new node will be returned in pGraphNode.

An empty node performs no operation during execution, but can be used for transitive ordering. For example, a phased execution graph with 2 groups of n nodes with a barrier between them can be represented using an empty node and 2*n dependency edges, rather than no empty node and n^2 dependency edges.

Note:

See also:

cudaGraphCreate, cudaGraphDestroyNode, cudaGraphAddChildGraphNode, cudaGraphAddKernelNode, cudaGraphAddHostNode, cudaGraphAddMemcpyNode, cudaGraphAddMemsetNode

__host__cudaError_t cudaGraphAddHostNode ( cudaGraphNode_t* pGraphNode, cudaGraph_t graph, const cudaGraphNode_t* pDependencies, size_t numDependencies, const cudaHostNodeParams* pNodeParams )
Creates a host execution node and adds it to a graph.
Parameters
pGraphNode
- Returns newly created node
graph
- Graph to which to add the node
pDependencies
- Dependencies of the node
numDependencies
- Number of dependencies
pNodeParams
- Parameters for the host node
Description

Creates a new CPU execution node and adds it to graph with numDependencies dependencies specified via pDependencies and arguments specified in pNodeParams. It is possible for numDependencies to be 0, in which case the node will be placed at the root of the graph. pDependencies may not have any duplicate entries. A handle to the new node will be returned in pGraphNode.

When the graph is launched, the node will invoke the specified CPU function. Host nodes are not supported under MPS with pre-Volta GPUs.

Note:

See also:

cudaLaunchHostFunc, cudaGraphHostNodeGetParams, cudaGraphHostNodeSetParams, cudaGraphCreate, cudaGraphDestroyNode, cudaGraphAddChildGraphNode, cudaGraphAddEmptyNode, cudaGraphAddKernelNode, cudaGraphAddMemcpyNode, cudaGraphAddMemsetNode

__host__cudaError_t cudaGraphAddKernelNode ( cudaGraphNode_t* pGraphNode, cudaGraph_t graph, const cudaGraphNode_t* pDependencies, size_t numDependencies, const cudaKernelNodeParams* pNodeParams )
Creates a kernel execution node and adds it to a graph.
Parameters
pGraphNode
- Returns newly created node
graph
- Graph to which to add the node
pDependencies
- Dependencies of the node
numDependencies
- Number of dependencies
pNodeParams
- Parameters for the GPU execution node
Description

Creates a new kernel execution node and adds it to graph with numDependencies dependencies specified via pDependencies and arguments specified in pNodeParams. It is possible for numDependencies to be 0, in which case the node will be placed at the root of the graph. pDependencies may not have any duplicate entries. A handle to the new node will be returned in pGraphNode.

The cudaKernelNodeParams structure is defined as:

‎  struct cudaKernelNodeParams
        {
            void* func;
            dim3 gridDim;
            dim3 blockDim;
            unsigned int sharedMemBytes;
            void **kernelParams;
            void **extra;
        };

When the graph is launched, the node will invoke kernel func on a (gridDim.x x gridDim.y x gridDim.z) grid of blocks. Each block contains (blockDim.x x blockDim.y x blockDim.z) threads.

sharedMem sets the amount of dynamic shared memory that will be available to each thread block.

Kernel parameters to func can be specified in one of two ways:

1) Kernel parameters can be specified via kernelParams. If the kernel has N parameters, then kernelParams needs to be an array of N pointers. Each pointer, from kernelParams[0] to kernelParams[N-1], points to the region of memory from which the actual parameter will be copied. The number of kernel parameters and their offsets and sizes do not need to be specified as that information is retrieved directly from the kernel's image.

2) Kernel parameters can also be packaged by the application into a single buffer that is passed in via extra. This places the burden on the application of knowing each kernel parameter's size and alignment/padding within the buffer. The extra parameter exists to allow this function to take additional less commonly used arguments. extra specifies a list of names of extra settings and their corresponding values. Each extra setting name is immediately followed by the corresponding value. The list must be terminated with either NULL or CU_LAUNCH_PARAM_END.

The error cudaErrorInvalidValue will be returned if kernel parameters are specified with both kernelParams and extra (i.e. both kernelParams and extra are non-NULL).

The kernelParams or extra array, as well as the argument values it points to, are copied during this call.

Note:

Kernels launched using graphs must not use texture and surface references. Reading or writing through any texture or surface reference is undefined behavior. This restriction does not apply to texture and surface objects.

Note:

See also:

cudaLaunchKernel, cudaGraphKernelNodeGetParams, cudaGraphKernelNodeSetParams, cudaGraphCreate, cudaGraphDestroyNode, cudaGraphAddChildGraphNode, cudaGraphAddEmptyNode, cudaGraphAddHostNode, cudaGraphAddMemcpyNode, cudaGraphAddMemsetNode

__host__cudaError_t cudaGraphAddMemcpyNode ( cudaGraphNode_t* pGraphNode, cudaGraph_t graph, const cudaGraphNode_t* pDependencies, size_t numDependencies, const cudaMemcpy3DParms* pCopyParams )
Creates a memcpy node and adds it to a graph.
Parameters
pGraphNode
- Returns newly created node
graph
- Graph to which to add the node
pDependencies
- Dependencies of the node
numDependencies
- Number of dependencies
pCopyParams
- Parameters for the memory copy
Description

Creates a new memcpy node and adds it to graph with numDependencies dependencies specified via pDependencies. It is possible for numDependencies to be 0, in which case the node will be placed at the root of the graph. pDependencies may not have any duplicate entries. A handle to the new node will be returned in pGraphNode.

When the graph is launched, the node will perform the memcpy described by pCopyParams. See cudaMemcpy3D() for a description of the structure and its restrictions.

Memcpy nodes have some additional restrictions with regards to managed memory, if the system contains at least one device which has a zero value for the device attribute cudaDevAttrConcurrentManagedAccess.

Note:

See also:

cudaMemcpy3D, cudaGraphMemcpyNodeGetParams, cudaGraphMemcpyNodeSetParams, cudaGraphCreate, cudaGraphDestroyNode, cudaGraphAddChildGraphNode, cudaGraphAddEmptyNode, cudaGraphAddKernelNode, cudaGraphAddHostNode, cudaGraphAddMemsetNode

__host__cudaError_t cudaGraphAddMemsetNode ( cudaGraphNode_t* pGraphNode, cudaGraph_t graph, const cudaGraphNode_t* pDependencies, size_t numDependencies, const cudaMemsetParams* pMemsetParams )
Creates a memset node and adds it to a graph.
Parameters
pGraphNode
- Returns newly created node
graph
- Graph to which to add the node
pDependencies
- Dependencies of the node
numDependencies
- Number of dependencies
pMemsetParams
- Parameters for the memory set
Description

Creates a new memset node and adds it to graph with numDependencies dependencies specified via pDependencies. It is possible for numDependencies to be 0, in which case the node will be placed at the root of the graph. pDependencies may not have any duplicate entries. A handle to the new node will be returned in pGraphNode.

The element size must be 1, 2, or 4 bytes. When the graph is launched, the node will perform the memset described by pMemsetParams.

Note:

See also:

cudaMemset2D, cudaGraphMemsetNodeGetParams, cudaGraphMemsetNodeSetParams, cudaGraphCreate, cudaGraphDestroyNode, cudaGraphAddChildGraphNode, cudaGraphAddEmptyNode, cudaGraphAddKernelNode, cudaGraphAddHostNode, cudaGraphAddMemcpyNode

__host__cudaError_t cudaGraphChildGraphNodeGetGraph ( cudaGraphNode_t node, cudaGraph_t* pGraph )
Gets a handle to the embedded graph of a child graph node.
Parameters
node
- Node to get the embedded graph for
pGraph
- Location to store a handle to the graph
Description

Gets a handle to the embedded graph in a child graph node. This call does not clone the graph. Changes to the graph will be reflected in the node, and the node retains ownership of the graph.

Note:

See also:

cudaGraphAddChildGraphNode, cudaGraphNodeFindInClone

__host__cudaError_t cudaGraphClone ( cudaGraph_t* pGraphClone, cudaGraph_t originalGraph )
Clones a graph.
Parameters
pGraphClone
- Returns newly created cloned graph
originalGraph
- Graph to clone
Description

This function creates a copy of originalGraph and returns it in pGraphClone. All parameters are copied into the cloned graph. The original graph may be modified after this call without affecting the clone.

Child graph nodes in the original graph are recursively copied into the clone.

Note:

See also:

cudaGraphCreate, cudaGraphNodeFindInClone

__host__cudaError_t cudaGraphCreate ( cudaGraph_t* pGraph, unsigned int  flags )
Creates a graph.
Parameters
pGraph
- Returns newly created graph
flags
- Graph creation flags, must be 0
Description

Creates an empty graph, which is returned via pGraph.

Note:

See also:

cudaGraphAddChildGraphNode, cudaGraphAddEmptyNode, cudaGraphAddKernelNode, cudaGraphAddHostNode, cudaGraphAddMemcpyNode, cudaGraphAddMemsetNode, cudaGraphInstantiate, cudaGraphDestroy, cudaGraphGetNodes, cudaGraphGetRootNodes, cudaGraphGetEdges, cudaGraphClone

__host__cudaError_t cudaGraphDestroy ( cudaGraph_t graph )
Destroys a graph.
Parameters
graph
- Graph to destroy
Description

Destroys the graph specified by graph, as well as all of its nodes.

Note:

See also:

cudaGraphCreate

__host__cudaError_t cudaGraphDestroyNode ( cudaGraphNode_t node )
Remove a node from the graph.
Parameters
node
- Node to remove
Description

Removes node from its graph. This operation also severs any dependencies of other nodes on node and vice versa.

Note:

See also:

cudaGraphAddChildGraphNode, cudaGraphAddEmptyNode, cudaGraphAddKernelNode, cudaGraphAddHostNode, cudaGraphAddMemcpyNode, cudaGraphAddMemsetNode

__host__cudaError_t cudaGraphExecDestroy ( cudaGraphExec_t graphExec )
Destroys an executable graph.
Parameters
graphExec
- Executable graph to destroy
Description

Destroys the executable graph specified by graphExec.

Note:

See also:

cudaGraphInstantiate, cudaGraphLaunch

__host__cudaError_t cudaGraphExecHostNodeSetParams ( cudaGraphExec_t hGraphExec, cudaGraphNode_t node, const cudaHostNodeParams* pNodeParams )
Sets the parameters for a host node in the given graphExec.
Parameters
hGraphExec
- The executable graph in which to set the specified node
node
- Host node from the graph which was used to instantiate graphExec
pNodeParams
- Updated Parameters to set
Description

Updates the work represented by node in hGraphExec as though node had contained pNodeParams at instantiation. node must remain in the graph which was used to instantiate hGraphExec. Changed edges to and from node are ignored.

The modifications only affect future launches of hGraphExec. Already enqueued or running launches of hGraphExec are not affected by this call. node is also not modified by this call.

Note:

See also:

cudaGraphAddHostNode, cudaGraphHostNodeSetParamscudaGraphInstantiatecudaGraphExecKernelNodeSetParamscudaGraphExecMemcpyNodeSetParamscudaGraphExecMemsetNodeSetParams

__host__cudaError_t cudaGraphExecKernelNodeSetParams ( cudaGraphExec_t hGraphExec, cudaGraphNode_t node, const cudaKernelNodeParams* pNodeParams )
Sets the parameters for a kernel node in the given graphExec.
Parameters
hGraphExec
- The executable graph in which to set the specified node
node
- kernel node from the graph from which graphExec was instantiated
pNodeParams
- Updated Parameters to set
Description

Sets the parameters of a kernel node in an executable graph hGraphExec. The node is identified by the corresponding node node in the non-executable graph, from which the executable graph was instantiated.

node must not have been removed from the original graph. The func field of nodeParams cannot be modified and must match the original value. All other values can be modified.

The modifications only affect future launches of hGraphExec. Already enqueued or running launches of hGraphExec are not affected by this call. node is also not modified by this call.

Note:

See also:

cudaGraphAddKernelNode, cudaGraphKernelNodeSetParams, cudaGraphInstantiate

__host__cudaError_t cudaGraphExecMemcpyNodeSetParams ( cudaGraphExec_t hGraphExec, cudaGraphNode_t node, const cudaMemcpy3DParms* pNodeParams )
Sets the parameters for a memcpy node in the given graphExec.
Parameters
hGraphExec
- The executable graph in which to set the specified node
node
pNodeParams
- Updated Parameters to set
Description

Updates the work represented by node in hGraphExec as though node had contained pNodeParams at instantiation. node must remain in the graph which was used to instantiate hGraphExec. Changed edges to and from node are ignored.

The source and destination memory in pNodeParams must be allocated from the same contexts as the original source and destination memory. Both the instantiation-time memory operands and the memory operands in pNodeParams must be 1-dimensional. Zero-length operations are not supported.

The modifications only affect future launches of hGraphExec. Already enqueued or running launches of hGraphExec are not affected by this call. node is also not modified by this call.

Returns cudaErrorInvalidValue if the memory operands' mappings changed or either the original or new memory operands are multidimensional.

Note:

See also:

cudaGraphAddMemcpyNode, cudaGraphMemcpyNodeSetParamscudaGraphInstantiatecudaGraphExecKernelNodeSetParamscudaGraphExecMemsetNodeSetParamscudaGraphExecHostNodeSetParams

__host__cudaError_t cudaGraphExecMemsetNodeSetParams ( cudaGraphExec_t hGraphExec, cudaGraphNode_t node, const cudaMemsetParams* pNodeParams )
Sets the parameters for a memset node in the given graphExec.
Parameters
hGraphExec
- The executable graph in which to set the specified node
node
- Memset node from the graph which was used to instantiate graphExec
pNodeParams
- Updated Parameters to set
Description

Updates the work represented by node in hGraphExec as though node had contained pNodeParams at instantiation. node must remain in the graph which was used to instantiate hGraphExec. Changed edges to and from node are ignored.

The destination memory in pNodeParams must be allocated from the same context as the original destination memory. Both the instantiation-time memory operand and the memory operand in pNodeParams must be 1-dimensional. Zero-length operations are not supported.

The modifications only affect future launches of hGraphExec. Already enqueued or running launches of hGraphExec are not affected by this call. node is also not modified by this call.

Returns cudaErrorInvalidValue if the memory operand's mappings changed or either the original or new memory operand are multidimensional.

Note:

See also:

cudaGraphAddMemsetNode, cudaGraphMemsetNodeSetParamscudaGraphInstantiatecudaGraphExecKernelNodeSetParamscudaGraphExecMemcpyNodeSetParamscudaGraphExecHostNodeSetParams

__host__cudaError_t cudaGraphExecUpdate ( cudaGraphExec_t hGraphExec, cudaGraph_t hGraph, cudaGraphNode_t* hErrorNode_out, cudaGraphExecUpdateResult ** updateResult_out )
Check whether an executable graph can be updated with a graph and perform the update if possible.
Parameters
hGraphExec
The instantiated graph to be updated
hGraph
The graph containing the updated parameters
hErrorNode_out
The node which caused the permissibility check to forbid the update, if any
updateResult_out
Whether the graph update was permitted. If was forbidden, the reason why
Description

Updates the node parameters in the instantiated graph specified by hGraphExec with the node parameters in a topologically identical graph specified by hGraph.

Limitations:

  • Kernel nodes:
  • Memset and memcpy nodes:
    • The CUDA device(s) to which the operand(s) was allocated/mapped cannot change.

    • The source/destination memory must be allocated from the same contexts as the original source/destination memory.

    • Only 1D memsets can be changed.

  • Additional memcpy node restrictions:
    • Changing either the source or destination memory type(i.e. CU_MEMORYTYPE_DEVICE, CU_MEMORYTYPE_ARRAY, etc.) is not supported.

Note: The API may add further restrictions in future releases. The return code should always be checked.

Some node types are not currently supported:

  • Empty graph nodes(cudaGraphNodeTypeEmpty)

  • Child graphs(cudaGraphNodeTypeGraph).

cudaGraphExecUpdate sets updateResult_out to cudaGraphExecUpdateErrorTopologyChanged under the following conditions:

  • The count of nodes directly in hGraphExec and hGraph differ, in which case hErrorNode_out is NULL.

  • A node is deleted in hGraph but not not its pair from hGraphExec, in which case hErrorNode_out is NULL.

  • A node is deleted in hGraphExec but not its pair from hGraph, in which case hErrorNode_out is the pairless node from hGraph.

  • The dependent nodes of a pair differ, in which case hErrorNode_out is the node from hGraph.

cudaGraphExecUpdate sets updateResult_out to:

  • cudaGraphExecUpdateError if passed an invalid value.

  • cudaGraphExecUpdateErrorTopologyChanged if the graph topology changed

  • cudaGraphExecUpdateErrorNodeTypeChanged if the type of a node changed, in which case hErrorNode_out is set to the node from hGraph.

  • cudaGraphExecUpdateErrorFunctionChanged if the func field of a kernel changed, in which case hErrorNode_out is set to the node from hGraph

  • cudaGraphExecUpdateErrorParametersChanged if any parameters to a node changed in a way that is not supported, in which case hErrorNode_out is set to the node from hGraph

  • cudaGraphExecUpdateErrorNotSupported if something about a node is unsupported, like the node's type or configuration, in which case hErrorNode_out is set to the node from hGraph

If updateResult_out isn't set in one of the situations described above, the update check passes and cudaGraphExecUpdate updates hGraphExec to match the contents of hGraph. If an error happens during the update, updateResult_out will be set to cudaGraphExecUpdateError; otherwise, updateResult_out is set to cudaGraphExecUpdateSuccess.

cudaGraphExecUpdate returns cudaSuccess when the updated was performed successfully. It returns cudaErrorGraphExecUpdateFailure if the graph update was not performed because it included changes which violated constraints specific to instantiated graph update.

Note:

See also:

cudaGraphInstantiate,

__host__cudaError_t cudaGraphGetEdges ( cudaGraph_t graph, cudaGraphNode_t* from, cudaGraphNode_t* to, size_t* numEdges )
Returns a graph's dependency edges.
Parameters
graph
- Graph to get the edges from
from
- Location to return edge endpoints
to
- Location to return edge endpoints
numEdges
- See description
Description

Returns a list of graph's dependency edges. Edges are returned via corresponding indices in from and to; that is, the node in to[i] has a dependency on the node in from[i]. from and to may both be NULL, in which case this function only returns the number of edges in numEdges. Otherwise, numEdges entries will be filled in. If numEdges is higher than the actual number of edges, the remaining entries in from and to will be set to NULL, and the number of edges actually returned will be written to numEdges.

Note:

See also:

cudaGraphGetNodes, cudaGraphGetRootNodes, cudaGraphAddDependencies, cudaGraphRemoveDependencies, cudaGraphNodeGetDependencies, cudaGraphNodeGetDependentNodes

__host__cudaError_t cudaGraphGetNodes ( cudaGraph_t graph, cudaGraphNode_t* nodes, size_t* numNodes )
Returns a graph's nodes.
Parameters
graph
- Graph to query
nodes
- Pointer to return the nodes
numNodes
- See description
Description

Returns a list of graph's nodes. nodes may be NULL, in which case this function will return the number of nodes in numNodes. Otherwise, numNodes entries will be filled in. If numNodes is higher than the actual number of nodes, the remaining entries in nodes will be set to NULL, and the number of nodes actually obtained will be returned in numNodes.

Note:

See also:

cudaGraphCreate, cudaGraphGetRootNodes, cudaGraphGetEdges, cudaGraphNodeGetType, cudaGraphNodeGetDependencies, cudaGraphNodeGetDependentNodes

__host__cudaError_t cudaGraphGetRootNodes ( cudaGraph_t graph, cudaGraphNode_t* pRootNodes, size_t* pNumRootNodes )
Returns a graph's root nodes.
Parameters
graph
- Graph to query
pRootNodes
- Pointer to return the root nodes
pNumRootNodes
- See description
Description

Returns a list of graph's root nodes. pRootNodes may be NULL, in which case this function will return the number of root nodes in pNumRootNodes. Otherwise, pNumRootNodes entries will be filled in. If pNumRootNodes is higher than the actual number of root nodes, the remaining entries in pRootNodes will be set to NULL, and the number of nodes actually obtained will be returned in pNumRootNodes.

Note:

See also:

cudaGraphCreate, cudaGraphGetNodes, cudaGraphGetEdges, cudaGraphNodeGetType, cudaGraphNodeGetDependencies, cudaGraphNodeGetDependentNodes

__host__cudaError_t cudaGraphHostNodeGetParams ( cudaGraphNode_t node, cudaHostNodeParams* pNodeParams )
Returns a host node's parameters.
Parameters
node
- Node to get the parameters for
pNodeParams
- Pointer to return the parameters
Description

Returns the parameters of host node node in pNodeParams.

Note:

See also:

cudaLaunchHostFunc, cudaGraphAddHostNode, cudaGraphHostNodeSetParams

__host__cudaError_t cudaGraphHostNodeSetParams ( cudaGraphNode_t node, const cudaHostNodeParams* pNodeParams )
Sets a host node's parameters.
Parameters
node
- Node to set the parameters for
pNodeParams
- Parameters to copy
Description

Sets the parameters of host node node to nodeParams.

Note:

See also:

cudaLaunchHostFunc, cudaGraphAddHostNode, cudaGraphHostNodeGetParams

__host__cudaError_t cudaGraphInstantiate ( cudaGraphExec_t* pGraphExec, cudaGraph_t graph, cudaGraphNode_t* pErrorNode, char* pLogBuffer, size_t bufferSize )
Creates an executable graph from a graph.
Parameters
pGraphExec
- Returns instantiated graph
graph
- Graph to instantiate
pErrorNode
- In case of an instantiation error, this may be modified to indicate a node contributing to the error
pLogBuffer
- A character buffer to store diagnostic messages
bufferSize
- Size of the log buffer in bytes
Description

Instantiates graph as an executable graph. The graph is validated for any structural constraints or intra-node constraints which were not previously validated. If instantiation is successful, a handle to the instantiated graph is returned in pGraphExec.

If there are any errors, diagnostic information may be returned in pErrorNode and pLogBuffer. This is the primary way to inspect instantiation errors. The output will be null terminated unless the diagnostics overflow the buffer. In this case, they will be truncated, and the last byte can be inspected to determine if truncation occurred.

Note:

See also:

cudaGraphCreate, cudaGraphLaunch, cudaGraphExecDestroy

__host__cudaError_t cudaGraphKernelNodeCopyAttributes ( cudaGraphNode_t hSrc, cudaGraphNode_t hDst )
Copies attributes from source node to destination node.
Returns

cudaSuccess, cudaErrorInvalidContext

Description

Copies attributes from source node src to destination node dst. Both node must have the same context.

Note:

Note that this function may also return error codes from previous, asynchronous launches.

See also:

cudaAccessPolicyWindow

__host__cudaError_t cudaGraphKernelNodeGetAttribute ( cudaGraphNode_t hNode, cudaKernelNodeAttrID attr, cudaKernelNodeAttrValue* value_out )
Queries node attribute.
Parameters
hNode
attr
value_out
Description

Queries attribute attr from node hNode and stores it in corresponding member of value_out.

Note:

Note that this function may also return error codes from previous, asynchronous launches.

See also:

cudaAccessPolicyWindow

__host__cudaError_t cudaGraphKernelNodeGetParams ( cudaGraphNode_t node, cudaKernelNodeParams* pNodeParams )
Returns a kernel node's parameters.
Parameters
node
- Node to get the parameters for
pNodeParams
- Pointer to return the parameters
Description

Returns the parameters of kernel node node in pNodeParams. The kernelParams or extra array returned in pNodeParams, as well as the argument values it points to, are owned by the node. This memory remains valid until the node is destroyed or its parameters are modified, and should not be modified directly. Use cudaGraphKernelNodeSetParams to update the parameters of this node.

The params will contain either kernelParams or extra, according to which of these was most recently set on the node.

Note:

See also:

cudaLaunchKernel, cudaGraphAddKernelNode, cudaGraphKernelNodeSetParams

__host__cudaError_t cudaGraphKernelNodeSetAttribute ( cudaGraphNode_t hNode, cudaKernelNodeAttrID attr, const cudaKernelNodeAttrValue* value )
Sets node attribute.
Parameters
hNode
attr
value
Description

Sets attribute attr on node hNode from corresponding attribute of value.

Note:

Note that this function may also return error codes from previous, asynchronous launches.

See also:

cudaAccessPolicyWindow

__host__cudaError_t cudaGraphKernelNodeSetParams ( cudaGraphNode_t node, const cudaKernelNodeParams* pNodeParams )
Sets a kernel node's parameters.
Parameters
node
- Node to set the parameters for
pNodeParams
- Parameters to copy
Description

Sets the parameters of kernel node node to pNodeParams.

Note:

See also:

cudaLaunchKernel, cudaGraphAddKernelNode, cudaGraphKernelNodeGetParams

__host__cudaError_t cudaGraphLaunch ( cudaGraphExec_t graphExec, cudaStream_t stream )
Launches an executable graph in a stream.
Parameters
graphExec
- Executable graph to launch
stream
- Stream in which to launch the graph
Description

Executes graphExec in stream. Only one instance of graphExec may be executing at a time. Each launch is ordered behind both any previous work in stream and any previous launches of graphExec. To execute a graph concurrently, it must be instantiated multiple times into multiple executable graphs.

Note:

See also:

cudaGraphInstantiate, cudaGraphExecDestroy

__host__cudaError_t cudaGraphMemcpyNodeGetParams ( cudaGraphNode_t node, cudaMemcpy3DParms* pNodeParams )
Returns a memcpy node's parameters.
Parameters
node
- Node to get the parameters for
pNodeParams
- Pointer to return the parameters
Description

Returns the parameters of memcpy node node in pNodeParams.

Note:

See also:

cudaMemcpy3D, cudaGraphAddMemcpyNode, cudaGraphMemcpyNodeSetParams

__host__cudaError_t cudaGraphMemcpyNodeSetParams ( cudaGraphNode_t node, const cudaMemcpy3DParms* pNodeParams )
Sets a memcpy node's parameters.
Parameters
node
- Node to set the parameters for
pNodeParams
- Parameters to copy
Description

Sets the parameters of memcpy node node to pNodeParams.

Note:

See also:

cudaMemcpy3D, cudaGraphAddMemcpyNode, cudaGraphMemcpyNodeGetParams

__host__cudaError_t cudaGraphMemsetNodeGetParams ( cudaGraphNode_t node, cudaMemsetParams* pNodeParams )
Returns a memset node's parameters.
Parameters
node
- Node to get the parameters for
pNodeParams
- Pointer to return the parameters
Description

Returns the parameters of memset node node in pNodeParams.

Note:

See also:

cudaMemset2D, cudaGraphAddMemsetNode, cudaGraphMemsetNodeSetParams

__host__cudaError_t cudaGraphMemsetNodeSetParams ( cudaGraphNode_t node, const cudaMemsetParams* pNodeParams )
Sets a memset node's parameters.
Parameters
node
- Node to set the parameters for
pNodeParams
- Parameters to copy
Description

Sets the parameters of memset node node to pNodeParams.

Note:

See also:

cudaMemset2D, cudaGraphAddMemsetNode, cudaGraphMemsetNodeGetParams

__host__cudaError_t cudaGraphNodeFindInClone ( cudaGraphNode_t* pNode, cudaGraphNode_t originalNode, cudaGraph_t clonedGraph )
Finds a cloned version of a node.
Parameters
pNode
- Returns handle to the cloned node
originalNode
- Handle to the original node
clonedGraph
- Cloned graph to query
Description

This function returns the node in clonedGraph corresponding to originalNode in the original graph.

clonedGraph must have been cloned from originalGraph via cudaGraphClone. originalNode must have been in originalGraph at the time of the call to cudaGraphClone, and the corresponding cloned node in clonedGraph must not have been removed. The cloned node is then returned via pClonedNode.

Note:

See also:

cudaGraphClone

__host__cudaError_t cudaGraphNodeGetDependencies ( cudaGraphNode_t node, cudaGraphNode_t* pDependencies, size_t* pNumDependencies )
Returns a node's dependencies.
Parameters
node
- Node to query
pDependencies
- Pointer to return the dependencies
pNumDependencies
- See description
Description

Returns a list of node's dependencies. pDependencies may be NULL, in which case this function will return the number of dependencies in pNumDependencies. Otherwise, pNumDependencies entries will be filled in. If pNumDependencies is higher than the actual number of dependencies, the remaining entries in pDependencies will be set to NULL, and the number of nodes actually obtained will be returned in pNumDependencies.

Note:

See also:

cudaGraphNodeGetDependentNodes, cudaGraphGetNodes, cudaGraphGetRootNodes, cudaGraphGetEdges, cudaGraphAddDependencies, cudaGraphRemoveDependencies

__host__cudaError_t cudaGraphNodeGetDependentNodes ( cudaGraphNode_t node, cudaGraphNode_t* pDependentNodes, size_t* pNumDependentNodes )
Returns a node's dependent nodes.
Parameters
node
- Node to query
pDependentNodes
- Pointer to return the dependent nodes
pNumDependentNodes
- See description
Description

Returns a list of node's dependent nodes. pDependentNodes may be NULL, in which case this function will return the number of dependent nodes in pNumDependentNodes. Otherwise, pNumDependentNodes entries will be filled in. If pNumDependentNodes is higher than the actual number of dependent nodes, the remaining entries in pDependentNodes will be set to NULL, and the number of nodes actually obtained will be returned in pNumDependentNodes.

Note:

See also:

cudaGraphNodeGetDependencies, cudaGraphGetNodes, cudaGraphGetRootNodes, cudaGraphGetEdges, cudaGraphAddDependencies, cudaGraphRemoveDependencies

__host__cudaError_t cudaGraphNodeGetType ( cudaGraphNode_t node, cudaGraphNodeType ** pType )
Returns a node's type.
Parameters
node
- Node to query
pType
- Pointer to return the node type
Description

Returns the node type of node in pType.

Note:

See also:

cudaGraphGetNodes, cudaGraphGetRootNodes, cudaGraphChildGraphNodeGetGraph, cudaGraphKernelNodeGetParams, cudaGraphKernelNodeSetParams, cudaGraphHostNodeGetParams, cudaGraphHostNodeSetParams, cudaGraphMemcpyNodeGetParams, cudaGraphMemcpyNodeSetParams, cudaGraphMemsetNodeGetParams, cudaGraphMemsetNodeSetParams

__host__cudaError_t cudaGraphRemoveDependencies ( cudaGraph_t graph, const cudaGraphNode_t* from, const cudaGraphNode_t* to, size_t numDependencies )
Removes dependency edges from a graph.
Parameters
graph
- Graph from which to remove dependencies
from
- Array of nodes that provide the dependencies
to
- Array of dependent nodes
numDependencies
- Number of dependencies to be removed
Description

The number of pDependencies to be removed is defined by numDependencies. Elements in pFrom and pTo at corresponding indices define a dependency. Each node in pFrom and pTo must belong to graph.

If numDependencies is 0, elements in pFrom and pTo will be ignored. Specifying a non-existing dependency will return an error.

Note:

See also:

cudaGraphAddDependencies, cudaGraphGetEdges, cudaGraphNodeGetDependencies, cudaGraphNodeGetDependentNodes