6.1. Data types used by CUDA driver
Classes
- struct
- struct
- struct
- struct
- struct
- struct
- struct
- struct
- struct
- struct
- struct
- struct
- struct
- struct
- struct
- struct
- struct
- struct
- struct
- struct
- struct
- struct
- struct
- struct
- struct
- struct
- struct
- struct
- struct
- struct
- struct
- struct
- struct
- struct
- struct
- struct
- struct
- struct
- struct
- struct
- struct
- struct
- struct
- struct
- struct
- struct
- struct
- struct
- struct
- struct
- struct
- union
- struct
- struct
- struct
- struct
- struct
- struct
- struct
- struct
- struct
- union
- struct
Defines
- #define CUDA_ARRAY3D_2DARRAY 0x01
- #define CUDA_ARRAY3D_COLOR_ATTACHMENT 0x20
- #define CUDA_ARRAY3D_CUBEMAP 0x04
- #define CUDA_ARRAY3D_DEFERRED_MAPPING 0x80
- #define CUDA_ARRAY3D_DEPTH_TEXTURE 0x10
- #define CUDA_ARRAY3D_LAYERED 0x01
- #define CUDA_ARRAY3D_SPARSE 0x40
- #define CUDA_ARRAY3D_SURFACE_LDST 0x02
- #define CUDA_ARRAY3D_TEXTURE_GATHER 0x08
- #define CUDA_COOPERATIVE_LAUNCH_MULTI_DEVICE_NO_POST_LAUNCH_SYNC 0x02
- #define CUDA_COOPERATIVE_LAUNCH_MULTI_DEVICE_NO_PRE_LAUNCH_SYNC 0x01
- #define CUDA_EGL_INFINITE_TIMEOUT 0xFFFFFFFF
- #define CUDA_EXTERNAL_MEMORY_DEDICATED 0x1
- #define CUDA_EXTERNAL_SEMAPHORE_SIGNAL_SKIP_NVSCIBUF_MEMSYNC 0x01
- #define CUDA_EXTERNAL_SEMAPHORE_WAIT_SKIP_NVSCIBUF_MEMSYNC 0x02
- #define CUDA_NVSCISYNC_ATTR_SIGNAL 0x1
- #define CUDA_NVSCISYNC_ATTR_WAIT 0x2
- #define CUDA_VERSION 12030
- #define CU_ARRAY_SPARSE_PROPERTIES_SINGLE_MIPTAIL 0x1
- #define CU_DEVICE_CPU ((CUdevice)-1)
- #define CU_DEVICE_INVALID ((CUdevice)-2)
- #define CU_GRAPH_COND_ASSIGN_DEFAULT 0x1
- #define CU_GRAPH_KERNEL_NODE_PORT_DEFAULT 0
- #define CU_GRAPH_KERNEL_NODE_PORT_LAUNCH_ORDER 2
- #define CU_GRAPH_KERNEL_NODE_PORT_PROGRAMMATIC 1
- #define CU_IPC_HANDLE_SIZE 64
- #define CU_LAUNCH_PARAM_BUFFER_POINTER
- #define CU_LAUNCH_PARAM_BUFFER_POINTER_AS_INT 0x01
- #define CU_LAUNCH_PARAM_BUFFER_SIZE
- #define CU_LAUNCH_PARAM_BUFFER_SIZE_AS_INT 0x02
- #define CU_LAUNCH_PARAM_END
- #define CU_LAUNCH_PARAM_END_AS_INT 0x00
- #define CU_MEMHOSTALLOC_DEVICEMAP 0x02
- #define CU_MEMHOSTALLOC_PORTABLE 0x01
- #define CU_MEMHOSTALLOC_WRITECOMBINED 0x04
- #define CU_MEMHOSTREGISTER_DEVICEMAP 0x02
- #define CU_MEMHOSTREGISTER_IOMEMORY 0x04
- #define CU_MEMHOSTREGISTER_PORTABLE 0x01
- #define CU_MEMHOSTREGISTER_READ_ONLY 0x08
- #define CU_MEM_CREATE_USAGE_TILE_POOL 0x1
- #define CU_PARAM_TR_DEFAULT -1
- #define CU_STREAM_LEGACY ((CUstream)0x1)
- #define CU_STREAM_PER_THREAD ((CUstream)0x2)
- #define CU_TENSOR_MAP_NUM_QWORDS 16
- #define CU_TRSA_OVERRIDE_FORMAT 0x01
- #define CU_TRSF_DISABLE_TRILINEAR_OPTIMIZATION 0x20
- #define CU_TRSF_NORMALIZED_COORDINATES 0x02
- #define CU_TRSF_READ_AS_INTEGER 0x01
- #define CU_TRSF_SEAMLESS_CUBEMAP 0x40
- #define CU_TRSF_SRGB 0x10
- #define MAX_PLANES 3
Typedefs
- typedef struct CUaccessPolicyWindow_v1 CUaccessPolicyWindow
- typedef CUarray_st * CUarray
- typedef CUctx_st * CUcontext
- typedef CUdevice_v1 CUdevice
- typedef int CUdevice_v1
- typedef CUdeviceptr_v2 CUdeviceptr
- typedef unsigned int CUdeviceptr_v2
- typedef CUeglStreamConnection_st * CUeglStreamConnection
- typedef CUevent_st * CUevent
- typedef struct CUexecAffinityParam_v1 CUexecAffinityParam
- typedef CUextMemory_st * CUexternalMemory
- typedef CUextSemaphore_st * CUexternalSemaphore
- typedef CUfunc_st * CUfunction
- typedef CUgraph_st * CUgraph
- typedef cuuint64_t CUgraphConditionalHandle
- typedef CUgraphExec_st * CUgraphExec
- typedef CUgraphNode_st * CUgraphNode
- typedef CUgraphicsResource_st * CUgraphicsResource
- typedef void(CUDA_CB* CUhostFn )( void* userData )
- typedef CUkern_st * CUkernel
- typedef CUlib_st * CUlibrary
- typedef CUmemPoolHandle_st * CUmemoryPool
- typedef CUmipmappedArray_st * CUmipmappedArray
- typedef CUmod_st * CUmodule
- typedef size_t(CUDA_CB* CUoccupancyB2DSize )( int blockSize )
- typedef CUstream_st * CUstream
- typedef void(CUDA_CB* CUstreamCallback )( CUstream hStream, CUresult status, void* userData )
- typedef CUsurfObject_v1 CUsurfObject
- typedef unsigned long long CUsurfObject_v1
- typedef CUsurfref_st * CUsurfref
- typedef CUtexObject_v1 CUtexObject
- typedef unsigned long long CUtexObject_v1
- typedef CUtexref_st * CUtexref
- typedef CUuserObject_st * CUuserObject
Enumerations
- enum CUDA_POINTER_ATTRIBUTE_ACCESS_FLAGS
- enum CUGPUDirectRDMAWritesOrdering
- enum CUaccessProperty
- enum CUaddress_mode
- enum CUarraySparseSubresourceType
- enum CUarray_cubemap_face
- enum CUarray_format
- enum CUclusterSchedulingPolicy
- enum CUcomputemode
- enum CUctx_flags
- enum CUdevice_P2PAttribute
- enum CUdevice_attribute
- enum CUdriverProcAddressQueryResult
- enum CUdriverProcAddress_flags
- enum CUeglColorFormat
- enum CUeglFrameType
- enum CUeglResourceLocationFlags
- enum CUevent_flags
- enum CUevent_record_flags
- enum CUevent_sched_flags
- enum CUevent_wait_flags
- enum CUexecAffinityType
- enum CUexternalMemoryHandleType
- enum CUexternalSemaphoreHandleType
- enum CUfilter_mode
- enum CUflushGPUDirectRDMAWritesOptions
- enum CUflushGPUDirectRDMAWritesScope
- enum CUflushGPUDirectRDMAWritesTarget
- enum CUfunc_cache
- enum CUfunction_attribute
- enum CUgraphConditionalNodeType
- enum CUgraphDebugDot_flags
- enum CUgraphDependencyType
- enum CUgraphExecUpdateResult
- enum CUgraphInstantiateResult
- enum CUgraphInstantiate_flags
- enum CUgraphNodeType
- enum CUgraphicsMapResourceFlags
- enum CUgraphicsRegisterFlags
- enum CUipcMem_flags
- enum CUjitInputType
- enum CUjit_cacheMode
- enum CUjit_fallback
- enum CUjit_option
- enum CUjit_target
- enum CUlaunchAttributeID
- enum CUlaunchMemSyncDomain
- enum CUlibraryOption
- enum CUlimit
- enum CUmemAccess_flags
- enum CUmemAllocationCompType
- enum CUmemAllocationGranularity_flags
- enum CUmemAllocationHandleType
- enum CUmemAllocationType
- enum CUmemAttach_flags
- enum CUmemHandleType
- enum CUmemLocationType
- enum CUmemOperationType
- enum CUmemPool_attribute
- enum CUmemRangeHandleType
- enum CUmem_advise
- enum CUmemorytype
- enum CUmulticastGranularity_flags
- enum CUoccupancy_flags
- enum CUpointer_attribute
- enum CUresourceViewFormat
- enum CUresourcetype
- enum CUresult
- enum CUshared_carveout
- enum CUsharedconfig
- enum CUstreamBatchMemOpType
- enum CUstreamCaptureMode
- enum CUstreamCaptureStatus
- enum CUstreamMemoryBarrier_flags
- enum CUstreamUpdateCaptureDependencies_flags
- enum CUstreamWaitValue_flags
- enum CUstreamWriteValue_flags
- enum CUstream_flags
- enum CUtensorMapDataType
- enum CUtensorMapFloatOOBfill
- enum CUtensorMapInterleave
- enum CUtensorMapL2promotion
- enum CUtensorMapSwizzle
- enum CUuserObjectRetain_flags
- enum CUuserObject_flags
- enum cl_context_flags
- enum cl_event_flags
Defines
- #define CUDA_ARRAY3D_2DARRAY 0x01
-
Deprecated, use CUDA_ARRAY3D_LAYERED
- #define CUDA_ARRAY3D_COLOR_ATTACHMENT 0x20
-
This flag indicates that the CUDA array may be bound as a color target in an external graphics API
- #define CUDA_ARRAY3D_CUBEMAP 0x04
-
If set, the CUDA array is a collection of six 2D arrays, representing faces of a cube. The width of such a CUDA array must be equal to its height, and Depth must be six. If CUDA_ARRAY3D_LAYERED flag is also set, then the CUDA array is a collection of cubemaps and Depth must be a multiple of six.
- #define CUDA_ARRAY3D_DEFERRED_MAPPING 0x80
-
This flag if set indicates that the CUDA array or CUDA mipmapped array will allow deferred memory mapping
- #define CUDA_ARRAY3D_DEPTH_TEXTURE 0x10
-
This flag if set indicates that the CUDA array is a DEPTH_TEXTURE.
- #define CUDA_ARRAY3D_LAYERED 0x01
-
If set, the CUDA array is a collection of layers, where each layer is either a 1D or a 2D array and the Depth member of CUDA_ARRAY3D_DESCRIPTOR specifies the number of layers, not the depth of a 3D array.
- #define CUDA_ARRAY3D_SPARSE 0x40
-
This flag if set indicates that the CUDA array or CUDA mipmapped array is a sparse CUDA array or CUDA mipmapped array respectively
- #define CUDA_ARRAY3D_SURFACE_LDST 0x02
-
This flag must be set in order to bind a surface reference to the CUDA array
- #define CUDA_ARRAY3D_TEXTURE_GATHER 0x08
-
This flag must be set in order to perform texture gather operations on a CUDA array.
- #define CUDA_COOPERATIVE_LAUNCH_MULTI_DEVICE_NO_POST_LAUNCH_SYNC 0x02
-
If set, any subsequent work pushed in a stream that participated in a call to cuLaunchCooperativeKernelMultiDevice will only wait for the kernel launched on the GPU corresponding to that stream to complete before it begins execution.
- #define CUDA_COOPERATIVE_LAUNCH_MULTI_DEVICE_NO_PRE_LAUNCH_SYNC 0x01
-
If set, each kernel launched as part of cuLaunchCooperativeKernelMultiDevice only waits for prior work in the stream corresponding to that GPU to complete before the kernel begins execution.
- #define CUDA_EGL_INFINITE_TIMEOUT 0xFFFFFFFF
-
Indicates that timeout for cuEGLStreamConsumerAcquireFrame is infinite.
- #define CUDA_EXTERNAL_MEMORY_DEDICATED 0x1
-
Indicates that the external memory object is a dedicated resource
- #define CUDA_EXTERNAL_SEMAPHORE_SIGNAL_SKIP_NVSCIBUF_MEMSYNC 0x01
-
When the flags parameter of CUDA_EXTERNAL_SEMAPHORE_SIGNAL_PARAMS contains this flag, it indicates that signaling an external semaphore object should skip performing appropriate memory synchronization operations over all the external memory objects that are imported as CU_EXTERNAL_MEMORY_HANDLE_TYPE_NVSCIBUF, which otherwise are performed by default to ensure data coherency with other importers of the same NvSciBuf memory objects.
- #define CUDA_EXTERNAL_SEMAPHORE_WAIT_SKIP_NVSCIBUF_MEMSYNC 0x02
-
When the flags parameter of CUDA_EXTERNAL_SEMAPHORE_WAIT_PARAMS contains this flag, it indicates that waiting on an external semaphore object should skip performing appropriate memory synchronization operations over all the external memory objects that are imported as CU_EXTERNAL_MEMORY_HANDLE_TYPE_NVSCIBUF, which otherwise are performed by default to ensure data coherency with other importers of the same NvSciBuf memory objects.
- #define CUDA_NVSCISYNC_ATTR_SIGNAL 0x1
-
When flags of cuDeviceGetNvSciSyncAttributes is set to this, it indicates that application needs signaler specific NvSciSyncAttr to be filled by cuDeviceGetNvSciSyncAttributes.
- #define CUDA_NVSCISYNC_ATTR_WAIT 0x2
-
When flags of cuDeviceGetNvSciSyncAttributes is set to this, it indicates that application needs waiter specific NvSciSyncAttr to be filled by cuDeviceGetNvSciSyncAttributes.
- #define CUDA_VERSION 12030
-
CUDA API version number
- #define CU_ARRAY_SPARSE_PROPERTIES_SINGLE_MIPTAIL 0x1
-
Indicates that the layered sparse CUDA array or CUDA mipmapped array has a single mip tail region for all layers
- #define CU_DEVICE_CPU ((CUdevice)-1)
-
Device that represents the CPU
- #define CU_DEVICE_INVALID ((CUdevice)-2)
-
Device that represents an invalid device
- #define CU_GRAPH_COND_ASSIGN_DEFAULT 0x1
-
Conditional node handle flags Default value is applied when graph is launched.
- #define CU_GRAPH_KERNEL_NODE_PORT_DEFAULT 0
-
This port activates when the kernel has finished executing.
- #define CU_GRAPH_KERNEL_NODE_PORT_LAUNCH_ORDER 2
-
This port activates when all blocks of the kernel have begun execution. See also CU_LAUNCH_ATTRIBUTE_LAUNCH_COMPLETION_EVENT.
- #define CU_GRAPH_KERNEL_NODE_PORT_PROGRAMMATIC 1
-
This port activates when all blocks of the kernel have performed cudaTriggerProgrammaticLaunchCompletion() or have terminated. It must be used with edge type CU_GRAPH_DEPENDENCY_TYPE_PROGRAMMATIC. See also CU_LAUNCH_ATTRIBUTE_PROGRAMMATIC_EVENT.
- #define CU_IPC_HANDLE_SIZE 64
-
CUDA IPC handle size
- #define CU_LAUNCH_PARAM_BUFFER_POINTER
-
Indicator that the next value in the extra parameter to cuLaunchKernel will be a pointer to a buffer containing all kernel parameters used for launching kernel f. This buffer needs to honor all alignment/padding requirements of the individual parameters. If CU_LAUNCH_PARAM_BUFFER_SIZE is not also specified in the extra array, then CU_LAUNCH_PARAM_BUFFER_POINTER will have no effect.
Value
((void*)CU_LAUNCH_PARAM_BUFFER_POINTER_AS_INT)
- #define CU_LAUNCH_PARAM_BUFFER_POINTER_AS_INT 0x01
-
C++ compile time constant for CU_LAUNCH_PARAM_BUFFER_POINTER
- #define CU_LAUNCH_PARAM_BUFFER_SIZE
-
Indicator that the next value in the extra parameter to cuLaunchKernel will be a pointer to a size_t which contains the size of the buffer specified with CU_LAUNCH_PARAM_BUFFER_POINTER. It is required that CU_LAUNCH_PARAM_BUFFER_POINTER also be specified in the extra array if the value associated with CU_LAUNCH_PARAM_BUFFER_SIZE is not zero.
Value
((void*)CU_LAUNCH_PARAM_BUFFER_SIZE_AS_INT)
- #define CU_LAUNCH_PARAM_BUFFER_SIZE_AS_INT 0x02
-
C++ compile time constant for CU_LAUNCH_PARAM_BUFFER_SIZE
- #define CU_LAUNCH_PARAM_END
-
End of array terminator for the extra parameter to cuLaunchKernel
Value
((void*)CU_LAUNCH_PARAM_END_AS_INT)
- #define CU_LAUNCH_PARAM_END_AS_INT 0x00
-
C++ compile time constant for CU_LAUNCH_PARAM_END
- #define CU_MEMHOSTALLOC_DEVICEMAP 0x02
-
If set, host memory is mapped into CUDA address space and cuMemHostGetDevicePointer() may be called on the host pointer. Flag for cuMemHostAlloc()
- #define CU_MEMHOSTALLOC_PORTABLE 0x01
-
If set, host memory is portable between CUDA contexts. Flag for cuMemHostAlloc()
- #define CU_MEMHOSTALLOC_WRITECOMBINED 0x04
-
If set, host memory is allocated as write-combined - fast to write, faster to DMA, slow to read except via SSE4 streaming load instruction (MOVNTDQA). Flag for cuMemHostAlloc()
- #define CU_MEMHOSTREGISTER_DEVICEMAP 0x02
-
If set, host memory is mapped into CUDA address space and cuMemHostGetDevicePointer() may be called on the host pointer. Flag for cuMemHostRegister()
- #define CU_MEMHOSTREGISTER_IOMEMORY 0x04
-
If set, the passed memory pointer is treated as pointing to some memory-mapped I/O space, e.g. belonging to a third-party PCIe device. On Windows the flag is a no-op. On Linux that memory is marked as non cache-coherent for the GPU and is expected to be physically contiguous. It may return CUDA_ERROR_NOT_PERMITTED if run as an unprivileged user, CUDA_ERROR_NOT_SUPPORTED on older Linux kernel versions. On all other platforms, it is not supported and CUDA_ERROR_NOT_SUPPORTED is returned. Flag for cuMemHostRegister()
- #define CU_MEMHOSTREGISTER_PORTABLE 0x01
-
If set, host memory is portable between CUDA contexts. Flag for cuMemHostRegister()
- #define CU_MEMHOSTREGISTER_READ_ONLY 0x08
-
If set, the passed memory pointer is treated as pointing to memory that is considered read-only by the device. On platforms without CU_DEVICE_ATTRIBUTE_PAGEABLE_MEMORY_ACCESS_USES_HOST_PAGE_TABLES, this flag is required in order to register memory mapped to the CPU as read-only. Support for the use of this flag can be queried from the device attribute CU_DEVICE_ATTRIBUTE_READ_ONLY_HOST_REGISTER_SUPPORTED. Using this flag with a current context associated with a device that does not have this attribute set will cause cuMemHostRegister to error with CUDA_ERROR_NOT_SUPPORTED.
- #define CU_MEM_CREATE_USAGE_TILE_POOL 0x1
-
This flag if set indicates that the memory will be used as a tile pool.
- #define CU_PARAM_TR_DEFAULT -1
-
For texture references loaded into the module, use default texunit from texture reference.
- #define CU_STREAM_LEGACY ((CUstream)0x1)
-
Legacy stream handle
Stream handle that can be passed as a CUstream to use an implicit stream with legacy synchronization behavior.
See details of the synchronization behavior.
- #define CU_STREAM_PER_THREAD ((CUstream)0x2)
-
Per-thread stream handle
Stream handle that can be passed as a CUstream to use an implicit stream with per-thread synchronization behavior.
See details of the synchronization behavior.
- #define CU_TENSOR_MAP_NUM_QWORDS 16
-
Size of tensor map descriptor
- #define CU_TRSA_OVERRIDE_FORMAT 0x01
-
Override the texref format with a format inferred from the array. Flag for cuTexRefSetArray()
- #define CU_TRSF_DISABLE_TRILINEAR_OPTIMIZATION 0x20
-
Disable any trilinear filtering optimizations. Flag for cuTexRefSetFlags() and cuTexObjectCreate()
- #define CU_TRSF_NORMALIZED_COORDINATES 0x02
-
Use normalized texture coordinates in the range [0,1) instead of [0,dim). Flag for cuTexRefSetFlags() and cuTexObjectCreate()
- #define CU_TRSF_READ_AS_INTEGER 0x01
-
Read the texture as integers rather than promoting the values to floats in the range [0,1]. Flag for cuTexRefSetFlags() and cuTexObjectCreate()
- #define CU_TRSF_SEAMLESS_CUBEMAP 0x40
-
Enable seamless cube map filtering. Flag for cuTexObjectCreate()
- #define CU_TRSF_SRGB 0x10
-
Perform sRGB->linear conversion during texture read. Flag for cuTexRefSetFlags() and cuTexObjectCreate()
- #define MAX_PLANES 3
-
Maximum number of planes per frame
Typedefs
- typedef struct CUaccessPolicyWindow_v1 CUaccessPolicyWindow
-
Access policy window
- typedef CUarray_st * CUarray
-
CUDA array
- typedef CUctx_st * CUcontext
-
CUDA context
- typedef CUdevice_v1 CUdevice
-
CUDA device
- typedef int CUdevice_v1
-
CUDA device
- typedef CUdeviceptr_v2 CUdeviceptr
-
CUDA device pointer
- typedef unsigned int CUdeviceptr_v2
-
CUDA device pointer CUdeviceptr is defined as an unsigned integer type whose size matches the size of a pointer on the target platform.
- typedef CUeglStreamConnection_st * CUeglStreamConnection
-
CUDA EGLSream Connection
- typedef CUevent_st * CUevent
-
CUDA event
- typedef struct CUexecAffinityParam_v1 CUexecAffinityParam
-
Execution Affinity Parameters
- typedef CUextMemory_st * CUexternalMemory
-
CUDA external memory
- typedef CUextSemaphore_st * CUexternalSemaphore
-
CUDA external semaphore
- typedef CUfunc_st * CUfunction
-
CUDA function
- typedef CUgraph_st * CUgraph
-
CUDA graph
- typedef cuuint64_t CUgraphConditionalHandle
-
CUDA graph conditional handle
- typedef CUgraphExec_st * CUgraphExec
-
CUDA executable graph
- typedef CUgraphNode_st * CUgraphNode
-
CUDA graph node
- typedef CUgraphicsResource_st * CUgraphicsResource
-
CUDA graphics interop resource
- void(CUDA_CB* CUhostFn )( void* userData )
-
CUDA host function
- userData
- Argument value passed to the function
- typedef CUkern_st * CUkernel
-
CUDA kernel
- typedef CUlib_st * CUlibrary
-
CUDA library
- typedef CUmemPoolHandle_st * CUmemoryPool
-
CUDA memory pool
- typedef CUmipmappedArray_st * CUmipmappedArray
-
CUDA mipmapped array
- typedef CUmod_st * CUmodule
-
CUDA module
- size_t(CUDA_CB* CUoccupancyB2DSize )( int blockSize )
-
Block size to per-block dynamic shared memory mapping for a certain kernel
- blockSize
- Block size of the kernel.
- typedef CUstream_st * CUstream
-
CUDA stream
- void(CUDA_CB* CUstreamCallback )( CUstream hStream, CUresult status, void* userData )
-
CUDA stream callback
- hStream
- The stream the callback was added to, as passed to cuStreamAddCallback. May be NULL.
- CUresult status
- userData
- User parameter provided at registration.
- typedef CUsurfObject_v1 CUsurfObject
-
An opaque value that represents a CUDA surface object
- typedef unsigned long long CUsurfObject_v1
-
An opaque value that represents a CUDA surface object
- typedef CUsurfref_st * CUsurfref
-
CUDA surface reference
- typedef CUtexObject_v1 CUtexObject
-
An opaque value that represents a CUDA texture object
- typedef unsigned long long CUtexObject_v1
-
An opaque value that represents a CUDA texture object
- typedef CUtexref_st * CUtexref
-
CUDA texture reference
- typedef CUuserObject_st * CUuserObject
-
CUDA user object for graphs
Parameters
Parameters
Returns
The dynamic shared memory needed by a block.
Parameters
Enumerations
- enum CUDA_POINTER_ATTRIBUTE_ACCESS_FLAGS
-
Access flags that specify the level of access the current context's device has on the memory referenced.
Values
- CU_POINTER_ATTRIBUTE_ACCESS_FLAG_NONE = 0x0
- No access, meaning the device cannot access this memory at all, thus must be staged through accessible memory in order to complete certain operations
- CU_POINTER_ATTRIBUTE_ACCESS_FLAG_READ = 0x1
- Read-only access, meaning writes to this memory are considered invalid accesses and thus return error in that case.
- CU_POINTER_ATTRIBUTE_ACCESS_FLAG_READWRITE = 0x3
- Read-write access, the device has full read-write access to the memory
- enum CUGPUDirectRDMAWritesOrdering
-
Platform native ordering for GPUDirect RDMA writes
Values
- CU_GPU_DIRECT_RDMA_WRITES_ORDERING_NONE = 0
- The device does not natively support ordering of remote writes. cuFlushGPUDirectRDMAWrites() can be leveraged if supported.
- CU_GPU_DIRECT_RDMA_WRITES_ORDERING_OWNER = 100
- Natively, the device can consistently consume remote writes, although other CUDA devices may not.
- CU_GPU_DIRECT_RDMA_WRITES_ORDERING_ALL_DEVICES = 200
- Any CUDA device in the system can consistently consume remote writes to this device.
- enum CUaccessProperty
-
Specifies performance hint with CUaccessPolicyWindow for hitProp and missProp members.
Values
- CU_ACCESS_PROPERTY_NORMAL = 0
- Normal cache persistence.
- CU_ACCESS_PROPERTY_STREAMING = 1
- Streaming access is less likely to persit from cache.
- CU_ACCESS_PROPERTY_PERSISTING = 2
- Persisting access is more likely to persist in cache.
- enum CUaddress_mode
-
Texture reference addressing modes
Values
- CU_TR_ADDRESS_MODE_WRAP = 0
- Wrapping address mode
- CU_TR_ADDRESS_MODE_CLAMP = 1
- Clamp to edge address mode
- CU_TR_ADDRESS_MODE_MIRROR = 2
- Mirror address mode
- CU_TR_ADDRESS_MODE_BORDER = 3
- Border address mode
- enum CUarraySparseSubresourceType
-
Sparse subresource types
Values
- CU_ARRAY_SPARSE_SUBRESOURCE_TYPE_SPARSE_LEVEL = 0
- CU_ARRAY_SPARSE_SUBRESOURCE_TYPE_MIPTAIL = 1
- enum CUarray_cubemap_face
-
Array indices for cube faces
Values
- CU_CUBEMAP_FACE_POSITIVE_X = 0x00
- Positive X face of cubemap
- CU_CUBEMAP_FACE_NEGATIVE_X = 0x01
- Negative X face of cubemap
- CU_CUBEMAP_FACE_POSITIVE_Y = 0x02
- Positive Y face of cubemap
- CU_CUBEMAP_FACE_NEGATIVE_Y = 0x03
- Negative Y face of cubemap
- CU_CUBEMAP_FACE_POSITIVE_Z = 0x04
- Positive Z face of cubemap
- CU_CUBEMAP_FACE_NEGATIVE_Z = 0x05
- Negative Z face of cubemap
- enum CUarray_format
-
Array formats
Values
- CU_AD_FORMAT_UNSIGNED_INT8 = 0x01
- Unsigned 8-bit integers
- CU_AD_FORMAT_UNSIGNED_INT16 = 0x02
- Unsigned 16-bit integers
- CU_AD_FORMAT_UNSIGNED_INT32 = 0x03
- Unsigned 32-bit integers
- CU_AD_FORMAT_SIGNED_INT8 = 0x08
- Signed 8-bit integers
- CU_AD_FORMAT_SIGNED_INT16 = 0x09
- Signed 16-bit integers
- CU_AD_FORMAT_SIGNED_INT32 = 0x0a
- Signed 32-bit integers
- CU_AD_FORMAT_HALF = 0x10
- 16-bit floating point
- CU_AD_FORMAT_FLOAT = 0x20
- 32-bit floating point
- CU_AD_FORMAT_NV12 = 0xb0
- 8-bit YUV planar format, with 4:2:0 sampling
- CU_AD_FORMAT_UNORM_INT8X1 = 0xc0
- 1 channel unsigned 8-bit normalized integer
- CU_AD_FORMAT_UNORM_INT8X2 = 0xc1
- 2 channel unsigned 8-bit normalized integer
- CU_AD_FORMAT_UNORM_INT8X4 = 0xc2
- 4 channel unsigned 8-bit normalized integer
- CU_AD_FORMAT_UNORM_INT16X1 = 0xc3
- 1 channel unsigned 16-bit normalized integer
- CU_AD_FORMAT_UNORM_INT16X2 = 0xc4
- 2 channel unsigned 16-bit normalized integer
- CU_AD_FORMAT_UNORM_INT16X4 = 0xc5
- 4 channel unsigned 16-bit normalized integer
- CU_AD_FORMAT_SNORM_INT8X1 = 0xc6
- 1 channel signed 8-bit normalized integer
- CU_AD_FORMAT_SNORM_INT8X2 = 0xc7
- 2 channel signed 8-bit normalized integer
- CU_AD_FORMAT_SNORM_INT8X4 = 0xc8
- 4 channel signed 8-bit normalized integer
- CU_AD_FORMAT_SNORM_INT16X1 = 0xc9
- 1 channel signed 16-bit normalized integer
- CU_AD_FORMAT_SNORM_INT16X2 = 0xca
- 2 channel signed 16-bit normalized integer
- CU_AD_FORMAT_SNORM_INT16X4 = 0xcb
- 4 channel signed 16-bit normalized integer
- CU_AD_FORMAT_BC1_UNORM = 0x91
- 4 channel unsigned normalized block-compressed (BC1 compression) format
- CU_AD_FORMAT_BC1_UNORM_SRGB = 0x92
- 4 channel unsigned normalized block-compressed (BC1 compression) format with sRGB encoding
- CU_AD_FORMAT_BC2_UNORM = 0x93
- 4 channel unsigned normalized block-compressed (BC2 compression) format
- CU_AD_FORMAT_BC2_UNORM_SRGB = 0x94
- 4 channel unsigned normalized block-compressed (BC2 compression) format with sRGB encoding
- CU_AD_FORMAT_BC3_UNORM = 0x95
- 4 channel unsigned normalized block-compressed (BC3 compression) format
- CU_AD_FORMAT_BC3_UNORM_SRGB = 0x96
- 4 channel unsigned normalized block-compressed (BC3 compression) format with sRGB encoding
- CU_AD_FORMAT_BC4_UNORM = 0x97
- 1 channel unsigned normalized block-compressed (BC4 compression) format
- CU_AD_FORMAT_BC4_SNORM = 0x98
- 1 channel signed normalized block-compressed (BC4 compression) format
- CU_AD_FORMAT_BC5_UNORM = 0x99
- 2 channel unsigned normalized block-compressed (BC5 compression) format
- CU_AD_FORMAT_BC5_SNORM = 0x9a
- 2 channel signed normalized block-compressed (BC5 compression) format
- CU_AD_FORMAT_BC6H_UF16 = 0x9b
- 3 channel unsigned half-float block-compressed (BC6H compression) format
- CU_AD_FORMAT_BC6H_SF16 = 0x9c
- 3 channel signed half-float block-compressed (BC6H compression) format
- CU_AD_FORMAT_BC7_UNORM = 0x9d
- 4 channel unsigned normalized block-compressed (BC7 compression) format
- CU_AD_FORMAT_BC7_UNORM_SRGB = 0x9e
- 4 channel unsigned normalized block-compressed (BC7 compression) format with sRGB encoding
- enum CUclusterSchedulingPolicy
-
Cluster scheduling policies. These may be passed to cuFuncSetAttribute or cuKernelSetAttribute
Values
- CU_CLUSTER_SCHEDULING_POLICY_DEFAULT = 0
- the default policy
- CU_CLUSTER_SCHEDULING_POLICY_SPREAD = 1
- spread the blocks within a cluster to the SMs
- CU_CLUSTER_SCHEDULING_POLICY_LOAD_BALANCING = 2
- allow the hardware to load-balance the blocks in a cluster to the SMs
- enum CUcomputemode
-
Compute Modes
Values
- CU_COMPUTEMODE_DEFAULT = 0
- Default compute mode (Multiple contexts allowed per device)
- CU_COMPUTEMODE_PROHIBITED = 2
- Compute-prohibited mode (No contexts can be created on this device at this time)
- CU_COMPUTEMODE_EXCLUSIVE_PROCESS = 3
- Compute-exclusive-process mode (Only one context used by a single process can be present on this device at a time)
- enum CUctx_flags
-
Context creation flags
Values
- CU_CTX_SCHED_AUTO = 0x00
- Automatic scheduling
- CU_CTX_SCHED_SPIN = 0x01
- Set spin as default scheduling
- CU_CTX_SCHED_YIELD = 0x02
- Set yield as default scheduling
- CU_CTX_SCHED_BLOCKING_SYNC = 0x04
- Set blocking synchronization as default scheduling
- CU_CTX_BLOCKING_SYNC = 0x04
-
Deprecated
This flag was deprecated as of CUDA 4.0 and was replaced with CU_CTX_SCHED_BLOCKING_SYNC.
Set blocking synchronization as default scheduling
- CU_CTX_SCHED_MASK = 0x07
- CU_CTX_MAP_HOST = 0x08
-
Deprecated
This flag was deprecated as of CUDA 11.0 and it no longer has any effect. All contexts as of CUDA 3.2 behave as though the flag is enabled.
- CU_CTX_LMEM_RESIZE_TO_MAX = 0x10
- Keep local memory allocation after launch
- CU_CTX_COREDUMP_ENABLE = 0x20
- Trigger coredumps from exceptions in this context
- CU_CTX_USER_COREDUMP_ENABLE = 0x40
- Enable user pipe to trigger coredumps in this context
- CU_CTX_SYNC_MEMOPS = 0x80
- Ensure synchronous memory operations on this context will synchronize
- CU_CTX_FLAGS_MASK = 0xFF
- enum CUdevice_P2PAttribute
-
P2P Attributes
Values
- CU_DEVICE_P2P_ATTRIBUTE_PERFORMANCE_RANK = 0x01
- A relative value indicating the performance of the link between two devices
- CU_DEVICE_P2P_ATTRIBUTE_ACCESS_SUPPORTED = 0x02
- P2P Access is enable
- CU_DEVICE_P2P_ATTRIBUTE_NATIVE_ATOMIC_SUPPORTED = 0x03
- Atomic operation over the link supported
- CU_DEVICE_P2P_ATTRIBUTE_ACCESS_ACCESS_SUPPORTED = 0x04
-
Deprecated
use CU_DEVICE_P2P_ATTRIBUTE_CUDA_ARRAY_ACCESS_SUPPORTED instead
- CU_DEVICE_P2P_ATTRIBUTE_CUDA_ARRAY_ACCESS_SUPPORTED = 0x04
- Accessing CUDA arrays over the link supported
- enum CUdevice_attribute
-
Device properties
Values
- CU_DEVICE_ATTRIBUTE_MAX_THREADS_PER_BLOCK = 1
- Maximum number of threads per block
- CU_DEVICE_ATTRIBUTE_MAX_BLOCK_DIM_X = 2
- Maximum block dimension X
- CU_DEVICE_ATTRIBUTE_MAX_BLOCK_DIM_Y = 3
- Maximum block dimension Y
- CU_DEVICE_ATTRIBUTE_MAX_BLOCK_DIM_Z = 4
- Maximum block dimension Z
- CU_DEVICE_ATTRIBUTE_MAX_GRID_DIM_X = 5
- Maximum grid dimension X
- CU_DEVICE_ATTRIBUTE_MAX_GRID_DIM_Y = 6
- Maximum grid dimension Y
- CU_DEVICE_ATTRIBUTE_MAX_GRID_DIM_Z = 7
- Maximum grid dimension Z
- CU_DEVICE_ATTRIBUTE_MAX_SHARED_MEMORY_PER_BLOCK = 8
- Maximum shared memory available per block in bytes
- CU_DEVICE_ATTRIBUTE_SHARED_MEMORY_PER_BLOCK = 8
- Deprecated, use CU_DEVICE_ATTRIBUTE_MAX_SHARED_MEMORY_PER_BLOCK
- CU_DEVICE_ATTRIBUTE_TOTAL_CONSTANT_MEMORY = 9
- Memory available on device for __constant__ variables in a CUDA C kernel in bytes
- CU_DEVICE_ATTRIBUTE_WARP_SIZE = 10
- Warp size in threads
- CU_DEVICE_ATTRIBUTE_MAX_PITCH = 11
- Maximum pitch in bytes allowed by memory copies
- CU_DEVICE_ATTRIBUTE_MAX_REGISTERS_PER_BLOCK = 12
- Maximum number of 32-bit registers available per block
- CU_DEVICE_ATTRIBUTE_REGISTERS_PER_BLOCK = 12
- Deprecated, use CU_DEVICE_ATTRIBUTE_MAX_REGISTERS_PER_BLOCK
- CU_DEVICE_ATTRIBUTE_CLOCK_RATE = 13
- Typical clock frequency in kilohertz
- CU_DEVICE_ATTRIBUTE_TEXTURE_ALIGNMENT = 14
- Alignment requirement for textures
- CU_DEVICE_ATTRIBUTE_GPU_OVERLAP = 15
- Device can possibly copy memory and execute a kernel concurrently. Deprecated. Use instead CU_DEVICE_ATTRIBUTE_ASYNC_ENGINE_COUNT.
- CU_DEVICE_ATTRIBUTE_MULTIPROCESSOR_COUNT = 16
- Number of multiprocessors on device
- CU_DEVICE_ATTRIBUTE_KERNEL_EXEC_TIMEOUT = 17
- Specifies whether there is a run time limit on kernels
- CU_DEVICE_ATTRIBUTE_INTEGRATED = 18
- Device is integrated with host memory
- CU_DEVICE_ATTRIBUTE_CAN_MAP_HOST_MEMORY = 19
- Device can map host memory into CUDA address space
- CU_DEVICE_ATTRIBUTE_COMPUTE_MODE = 20
- Compute mode (See CUcomputemode for details)
- CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE1D_WIDTH = 21
- Maximum 1D texture width
- CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_WIDTH = 22
- Maximum 2D texture width
- CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_HEIGHT = 23
- Maximum 2D texture height
- CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE3D_WIDTH = 24
- Maximum 3D texture width
- CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE3D_HEIGHT = 25
- Maximum 3D texture height
- CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE3D_DEPTH = 26
- Maximum 3D texture depth
- CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_LAYERED_WIDTH = 27
- Maximum 2D layered texture width
- CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_LAYERED_HEIGHT = 28
- Maximum 2D layered texture height
- CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_LAYERED_LAYERS = 29
- Maximum layers in a 2D layered texture
- CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_ARRAY_WIDTH = 27
- Deprecated, use CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_LAYERED_WIDTH
- CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_ARRAY_HEIGHT = 28
- Deprecated, use CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_LAYERED_HEIGHT
- CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_ARRAY_NUMSLICES = 29
- Deprecated, use CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_LAYERED_LAYERS
- CU_DEVICE_ATTRIBUTE_SURFACE_ALIGNMENT = 30
- Alignment requirement for surfaces
- CU_DEVICE_ATTRIBUTE_CONCURRENT_KERNELS = 31
- Device can possibly execute multiple kernels concurrently
- CU_DEVICE_ATTRIBUTE_ECC_ENABLED = 32
- Device has ECC support enabled
- CU_DEVICE_ATTRIBUTE_PCI_BUS_ID = 33
- PCI bus ID of the device
- CU_DEVICE_ATTRIBUTE_PCI_DEVICE_ID = 34
- PCI device ID of the device
- CU_DEVICE_ATTRIBUTE_TCC_DRIVER = 35
- Device is using TCC driver model
- CU_DEVICE_ATTRIBUTE_MEMORY_CLOCK_RATE = 36
- Peak memory clock frequency in kilohertz
- CU_DEVICE_ATTRIBUTE_GLOBAL_MEMORY_BUS_WIDTH = 37
- Global memory bus width in bits
- CU_DEVICE_ATTRIBUTE_L2_CACHE_SIZE = 38
- Size of L2 cache in bytes
- CU_DEVICE_ATTRIBUTE_MAX_THREADS_PER_MULTIPROCESSOR = 39
- Maximum resident threads per multiprocessor
- CU_DEVICE_ATTRIBUTE_ASYNC_ENGINE_COUNT = 40
- Number of asynchronous engines
- CU_DEVICE_ATTRIBUTE_UNIFIED_ADDRESSING = 41
- Device shares a unified address space with the host
- CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE1D_LAYERED_WIDTH = 42
- Maximum 1D layered texture width
- CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE1D_LAYERED_LAYERS = 43
- Maximum layers in a 1D layered texture
- CU_DEVICE_ATTRIBUTE_CAN_TEX2D_GATHER = 44
- Deprecated, do not use.
- CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_GATHER_WIDTH = 45
- Maximum 2D texture width if CUDA_ARRAY3D_TEXTURE_GATHER is set
- CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_GATHER_HEIGHT = 46
- Maximum 2D texture height if CUDA_ARRAY3D_TEXTURE_GATHER is set
- CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE3D_WIDTH_ALTERNATE = 47
- Alternate maximum 3D texture width
- CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE3D_HEIGHT_ALTERNATE = 48
- Alternate maximum 3D texture height
- CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE3D_DEPTH_ALTERNATE = 49
- Alternate maximum 3D texture depth
- CU_DEVICE_ATTRIBUTE_PCI_DOMAIN_ID = 50
- PCI domain ID of the device
- CU_DEVICE_ATTRIBUTE_TEXTURE_PITCH_ALIGNMENT = 51
- Pitch alignment requirement for textures
- CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURECUBEMAP_WIDTH = 52
- Maximum cubemap texture width/height
- CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURECUBEMAP_LAYERED_WIDTH = 53
- Maximum cubemap layered texture width/height
- CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURECUBEMAP_LAYERED_LAYERS = 54
- Maximum layers in a cubemap layered texture
- CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE1D_WIDTH = 55
- Maximum 1D surface width
- CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE2D_WIDTH = 56
- Maximum 2D surface width
- CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE2D_HEIGHT = 57
- Maximum 2D surface height
- CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE3D_WIDTH = 58
- Maximum 3D surface width
- CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE3D_HEIGHT = 59
- Maximum 3D surface height
- CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE3D_DEPTH = 60
- Maximum 3D surface depth
- CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE1D_LAYERED_WIDTH = 61
- Maximum 1D layered surface width
- CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE1D_LAYERED_LAYERS = 62
- Maximum layers in a 1D layered surface
- CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE2D_LAYERED_WIDTH = 63
- Maximum 2D layered surface width
- CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE2D_LAYERED_HEIGHT = 64
- Maximum 2D layered surface height
- CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE2D_LAYERED_LAYERS = 65
- Maximum layers in a 2D layered surface
- CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACECUBEMAP_WIDTH = 66
- Maximum cubemap surface width
- CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACECUBEMAP_LAYERED_WIDTH = 67
- Maximum cubemap layered surface width
- CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACECUBEMAP_LAYERED_LAYERS = 68
- Maximum layers in a cubemap layered surface
- CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE1D_LINEAR_WIDTH = 69
- Deprecated, do not use. Use cudaDeviceGetTexture1DLinearMaxWidth() or cuDeviceGetTexture1DLinearMaxWidth() instead.
- CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_LINEAR_WIDTH = 70
- Maximum 2D linear texture width
- CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_LINEAR_HEIGHT = 71
- Maximum 2D linear texture height
- CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_LINEAR_PITCH = 72
- Maximum 2D linear texture pitch in bytes
- CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_MIPMAPPED_WIDTH = 73
- Maximum mipmapped 2D texture width
- CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_MIPMAPPED_HEIGHT = 74
- Maximum mipmapped 2D texture height
- CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MAJOR = 75
- Major compute capability version number
- CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MINOR = 76
- Minor compute capability version number
- CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE1D_MIPMAPPED_WIDTH = 77
- Maximum mipmapped 1D texture width
- CU_DEVICE_ATTRIBUTE_STREAM_PRIORITIES_SUPPORTED = 78
- Device supports stream priorities
- CU_DEVICE_ATTRIBUTE_GLOBAL_L1_CACHE_SUPPORTED = 79
- Device supports caching globals in L1
- CU_DEVICE_ATTRIBUTE_LOCAL_L1_CACHE_SUPPORTED = 80
- Device supports caching locals in L1
- CU_DEVICE_ATTRIBUTE_MAX_SHARED_MEMORY_PER_MULTIPROCESSOR = 81
- Maximum shared memory available per multiprocessor in bytes
- CU_DEVICE_ATTRIBUTE_MAX_REGISTERS_PER_MULTIPROCESSOR = 82
- Maximum number of 32-bit registers available per multiprocessor
- CU_DEVICE_ATTRIBUTE_MANAGED_MEMORY = 83
- Device can allocate managed memory on this system
- CU_DEVICE_ATTRIBUTE_MULTI_GPU_BOARD = 84
- Device is on a multi-GPU board
- CU_DEVICE_ATTRIBUTE_MULTI_GPU_BOARD_GROUP_ID = 85
- Unique id for a group of devices on the same multi-GPU board
- CU_DEVICE_ATTRIBUTE_HOST_NATIVE_ATOMIC_SUPPORTED = 86
- Link between the device and the host supports native atomic operations (this is a placeholder attribute, and is not supported on any current hardware)
- CU_DEVICE_ATTRIBUTE_SINGLE_TO_DOUBLE_PRECISION_PERF_RATIO = 87
- Ratio of single precision performance (in floating-point operations per second) to double precision performance
- CU_DEVICE_ATTRIBUTE_PAGEABLE_MEMORY_ACCESS = 88
- Device supports coherently accessing pageable memory without calling cudaHostRegister on it
- CU_DEVICE_ATTRIBUTE_CONCURRENT_MANAGED_ACCESS = 89
- Device can coherently access managed memory concurrently with the CPU
- CU_DEVICE_ATTRIBUTE_COMPUTE_PREEMPTION_SUPPORTED = 90
- Device supports compute preemption.
- CU_DEVICE_ATTRIBUTE_CAN_USE_HOST_POINTER_FOR_REGISTERED_MEM = 91
- Device can access host registered memory at the same virtual address as the CPU
- CU_DEVICE_ATTRIBUTE_CAN_USE_STREAM_MEM_OPS_V1 = 92
- Deprecated, along with v1 MemOps API, cuStreamBatchMemOp and related APIs are supported.
- CU_DEVICE_ATTRIBUTE_CAN_USE_64_BIT_STREAM_MEM_OPS_V1 = 93
- Deprecated, along with v1 MemOps API, 64-bit operations are supported in cuStreamBatchMemOp and related APIs.
- CU_DEVICE_ATTRIBUTE_CAN_USE_STREAM_WAIT_VALUE_NOR_V1 = 94
- Deprecated, along with v1 MemOps API, CU_STREAM_WAIT_VALUE_NOR is supported.
- CU_DEVICE_ATTRIBUTE_COOPERATIVE_LAUNCH = 95
- Device supports launching cooperative kernels via cuLaunchCooperativeKernel
- CU_DEVICE_ATTRIBUTE_COOPERATIVE_MULTI_DEVICE_LAUNCH = 96
- Deprecated, cuLaunchCooperativeKernelMultiDevice is deprecated.
- CU_DEVICE_ATTRIBUTE_MAX_SHARED_MEMORY_PER_BLOCK_OPTIN = 97
- Maximum optin shared memory per block
- CU_DEVICE_ATTRIBUTE_CAN_FLUSH_REMOTE_WRITES = 98
- The CU_STREAM_WAIT_VALUE_FLUSH flag and the CU_STREAM_MEM_OP_FLUSH_REMOTE_WRITES MemOp are supported on the device. See Stream Memory Operations for additional details.
- CU_DEVICE_ATTRIBUTE_HOST_REGISTER_SUPPORTED = 99
- Device supports host memory registration via cudaHostRegister.
- CU_DEVICE_ATTRIBUTE_PAGEABLE_MEMORY_ACCESS_USES_HOST_PAGE_TABLES = 100
- Device accesses pageable memory via the host's page tables.
- CU_DEVICE_ATTRIBUTE_DIRECT_MANAGED_MEM_ACCESS_FROM_HOST = 101
- The host can directly access managed memory on the device without migration.
- CU_DEVICE_ATTRIBUTE_VIRTUAL_ADDRESS_MANAGEMENT_SUPPORTED = 102
- Deprecated, Use CU_DEVICE_ATTRIBUTE_VIRTUAL_MEMORY_MANAGEMENT_SUPPORTED
- CU_DEVICE_ATTRIBUTE_VIRTUAL_MEMORY_MANAGEMENT_SUPPORTED = 102
- Device supports virtual memory management APIs like cuMemAddressReserve, cuMemCreate, cuMemMap and related APIs
- CU_DEVICE_ATTRIBUTE_HANDLE_TYPE_POSIX_FILE_DESCRIPTOR_SUPPORTED = 103
- Device supports exporting memory to a posix file descriptor with cuMemExportToShareableHandle, if requested via cuMemCreate
- CU_DEVICE_ATTRIBUTE_HANDLE_TYPE_WIN32_HANDLE_SUPPORTED = 104
- Device supports exporting memory to a Win32 NT handle with cuMemExportToShareableHandle, if requested via cuMemCreate
- CU_DEVICE_ATTRIBUTE_HANDLE_TYPE_WIN32_KMT_HANDLE_SUPPORTED = 105
- Device supports exporting memory to a Win32 KMT handle with cuMemExportToShareableHandle, if requested via cuMemCreate
- CU_DEVICE_ATTRIBUTE_MAX_BLOCKS_PER_MULTIPROCESSOR = 106
- Maximum number of blocks per multiprocessor
- CU_DEVICE_ATTRIBUTE_GENERIC_COMPRESSION_SUPPORTED = 107
- Device supports compression of memory
- CU_DEVICE_ATTRIBUTE_MAX_PERSISTING_L2_CACHE_SIZE = 108
- Maximum L2 persisting lines capacity setting in bytes.
- CU_DEVICE_ATTRIBUTE_MAX_ACCESS_POLICY_WINDOW_SIZE = 109
- Maximum value of CUaccessPolicyWindow::num_bytes.
- CU_DEVICE_ATTRIBUTE_GPU_DIRECT_RDMA_WITH_CUDA_VMM_SUPPORTED = 110
- Device supports specifying the GPUDirect RDMA flag with cuMemCreate
- CU_DEVICE_ATTRIBUTE_RESERVED_SHARED_MEMORY_PER_BLOCK = 111
- Shared memory reserved by CUDA driver per block in bytes
- CU_DEVICE_ATTRIBUTE_SPARSE_CUDA_ARRAY_SUPPORTED = 112
- Device supports sparse CUDA arrays and sparse CUDA mipmapped arrays
- CU_DEVICE_ATTRIBUTE_READ_ONLY_HOST_REGISTER_SUPPORTED = 113
- Device supports using the cuMemHostRegister flag CU_MEMHOSTERGISTER_READ_ONLY to register memory that must be mapped as read-only to the GPU
- CU_DEVICE_ATTRIBUTE_TIMELINE_SEMAPHORE_INTEROP_SUPPORTED = 114
- External timeline semaphore interop is supported on the device
- CU_DEVICE_ATTRIBUTE_MEMORY_POOLS_SUPPORTED = 115
- Device supports using the cuMemAllocAsync and cuMemPool family of APIs
- CU_DEVICE_ATTRIBUTE_GPU_DIRECT_RDMA_SUPPORTED = 116
- Device supports GPUDirect RDMA APIs, like nvidia_p2p_get_pages (see https://docs.nvidia.com/cuda/gpudirect-rdma for more information)
- CU_DEVICE_ATTRIBUTE_GPU_DIRECT_RDMA_FLUSH_WRITES_OPTIONS = 117
- The returned attribute shall be interpreted as a bitmask, where the individual bits are described by the CUflushGPUDirectRDMAWritesOptions enum
- CU_DEVICE_ATTRIBUTE_GPU_DIRECT_RDMA_WRITES_ORDERING = 118
- GPUDirect RDMA writes to the device do not need to be flushed for consumers within the scope indicated by the returned attribute. See CUGPUDirectRDMAWritesOrdering for the numerical values returned here.
- CU_DEVICE_ATTRIBUTE_MEMPOOL_SUPPORTED_HANDLE_TYPES = 119
- Handle types supported with mempool based IPC
- CU_DEVICE_ATTRIBUTE_CLUSTER_LAUNCH = 120
- Indicates device supports cluster launch
- CU_DEVICE_ATTRIBUTE_DEFERRED_MAPPING_CUDA_ARRAY_SUPPORTED = 121
- Device supports deferred mapping CUDA arrays and CUDA mipmapped arrays
- CU_DEVICE_ATTRIBUTE_CAN_USE_64_BIT_STREAM_MEM_OPS = 122
- 64-bit operations are supported in cuStreamBatchMemOp and related MemOp APIs.
- CU_DEVICE_ATTRIBUTE_CAN_USE_STREAM_WAIT_VALUE_NOR = 123
- CU_STREAM_WAIT_VALUE_NOR is supported by MemOp APIs.
- CU_DEVICE_ATTRIBUTE_DMA_BUF_SUPPORTED = 124
- Device supports buffer sharing with dma_buf mechanism.
- CU_DEVICE_ATTRIBUTE_IPC_EVENT_SUPPORTED = 125
- Device supports IPC Events.
- CU_DEVICE_ATTRIBUTE_MEM_SYNC_DOMAIN_COUNT = 126
- Number of memory domains the device supports.
- CU_DEVICE_ATTRIBUTE_TENSOR_MAP_ACCESS_SUPPORTED = 127
- Device supports accessing memory using Tensor Map.
- CU_DEVICE_ATTRIBUTE_HANDLE_TYPE_FABRIC_SUPPORTED = 128
- Device supports exporting memory to a fabric handle with cuMemExportToShareableHandle() or requested with cuMemCreate()
- CU_DEVICE_ATTRIBUTE_UNIFIED_FUNCTION_POINTERS = 129
- Device supports unified function pointers.
- CU_DEVICE_ATTRIBUTE_NUMA_CONFIG = 130
- CU_DEVICE_ATTRIBUTE_NUMA_ID = 131
- CU_DEVICE_ATTRIBUTE_MULTICAST_SUPPORTED = 132
- Device supports switch multicast and reduction operations.
- CU_DEVICE_ATTRIBUTE_MPS_ENABLED = 133
- Indicates if contexts created on this device will be shared via MPS
- CU_DEVICE_ATTRIBUTE_HOST_NUMA_ID = 134
- NUMA ID of the host node closest to the device. Returns -1 when system does not support NUMA.
- CU_DEVICE_ATTRIBUTE_MAX
- enum CUdriverProcAddressQueryResult
-
Flags to indicate search status. For more details see cuGetProcAddress
Values
- CU_GET_PROC_ADDRESS_SUCCESS = 0
- Symbol was succesfully found
- CU_GET_PROC_ADDRESS_SYMBOL_NOT_FOUND = 1
- Symbol was not found in search
- CU_GET_PROC_ADDRESS_VERSION_NOT_SUFFICIENT = 2
- Symbol was found but version supplied was not sufficient
- enum CUdriverProcAddress_flags
-
Flags to specify search options. For more details see cuGetProcAddress
Values
- CU_GET_PROC_ADDRESS_DEFAULT = 0
- Default search mode for driver symbols.
- CU_GET_PROC_ADDRESS_LEGACY_STREAM = 1<<0
- Search for legacy versions of driver symbols.
- CU_GET_PROC_ADDRESS_PER_THREAD_DEFAULT_STREAM = 1<<1
- Search for per-thread versions of driver symbols.
- enum CUeglColorFormat
-
CUDA EGL Color Format - The different planar and multiplanar formats currently supported for CUDA_EGL interops. Three channel formats are currently not supported for CU_EGL_FRAME_TYPE_ARRAY
Values
- CU_EGL_COLOR_FORMAT_YUV420_PLANAR = 0x00
- Y, U, V in three surfaces, each in a separate surface, U/V width = 1/2 Y width, U/V height = 1/2 Y height.
- CU_EGL_COLOR_FORMAT_YUV420_SEMIPLANAR = 0x01
- Y, UV in two surfaces (UV as one surface) with VU byte ordering, width, height ratio same as YUV420Planar.
- CU_EGL_COLOR_FORMAT_YUV422_PLANAR = 0x02
- Y, U, V each in a separate surface, U/V width = 1/2 Y width, U/V height = Y height.
- CU_EGL_COLOR_FORMAT_YUV422_SEMIPLANAR = 0x03
- Y, UV in two surfaces with VU byte ordering, width, height ratio same as YUV422Planar.
- CU_EGL_COLOR_FORMAT_RGB = 0x04
- R/G/B three channels in one surface with BGR byte ordering. Only pitch linear format supported.
- CU_EGL_COLOR_FORMAT_BGR = 0x05
- R/G/B three channels in one surface with RGB byte ordering. Only pitch linear format supported.
- CU_EGL_COLOR_FORMAT_ARGB = 0x06
- R/G/B/A four channels in one surface with BGRA byte ordering.
- CU_EGL_COLOR_FORMAT_RGBA = 0x07
- R/G/B/A four channels in one surface with ABGR byte ordering.
- CU_EGL_COLOR_FORMAT_L = 0x08
- single luminance channel in one surface.
- CU_EGL_COLOR_FORMAT_R = 0x09
- single color channel in one surface.
- CU_EGL_COLOR_FORMAT_YUV444_PLANAR = 0x0A
- Y, U, V in three surfaces, each in a separate surface, U/V width = Y width, U/V height = Y height.
- CU_EGL_COLOR_FORMAT_YUV444_SEMIPLANAR = 0x0B
- Y, UV in two surfaces (UV as one surface) with VU byte ordering, width, height ratio same as YUV444Planar.
- CU_EGL_COLOR_FORMAT_YUYV_422 = 0x0C
- Y, U, V in one surface, interleaved as UYVY in one channel.
- CU_EGL_COLOR_FORMAT_UYVY_422 = 0x0D
- Y, U, V in one surface, interleaved as YUYV in one channel.
- CU_EGL_COLOR_FORMAT_ABGR = 0x0E
- R/G/B/A four channels in one surface with RGBA byte ordering.
- CU_EGL_COLOR_FORMAT_BGRA = 0x0F
- R/G/B/A four channels in one surface with ARGB byte ordering.
- CU_EGL_COLOR_FORMAT_A = 0x10
- Alpha color format - one channel in one surface.
- CU_EGL_COLOR_FORMAT_RG = 0x11
- R/G color format - two channels in one surface with GR byte ordering
- CU_EGL_COLOR_FORMAT_AYUV = 0x12
- Y, U, V, A four channels in one surface, interleaved as VUYA.
- CU_EGL_COLOR_FORMAT_YVU444_SEMIPLANAR = 0x13
- Y, VU in two surfaces (VU as one surface) with UV byte ordering, U/V width = Y width, U/V height = Y height.
- CU_EGL_COLOR_FORMAT_YVU422_SEMIPLANAR = 0x14
- Y, VU in two surfaces (VU as one surface) with UV byte ordering, U/V width = 1/2 Y width, U/V height = Y height.
- CU_EGL_COLOR_FORMAT_YVU420_SEMIPLANAR = 0x15
- Y, VU in two surfaces (VU as one surface) with UV byte ordering, U/V width = 1/2 Y width, U/V height = 1/2 Y height.
- CU_EGL_COLOR_FORMAT_Y10V10U10_444_SEMIPLANAR = 0x16
- Y10, V10U10 in two surfaces (VU as one surface) with UV byte ordering, U/V width = Y width, U/V height = Y height.
- CU_EGL_COLOR_FORMAT_Y10V10U10_420_SEMIPLANAR = 0x17
- Y10, V10U10 in two surfaces (VU as one surface) with UV byte ordering, U/V width = 1/2 Y width, U/V height = 1/2 Y height.
- CU_EGL_COLOR_FORMAT_Y12V12U12_444_SEMIPLANAR = 0x18
- Y12, V12U12 in two surfaces (VU as one surface) with UV byte ordering, U/V width = Y width, U/V height = Y height.
- CU_EGL_COLOR_FORMAT_Y12V12U12_420_SEMIPLANAR = 0x19
- Y12, V12U12 in two surfaces (VU as one surface) with UV byte ordering, U/V width = 1/2 Y width, U/V height = 1/2 Y height.
- CU_EGL_COLOR_FORMAT_VYUY_ER = 0x1A
- Extended Range Y, U, V in one surface, interleaved as YVYU in one channel.
- CU_EGL_COLOR_FORMAT_UYVY_ER = 0x1B
- Extended Range Y, U, V in one surface, interleaved as YUYV in one channel.
- CU_EGL_COLOR_FORMAT_YUYV_ER = 0x1C
- Extended Range Y, U, V in one surface, interleaved as UYVY in one channel.
- CU_EGL_COLOR_FORMAT_YVYU_ER = 0x1D
- Extended Range Y, U, V in one surface, interleaved as VYUY in one channel.
- CU_EGL_COLOR_FORMAT_YUV_ER = 0x1E
- Extended Range Y, U, V three channels in one surface, interleaved as VUY. Only pitch linear format supported.
- CU_EGL_COLOR_FORMAT_YUVA_ER = 0x1F
- Extended Range Y, U, V, A four channels in one surface, interleaved as AVUY.
- CU_EGL_COLOR_FORMAT_AYUV_ER = 0x20
- Extended Range Y, U, V, A four channels in one surface, interleaved as VUYA.
- CU_EGL_COLOR_FORMAT_YUV444_PLANAR_ER = 0x21
- Extended Range Y, U, V in three surfaces, U/V width = Y width, U/V height = Y height.
- CU_EGL_COLOR_FORMAT_YUV422_PLANAR_ER = 0x22
- Extended Range Y, U, V in three surfaces, U/V width = 1/2 Y width, U/V height = Y height.
- CU_EGL_COLOR_FORMAT_YUV420_PLANAR_ER = 0x23
- Extended Range Y, U, V in three surfaces, U/V width = 1/2 Y width, U/V height = 1/2 Y height.
- CU_EGL_COLOR_FORMAT_YUV444_SEMIPLANAR_ER = 0x24
- Extended Range Y, UV in two surfaces (UV as one surface) with VU byte ordering, U/V width = Y width, U/V height = Y height.
- CU_EGL_COLOR_FORMAT_YUV422_SEMIPLANAR_ER = 0x25
- Extended Range Y, UV in two surfaces (UV as one surface) with VU byte ordering, U/V width = 1/2 Y width, U/V height = Y height.
- CU_EGL_COLOR_FORMAT_YUV420_SEMIPLANAR_ER = 0x26
- Extended Range Y, UV in two surfaces (UV as one surface) with VU byte ordering, U/V width = 1/2 Y width, U/V height = 1/2 Y height.
- CU_EGL_COLOR_FORMAT_YVU444_PLANAR_ER = 0x27
- Extended Range Y, V, U in three surfaces, U/V width = Y width, U/V height = Y height.
- CU_EGL_COLOR_FORMAT_YVU422_PLANAR_ER = 0x28
- Extended Range Y, V, U in three surfaces, U/V width = 1/2 Y width, U/V height = Y height.
- CU_EGL_COLOR_FORMAT_YVU420_PLANAR_ER = 0x29
- Extended Range Y, V, U in three surfaces, U/V width = 1/2 Y width, U/V height = 1/2 Y height.
- CU_EGL_COLOR_FORMAT_YVU444_SEMIPLANAR_ER = 0x2A
- Extended Range Y, VU in two surfaces (VU as one surface) with UV byte ordering, U/V width = Y width, U/V height = Y height.
- CU_EGL_COLOR_FORMAT_YVU422_SEMIPLANAR_ER = 0x2B
- Extended Range Y, VU in two surfaces (VU as one surface) with UV byte ordering, U/V width = 1/2 Y width, U/V height = Y height.
- CU_EGL_COLOR_FORMAT_YVU420_SEMIPLANAR_ER = 0x2C
- Extended Range Y, VU in two surfaces (VU as one surface) with UV byte ordering, U/V width = 1/2 Y width, U/V height = 1/2 Y height.
- CU_EGL_COLOR_FORMAT_BAYER_RGGB = 0x2D
- Bayer format - one channel in one surface with interleaved RGGB ordering.
- CU_EGL_COLOR_FORMAT_BAYER_BGGR = 0x2E
- Bayer format - one channel in one surface with interleaved BGGR ordering.
- CU_EGL_COLOR_FORMAT_BAYER_GRBG = 0x2F
- Bayer format - one channel in one surface with interleaved GRBG ordering.
- CU_EGL_COLOR_FORMAT_BAYER_GBRG = 0x30
- Bayer format - one channel in one surface with interleaved GBRG ordering.
- CU_EGL_COLOR_FORMAT_BAYER10_RGGB = 0x31
- Bayer10 format - one channel in one surface with interleaved RGGB ordering. Out of 16 bits, 10 bits used 6 bits No-op.
- CU_EGL_COLOR_FORMAT_BAYER10_BGGR = 0x32
- Bayer10 format - one channel in one surface with interleaved BGGR ordering. Out of 16 bits, 10 bits used 6 bits No-op.
- CU_EGL_COLOR_FORMAT_BAYER10_GRBG = 0x33
- Bayer10 format - one channel in one surface with interleaved GRBG ordering. Out of 16 bits, 10 bits used 6 bits No-op.
- CU_EGL_COLOR_FORMAT_BAYER10_GBRG = 0x34
- Bayer10 format - one channel in one surface with interleaved GBRG ordering. Out of 16 bits, 10 bits used 6 bits No-op.
- CU_EGL_COLOR_FORMAT_BAYER12_RGGB = 0x35
- Bayer12 format - one channel in one surface with interleaved RGGB ordering. Out of 16 bits, 12 bits used 4 bits No-op.
- CU_EGL_COLOR_FORMAT_BAYER12_BGGR = 0x36
- Bayer12 format - one channel in one surface with interleaved BGGR ordering. Out of 16 bits, 12 bits used 4 bits No-op.
- CU_EGL_COLOR_FORMAT_BAYER12_GRBG = 0x37
- Bayer12 format - one channel in one surface with interleaved GRBG ordering. Out of 16 bits, 12 bits used 4 bits No-op.
- CU_EGL_COLOR_FORMAT_BAYER12_GBRG = 0x38
- Bayer12 format - one channel in one surface with interleaved GBRG ordering. Out of 16 bits, 12 bits used 4 bits No-op.
- CU_EGL_COLOR_FORMAT_BAYER14_RGGB = 0x39
- Bayer14 format - one channel in one surface with interleaved RGGB ordering. Out of 16 bits, 14 bits used 2 bits No-op.
- CU_EGL_COLOR_FORMAT_BAYER14_BGGR = 0x3A
- Bayer14 format - one channel in one surface with interleaved BGGR ordering. Out of 16 bits, 14 bits used 2 bits No-op.
- CU_EGL_COLOR_FORMAT_BAYER14_GRBG = 0x3B
- Bayer14 format - one channel in one surface with interleaved GRBG ordering. Out of 16 bits, 14 bits used 2 bits No-op.
- CU_EGL_COLOR_FORMAT_BAYER14_GBRG = 0x3C
- Bayer14 format - one channel in one surface with interleaved GBRG ordering. Out of 16 bits, 14 bits used 2 bits No-op.
- CU_EGL_COLOR_FORMAT_BAYER20_RGGB = 0x3D
- Bayer20 format - one channel in one surface with interleaved RGGB ordering. Out of 32 bits, 20 bits used 12 bits No-op.
- CU_EGL_COLOR_FORMAT_BAYER20_BGGR = 0x3E
- Bayer20 format - one channel in one surface with interleaved BGGR ordering. Out of 32 bits, 20 bits used 12 bits No-op.
- CU_EGL_COLOR_FORMAT_BAYER20_GRBG = 0x3F
- Bayer20 format - one channel in one surface with interleaved GRBG ordering. Out of 32 bits, 20 bits used 12 bits No-op.
- CU_EGL_COLOR_FORMAT_BAYER20_GBRG = 0x40
- Bayer20 format - one channel in one surface with interleaved GBRG ordering. Out of 32 bits, 20 bits used 12 bits No-op.
- CU_EGL_COLOR_FORMAT_YVU444_PLANAR = 0x41
- Y, V, U in three surfaces, each in a separate surface, U/V width = Y width, U/V height = Y height.
- CU_EGL_COLOR_FORMAT_YVU422_PLANAR = 0x42
- Y, V, U in three surfaces, each in a separate surface, U/V width = 1/2 Y width, U/V height = Y height.
- CU_EGL_COLOR_FORMAT_YVU420_PLANAR = 0x43
- Y, V, U in three surfaces, each in a separate surface, U/V width = 1/2 Y width, U/V height = 1/2 Y height.
- CU_EGL_COLOR_FORMAT_BAYER_ISP_RGGB = 0x44
- Nvidia proprietary Bayer ISP format - one channel in one surface with interleaved RGGB ordering and mapped to opaque integer datatype.
- CU_EGL_COLOR_FORMAT_BAYER_ISP_BGGR = 0x45
- Nvidia proprietary Bayer ISP format - one channel in one surface with interleaved BGGR ordering and mapped to opaque integer datatype.
- CU_EGL_COLOR_FORMAT_BAYER_ISP_GRBG = 0x46
- Nvidia proprietary Bayer ISP format - one channel in one surface with interleaved GRBG ordering and mapped to opaque integer datatype.
- CU_EGL_COLOR_FORMAT_BAYER_ISP_GBRG = 0x47
- Nvidia proprietary Bayer ISP format - one channel in one surface with interleaved GBRG ordering and mapped to opaque integer datatype.
- CU_EGL_COLOR_FORMAT_BAYER_BCCR = 0x48
- Bayer format - one channel in one surface with interleaved BCCR ordering.
- CU_EGL_COLOR_FORMAT_BAYER_RCCB = 0x49
- Bayer format - one channel in one surface with interleaved RCCB ordering.
- CU_EGL_COLOR_FORMAT_BAYER_CRBC = 0x4A
- Bayer format - one channel in one surface with interleaved CRBC ordering.
- CU_EGL_COLOR_FORMAT_BAYER_CBRC = 0x4B
- Bayer format - one channel in one surface with interleaved CBRC ordering.
- CU_EGL_COLOR_FORMAT_BAYER10_CCCC = 0x4C
- Bayer10 format - one channel in one surface with interleaved CCCC ordering. Out of 16 bits, 10 bits used 6 bits No-op.
- CU_EGL_COLOR_FORMAT_BAYER12_BCCR = 0x4D
- Bayer12 format - one channel in one surface with interleaved BCCR ordering. Out of 16 bits, 12 bits used 4 bits No-op.
- CU_EGL_COLOR_FORMAT_BAYER12_RCCB = 0x4E
- Bayer12 format - one channel in one surface with interleaved RCCB ordering. Out of 16 bits, 12 bits used 4 bits No-op.
- CU_EGL_COLOR_FORMAT_BAYER12_CRBC = 0x4F
- Bayer12 format - one channel in one surface with interleaved CRBC ordering. Out of 16 bits, 12 bits used 4 bits No-op.
- CU_EGL_COLOR_FORMAT_BAYER12_CBRC = 0x50
- Bayer12 format - one channel in one surface with interleaved CBRC ordering. Out of 16 bits, 12 bits used 4 bits No-op.
- CU_EGL_COLOR_FORMAT_BAYER12_CCCC = 0x51
- Bayer12 format - one channel in one surface with interleaved CCCC ordering. Out of 16 bits, 12 bits used 4 bits No-op.
- CU_EGL_COLOR_FORMAT_Y = 0x52
- Color format for single Y plane.
- CU_EGL_COLOR_FORMAT_YUV420_SEMIPLANAR_2020 = 0x53
- Y, UV in two surfaces (UV as one surface) U/V width = 1/2 Y width, U/V height = 1/2 Y height.
- CU_EGL_COLOR_FORMAT_YVU420_SEMIPLANAR_2020 = 0x54
- Y, VU in two surfaces (VU as one surface) U/V width = 1/2 Y width, U/V height = 1/2 Y height.
- CU_EGL_COLOR_FORMAT_YUV420_PLANAR_2020 = 0x55
- Y, U, V each in a separate surface, U/V width = 1/2 Y width, U/V height= 1/2 Y height.
- CU_EGL_COLOR_FORMAT_YVU420_PLANAR_2020 = 0x56
- Y, V, U each in a separate surface, U/V width = 1/2 Y width, U/V height = 1/2 Y height.
- CU_EGL_COLOR_FORMAT_YUV420_SEMIPLANAR_709 = 0x57
- Y, UV in two surfaces (UV as one surface) U/V width = 1/2 Y width, U/V height = 1/2 Y height.
- CU_EGL_COLOR_FORMAT_YVU420_SEMIPLANAR_709 = 0x58
- Y, VU in two surfaces (VU as one surface) U/V width = 1/2 Y width, U/V height = 1/2 Y height.
- CU_EGL_COLOR_FORMAT_YUV420_PLANAR_709 = 0x59
- Y, U, V each in a separate surface, U/V width = 1/2 Y width, U/V height = 1/2 Y height.
- CU_EGL_COLOR_FORMAT_YVU420_PLANAR_709 = 0x5A
- Y, V, U each in a separate surface, U/V width = 1/2 Y width, U/V height = 1/2 Y height.
- CU_EGL_COLOR_FORMAT_Y10V10U10_420_SEMIPLANAR_709 = 0x5B
- Y10, V10U10 in two surfaces (VU as one surface), U/V width = 1/2 Y width, U/V height = 1/2 Y height.
- CU_EGL_COLOR_FORMAT_Y10V10U10_420_SEMIPLANAR_2020 = 0x5C
- Y10, V10U10 in two surfaces (VU as one surface), U/V width = 1/2 Y width, U/V height = 1/2 Y height.
- CU_EGL_COLOR_FORMAT_Y10V10U10_422_SEMIPLANAR_2020 = 0x5D
- Y10, V10U10 in two surfaces(VU as one surface) U/V width = 1/2 Y width, U/V height = Y height.
- CU_EGL_COLOR_FORMAT_Y10V10U10_422_SEMIPLANAR = 0x5E
- Y10, V10U10 in two surfaces(VU as one surface) U/V width = 1/2 Y width, U/V height = Y height.
- CU_EGL_COLOR_FORMAT_Y10V10U10_422_SEMIPLANAR_709 = 0x5F
- Y10, V10U10 in two surfaces(VU as one surface) U/V width = 1/2 Y width, U/V height = Y height.
- CU_EGL_COLOR_FORMAT_Y_ER = 0x60
- Extended Range Color format for single Y plane.
- CU_EGL_COLOR_FORMAT_Y_709_ER = 0x61
- Extended Range Color format for single Y plane.
- CU_EGL_COLOR_FORMAT_Y10_ER = 0x62
- Extended Range Color format for single Y10 plane.
- CU_EGL_COLOR_FORMAT_Y10_709_ER = 0x63
- Extended Range Color format for single Y10 plane.
- CU_EGL_COLOR_FORMAT_Y12_ER = 0x64
- Extended Range Color format for single Y12 plane.
- CU_EGL_COLOR_FORMAT_Y12_709_ER = 0x65
- Extended Range Color format for single Y12 plane.
- CU_EGL_COLOR_FORMAT_YUVA = 0x66
- Y, U, V, A four channels in one surface, interleaved as AVUY.
- CU_EGL_COLOR_FORMAT_YUV = 0x67
- Y, U, V three channels in one surface, interleaved as VUY. Only pitch linear format supported.
- CU_EGL_COLOR_FORMAT_YVYU = 0x68
- Y, U, V in one surface, interleaved as YVYU in one channel.
- CU_EGL_COLOR_FORMAT_VYUY = 0x69
- Y, U, V in one surface, interleaved as VYUY in one channel.
- CU_EGL_COLOR_FORMAT_Y10V10U10_420_SEMIPLANAR_ER = 0x6A
- Extended Range Y10, V10U10 in two surfaces(VU as one surface) U/V width = 1/2 Y width, U/V height = 1/2 Y height.
- CU_EGL_COLOR_FORMAT_Y10V10U10_420_SEMIPLANAR_709_ER = 0x6B
- Extended Range Y10, V10U10 in two surfaces(VU as one surface) U/V width = 1/2 Y width, U/V height = 1/2 Y height.
- CU_EGL_COLOR_FORMAT_Y10V10U10_444_SEMIPLANAR_ER = 0x6C
- Extended Range Y10, V10U10 in two surfaces (VU as one surface) U/V width = Y width, U/V height = Y height.
- CU_EGL_COLOR_FORMAT_Y10V10U10_444_SEMIPLANAR_709_ER = 0x6D
- Extended Range Y10, V10U10 in two surfaces (VU as one surface) U/V width = Y width, U/V height = Y height.
- CU_EGL_COLOR_FORMAT_Y12V12U12_420_SEMIPLANAR_ER = 0x6E
- Extended Range Y12, V12U12 in two surfaces (VU as one surface) U/V width = 1/2 Y width, U/V height = 1/2 Y height.
- CU_EGL_COLOR_FORMAT_Y12V12U12_420_SEMIPLANAR_709_ER = 0x6F
- Extended Range Y12, V12U12 in two surfaces (VU as one surface) U/V width = 1/2 Y width, U/V height = 1/2 Y height.
- CU_EGL_COLOR_FORMAT_Y12V12U12_444_SEMIPLANAR_ER = 0x70
- Extended Range Y12, V12U12 in two surfaces (VU as one surface) U/V width = Y width, U/V height = Y height.
- CU_EGL_COLOR_FORMAT_Y12V12U12_444_SEMIPLANAR_709_ER = 0x71
- Extended Range Y12, V12U12 in two surfaces (VU as one surface) U/V width = Y width, U/V height = Y height.
- CU_EGL_COLOR_FORMAT_MAX
- enum CUeglFrameType
-
CUDA EglFrame type - array or pointer
Values
- CU_EGL_FRAME_TYPE_ARRAY = 0
- Frame type CUDA array
- CU_EGL_FRAME_TYPE_PITCH = 1
- Frame type pointer
- enum CUeglResourceLocationFlags
-
Resource location flags- sysmem or vidmem
For CUDA context on iGPU, since video and system memory are equivalent - these flags will not have an effect on the execution.
For CUDA context on dGPU, applications can use the flag CUeglResourceLocationFlags to give a hint about the desired location.
CU_EGL_RESOURCE_LOCATION_SYSMEM - the frame data is made resident on the system memory to be accessed by CUDA.
CU_EGL_RESOURCE_LOCATION_VIDMEM - the frame data is made resident on the dedicated video memory to be accessed by CUDA.
There may be an additional latency due to new allocation and data migration, if the frame is produced on a different memory.
Values
- CU_EGL_RESOURCE_LOCATION_SYSMEM = 0x00
- Resource location sysmem
- CU_EGL_RESOURCE_LOCATION_VIDMEM = 0x01
- Resource location vidmem
- enum CUevent_flags
-
Event creation flags
Values
- CU_EVENT_DEFAULT = 0x0
- Default event flag
- CU_EVENT_BLOCKING_SYNC = 0x1
- Event uses blocking synchronization
- CU_EVENT_DISABLE_TIMING = 0x2
- Event will not record timing data
- CU_EVENT_INTERPROCESS = 0x4
- Event is suitable for interprocess use. CU_EVENT_DISABLE_TIMING must be set
- enum CUevent_record_flags
-
Event record flags
Values
- CU_EVENT_RECORD_DEFAULT = 0x0
- Default event record flag
- CU_EVENT_RECORD_EXTERNAL = 0x1
- When using stream capture, create an event record node instead of the default behavior. This flag is invalid when used outside of capture.
- enum CUevent_sched_flags
-
Event sched flags
Values
- CU_EVENT_SCHED_AUTO = 0x00
- Automatic scheduling
- CU_EVENT_SCHED_SPIN = 0x01
- Set spin as default scheduling
- CU_EVENT_SCHED_YIELD = 0x02
- Set yield as default scheduling
- CU_EVENT_SCHED_BLOCKING_SYNC = 0x04
- Set blocking synchronization as default scheduling
- enum CUevent_wait_flags
-
Event wait flags
Values
- CU_EVENT_WAIT_DEFAULT = 0x0
- Default event wait flag
- CU_EVENT_WAIT_EXTERNAL = 0x1
- When using stream capture, create an event wait node instead of the default behavior. This flag is invalid when used outside of capture.
- enum CUexecAffinityType
-
Execution Affinity Types
Values
- CU_EXEC_AFFINITY_TYPE_SM_COUNT = 0
- Create a context with limited SMs.
- CU_EXEC_AFFINITY_TYPE_MAX
- enum CUexternalMemoryHandleType
-
External memory handle types
Values
- CU_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD = 1
- Handle is an opaque file descriptor
- CU_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_WIN32 = 2
- Handle is an opaque shared NT handle
- CU_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_WIN32_KMT = 3
- Handle is an opaque, globally shared handle
- CU_EXTERNAL_MEMORY_HANDLE_TYPE_D3D12_HEAP = 4
- Handle is a D3D12 heap object
- CU_EXTERNAL_MEMORY_HANDLE_TYPE_D3D12_RESOURCE = 5
- Handle is a D3D12 committed resource
- CU_EXTERNAL_MEMORY_HANDLE_TYPE_D3D11_RESOURCE = 6
- Handle is a shared NT handle to a D3D11 resource
- CU_EXTERNAL_MEMORY_HANDLE_TYPE_D3D11_RESOURCE_KMT = 7
- Handle is a globally shared handle to a D3D11 resource
- CU_EXTERNAL_MEMORY_HANDLE_TYPE_NVSCIBUF = 8
- Handle is an NvSciBuf object
- enum CUexternalSemaphoreHandleType
-
External semaphore handle types
Values
- CU_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD = 1
- Handle is an opaque file descriptor
- CU_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_WIN32 = 2
- Handle is an opaque shared NT handle
- CU_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_WIN32_KMT = 3
- Handle is an opaque, globally shared handle
- CU_EXTERNAL_SEMAPHORE_HANDLE_TYPE_D3D12_FENCE = 4
- Handle is a shared NT handle referencing a D3D12 fence object
- CU_EXTERNAL_SEMAPHORE_HANDLE_TYPE_D3D11_FENCE = 5
- Handle is a shared NT handle referencing a D3D11 fence object
- CU_EXTERNAL_SEMAPHORE_HANDLE_TYPE_NVSCISYNC = 6
- Opaque handle to NvSciSync Object
- CU_EXTERNAL_SEMAPHORE_HANDLE_TYPE_D3D11_KEYED_MUTEX = 7
- Handle is a shared NT handle referencing a D3D11 keyed mutex object
- CU_EXTERNAL_SEMAPHORE_HANDLE_TYPE_D3D11_KEYED_MUTEX_KMT = 8
- Handle is a globally shared handle referencing a D3D11 keyed mutex object
- CU_EXTERNAL_SEMAPHORE_HANDLE_TYPE_TIMELINE_SEMAPHORE_FD = 9
- Handle is an opaque file descriptor referencing a timeline semaphore
- CU_EXTERNAL_SEMAPHORE_HANDLE_TYPE_TIMELINE_SEMAPHORE_WIN32 = 10
- Handle is an opaque shared NT handle referencing a timeline semaphore
- enum CUfilter_mode
-
Texture reference filtering modes
Values
- CU_TR_FILTER_MODE_POINT = 0
- Point filter mode
- CU_TR_FILTER_MODE_LINEAR = 1
- Linear filter mode
- enum CUflushGPUDirectRDMAWritesOptions
-
Values
- CU_FLUSH_GPU_DIRECT_RDMA_WRITES_OPTION_HOST = 1<<0
- cuFlushGPUDirectRDMAWrites() and its CUDA Runtime API counterpart are supported on the device.
- CU_FLUSH_GPU_DIRECT_RDMA_WRITES_OPTION_MEMOPS = 1<<1
- The CU_STREAM_WAIT_VALUE_FLUSH flag and the CU_STREAM_MEM_OP_FLUSH_REMOTE_WRITES MemOp are supported on the device.
- enum CUflushGPUDirectRDMAWritesScope
-
The scopes for cuFlushGPUDirectRDMAWrites
Values
- CU_FLUSH_GPU_DIRECT_RDMA_WRITES_TO_OWNER = 100
- Blocks until remote writes are visible to the CUDA device context owning the data.
- CU_FLUSH_GPU_DIRECT_RDMA_WRITES_TO_ALL_DEVICES = 200
- Blocks until remote writes are visible to all CUDA device contexts.
- enum CUflushGPUDirectRDMAWritesTarget
-
The targets for cuFlushGPUDirectRDMAWrites
Values
- CU_FLUSH_GPU_DIRECT_RDMA_WRITES_TARGET_CURRENT_CTX = 0
- Sets the target for cuFlushGPUDirectRDMAWrites() to the currently active CUDA device context.
- enum CUfunc_cache
-
Function cache configurations
Values
- CU_FUNC_CACHE_PREFER_NONE = 0x00
- no preference for shared memory or L1 (default)
- CU_FUNC_CACHE_PREFER_SHARED = 0x01
- prefer larger shared memory and smaller L1 cache
- CU_FUNC_CACHE_PREFER_L1 = 0x02
- prefer larger L1 cache and smaller shared memory
- CU_FUNC_CACHE_PREFER_EQUAL = 0x03
- prefer equal sized L1 cache and shared memory
- enum CUfunction_attribute
-
Function properties
Values
- CU_FUNC_ATTRIBUTE_MAX_THREADS_PER_BLOCK = 0
- The maximum number of threads per block, beyond which a launch of the function would fail. This number depends on both the function and the device on which the function is currently loaded.
- CU_FUNC_ATTRIBUTE_SHARED_SIZE_BYTES = 1
- The size in bytes of statically-allocated shared memory required by this function. This does not include dynamically-allocated shared memory requested by the user at runtime.
- CU_FUNC_ATTRIBUTE_CONST_SIZE_BYTES = 2
- The size in bytes of user-allocated constant memory required by this function.
- CU_FUNC_ATTRIBUTE_LOCAL_SIZE_BYTES = 3
- The size in bytes of local memory used by each thread of this function.
- CU_FUNC_ATTRIBUTE_NUM_REGS = 4
- The number of registers used by each thread of this function.
- CU_FUNC_ATTRIBUTE_PTX_VERSION = 5
- The PTX virtual architecture version for which the function was compiled. This value is the major PTX version * 10 + the minor PTX version, so a PTX version 1.3 function would return the value 13. Note that this may return the undefined value of 0 for cubins compiled prior to CUDA 3.0.
- CU_FUNC_ATTRIBUTE_BINARY_VERSION = 6
- The binary architecture version for which the function was compiled. This value is the major binary version * 10 + the minor binary version, so a binary version 1.3 function would return the value 13. Note that this will return a value of 10 for legacy cubins that do not have a properly-encoded binary architecture version.
- CU_FUNC_ATTRIBUTE_CACHE_MODE_CA = 7
- The attribute to indicate whether the function has been compiled with user specified option "-Xptxas --dlcm=ca" set .
- CU_FUNC_ATTRIBUTE_MAX_DYNAMIC_SHARED_SIZE_BYTES = 8
- The maximum size in bytes of dynamically-allocated shared memory that can be used by this function. If the user-specified dynamic shared memory size is larger than this value, the launch will fail. See cuFuncSetAttribute, cuKernelSetAttribute
- CU_FUNC_ATTRIBUTE_PREFERRED_SHARED_MEMORY_CARVEOUT = 9
- On devices where the L1 cache and shared memory use the same hardware resources, this sets the shared memory carveout preference, in percent of the total shared memory. Refer to CU_DEVICE_ATTRIBUTE_MAX_SHARED_MEMORY_PER_MULTIPROCESSOR. This is only a hint, and the driver can choose a different ratio if required to execute the function. See cuFuncSetAttribute, cuKernelSetAttribute
- CU_FUNC_ATTRIBUTE_CLUSTER_SIZE_MUST_BE_SET = 10
- If this attribute is set, the kernel must launch with a valid cluster size specified. See cuFuncSetAttribute, cuKernelSetAttribute
- CU_FUNC_ATTRIBUTE_REQUIRED_CLUSTER_WIDTH = 11
- The required cluster width in blocks. The values must either all be 0 or all be positive. The validity of the cluster dimensions is otherwise checked at launch time.If the value is set during compile time, it cannot be set at runtime. Setting it at runtime will return CUDA_ERROR_NOT_PERMITTED. See cuFuncSetAttribute, cuKernelSetAttribute
- CU_FUNC_ATTRIBUTE_REQUIRED_CLUSTER_HEIGHT = 12
- The required cluster height in blocks. The values must either all be 0 or all be positive. The validity of the cluster dimensions is otherwise checked at launch time.If the value is set during compile time, it cannot be set at runtime. Setting it at runtime should return CUDA_ERROR_NOT_PERMITTED. See cuFuncSetAttribute, cuKernelSetAttribute
- CU_FUNC_ATTRIBUTE_REQUIRED_CLUSTER_DEPTH = 13
- The required cluster depth in blocks. The values must either all be 0 or all be positive. The validity of the cluster dimensions is otherwise checked at launch time.If the value is set during compile time, it cannot be set at runtime. Setting it at runtime should return CUDA_ERROR_NOT_PERMITTED. See cuFuncSetAttribute, cuKernelSetAttribute
- CU_FUNC_ATTRIBUTE_NON_PORTABLE_CLUSTER_SIZE_ALLOWED = 14
- Whether the function can be launched with non-portable cluster size. 1 is allowed, 0 is disallowed. A non-portable cluster size may only function on the specific SKUs the program is tested on. The launch might fail if the program is run on a different hardware platform.CUDA API provides cudaOccupancyMaxActiveClusters to assist with checking whether the desired size can be launched on the current device.Portable Cluster SizeA portable cluster size is guaranteed to be functional on all compute capabilities higher than the target compute capability. The portable cluster size for sm_90 is 8 blocks per cluster. This value may increase for future compute capabilities.The specific hardware unit may support higher cluster sizes that’s not guaranteed to be portable. See cuFuncSetAttribute, cuKernelSetAttribute
- CU_FUNC_ATTRIBUTE_CLUSTER_SCHEDULING_POLICY_PREFERENCE = 15
- The block scheduling policy of a function. The value type is CUclusterSchedulingPolicy / cudaClusterSchedulingPolicy. See cuFuncSetAttribute, cuKernelSetAttribute
- CU_FUNC_ATTRIBUTE_MAX
- enum CUgraphConditionalNodeType
-
Conditional node types
Values
- CU_GRAPH_COND_TYPE_IF = 0
- Conditional 'if' Node. Body executed once if condition value is non-zero.
- CU_GRAPH_COND_TYPE_WHILE = 1
- Conditional 'while' Node. Body executed repeatedly while condition value is non-zero.
- enum CUgraphDebugDot_flags
-
The additional write options for cuGraphDebugDotPrint
Values
- CU_GRAPH_DEBUG_DOT_FLAGS_VERBOSE = 1<<0
- Output all debug data as if every debug flag is enabled
- CU_GRAPH_DEBUG_DOT_FLAGS_RUNTIME_TYPES = 1<<1
- Use CUDA Runtime structures for output
- CU_GRAPH_DEBUG_DOT_FLAGS_KERNEL_NODE_PARAMS = 1<<2
- Adds CUDA_KERNEL_NODE_PARAMS values to output
- CU_GRAPH_DEBUG_DOT_FLAGS_MEMCPY_NODE_PARAMS = 1<<3
- Adds CUDA_MEMCPY3D values to output
- CU_GRAPH_DEBUG_DOT_FLAGS_MEMSET_NODE_PARAMS = 1<<4
- Adds CUDA_MEMSET_NODE_PARAMS values to output
- CU_GRAPH_DEBUG_DOT_FLAGS_HOST_NODE_PARAMS = 1<<5
- Adds CUDA_HOST_NODE_PARAMS values to output
- CU_GRAPH_DEBUG_DOT_FLAGS_EVENT_NODE_PARAMS = 1<<6
- Adds CUevent handle from record and wait nodes to output
- CU_GRAPH_DEBUG_DOT_FLAGS_EXT_SEMAS_SIGNAL_NODE_PARAMS = 1<<7
- Adds CUDA_EXT_SEM_SIGNAL_NODE_PARAMS values to output
- CU_GRAPH_DEBUG_DOT_FLAGS_EXT_SEMAS_WAIT_NODE_PARAMS = 1<<8
- Adds CUDA_EXT_SEM_WAIT_NODE_PARAMS values to output
- CU_GRAPH_DEBUG_DOT_FLAGS_KERNEL_NODE_ATTRIBUTES = 1<<9
- Adds CUkernelNodeAttrValue values to output
- CU_GRAPH_DEBUG_DOT_FLAGS_HANDLES = 1<<10
- Adds node handles and every kernel function handle to output
- CU_GRAPH_DEBUG_DOT_FLAGS_MEM_ALLOC_NODE_PARAMS = 1<<11
- Adds memory alloc node parameters to output
- CU_GRAPH_DEBUG_DOT_FLAGS_MEM_FREE_NODE_PARAMS = 1<<12
- Adds memory free node parameters to output
- CU_GRAPH_DEBUG_DOT_FLAGS_BATCH_MEM_OP_NODE_PARAMS = 1<<13
- Adds batch mem op node parameters to output
- CU_GRAPH_DEBUG_DOT_FLAGS_EXTRA_TOPO_INFO = 1<<14
- Adds edge numbering information
- CU_GRAPH_DEBUG_DOT_FLAGS_CONDITIONAL_NODE_PARAMS = 1<<15
- Adds conditional node parameters to output
- enum CUgraphDependencyType
-
Type annotations that can be applied to graph edges as part of CUgraphEdgeData.
Values
- CU_GRAPH_DEPENDENCY_TYPE_DEFAULT = 0
- This is an ordinary dependency.
- CU_GRAPH_DEPENDENCY_TYPE_PROGRAMMATIC = 1
- This dependency type allows the downstream node to use cudaGridDependencySynchronize(). It may only be used between kernel nodes, and must be used with either the CU_GRAPH_KERNEL_NODE_PORT_PROGRAMMATIC or CU_GRAPH_KERNEL_NODE_PORT_LAUNCH_ORDER outgoing port.
- enum CUgraphExecUpdateResult
-
CUDA Graph Update error types
Values
- CU_GRAPH_EXEC_UPDATE_SUCCESS = 0x0
- The update succeeded
- CU_GRAPH_EXEC_UPDATE_ERROR = 0x1
- The update failed for an unexpected reason which is described in the return value of the function
- CU_GRAPH_EXEC_UPDATE_ERROR_TOPOLOGY_CHANGED = 0x2
- The update failed because the topology changed
- CU_GRAPH_EXEC_UPDATE_ERROR_NODE_TYPE_CHANGED = 0x3
- The update failed because a node type changed
- CU_GRAPH_EXEC_UPDATE_ERROR_FUNCTION_CHANGED = 0x4
- The update failed because the function of a kernel node changed (CUDA driver < 11.2)
- CU_GRAPH_EXEC_UPDATE_ERROR_PARAMETERS_CHANGED = 0x5
- The update failed because the parameters changed in a way that is not supported
- CU_GRAPH_EXEC_UPDATE_ERROR_NOT_SUPPORTED = 0x6
- The update failed because something about the node is not supported
- CU_GRAPH_EXEC_UPDATE_ERROR_UNSUPPORTED_FUNCTION_CHANGE = 0x7
- The update failed because the function of a kernel node changed in an unsupported way
- CU_GRAPH_EXEC_UPDATE_ERROR_ATTRIBUTES_CHANGED = 0x8
- The update failed because the node attributes changed in a way that is not supported
- enum CUgraphInstantiateResult
-
Graph instantiation results
Values
- CUDA_GRAPH_INSTANTIATE_SUCCESS = 0
- Instantiation succeeded
- CUDA_GRAPH_INSTANTIATE_ERROR = 1
- Instantiation failed for an unexpected reason which is described in the return value of the function
- CUDA_GRAPH_INSTANTIATE_INVALID_STRUCTURE = 2
- Instantiation failed due to invalid structure, such as cycles
- CUDA_GRAPH_INSTANTIATE_NODE_OPERATION_NOT_SUPPORTED = 3
- Instantiation for device launch failed because the graph contained an unsupported operation
- CUDA_GRAPH_INSTANTIATE_MULTIPLE_CTXS_NOT_SUPPORTED = 4
- Instantiation for device launch failed due to the nodes belonging to different contexts
- enum CUgraphInstantiate_flags
-
Flags for instantiating a graph
Values
- CUDA_GRAPH_INSTANTIATE_FLAG_AUTO_FREE_ON_LAUNCH = 1
- Automatically free memory allocated in a graph before relaunching.
- CUDA_GRAPH_INSTANTIATE_FLAG_UPLOAD = 2
- Automatically upload the graph after instantiation. Only supported by cuGraphInstantiateWithParams. The upload will be performed using the stream provided in instantiateParams.
- CUDA_GRAPH_INSTANTIATE_FLAG_DEVICE_LAUNCH = 4
- Instantiate the graph to be launchable from the device. This flag can only be used on platforms which support unified addressing. This flag cannot be used in conjunction with CUDA_GRAPH_INSTANTIATE_FLAG_AUTO_FREE_ON_LAUNCH.
- CUDA_GRAPH_INSTANTIATE_FLAG_USE_NODE_PRIORITY = 8
- Run the graph using the per-node priority attributes rather than the priority of the stream it is launched into.
- enum CUgraphNodeType
-
Graph node types
Values
- CU_GRAPH_NODE_TYPE_KERNEL = 0
- GPU kernel node
- CU_GRAPH_NODE_TYPE_MEMCPY = 1
- Memcpy node
- CU_GRAPH_NODE_TYPE_MEMSET = 2
- Memset node
- CU_GRAPH_NODE_TYPE_HOST = 3
- Host (executable) node
- CU_GRAPH_NODE_TYPE_GRAPH = 4
- Node which executes an embedded graph
- CU_GRAPH_NODE_TYPE_EMPTY = 5
- Empty (no-op) node
- CU_GRAPH_NODE_TYPE_WAIT_EVENT = 6
- External event wait node
- CU_GRAPH_NODE_TYPE_EVENT_RECORD = 7
- External event record node
- CU_GRAPH_NODE_TYPE_EXT_SEMAS_SIGNAL = 8
- External semaphore signal node
- CU_GRAPH_NODE_TYPE_EXT_SEMAS_WAIT = 9
- External semaphore wait node
- CU_GRAPH_NODE_TYPE_MEM_ALLOC = 10
- Memory Allocation Node
- CU_GRAPH_NODE_TYPE_MEM_FREE = 11
- Memory Free Node
- CU_GRAPH_NODE_TYPE_BATCH_MEM_OP = 12
- Batch MemOp Node
- CU_GRAPH_NODE_TYPE_CONDITIONAL = 13
- Conditional NodeMay be used to implement a conditional execution path or loop inside of a graph. The graph(s) contained within the body of the conditional node can be selectively executed or iterated upon based on the value of a conditional variable.Handles must be created in advance of creating the node using cuGraphConditionalHandleCreate.The following restrictions apply to graphs which contain conditional nodes: The graph cannot be used in a child node. Only one instantiation of the graph may exist at any point in time. The graph cannot be cloned.To set the control value:In a kernel or kernels at appropriate locations in the graph, insert a call to void cudaGraphSetConditional(CUgraphConditionalHandle handle, unsigned int value). Supply a default value when creating the handle.
- enum CUgraphicsMapResourceFlags
-
Flags for mapping and unmapping interop resources
Values
- CU_GRAPHICS_MAP_RESOURCE_FLAGS_NONE = 0x00
- CU_GRAPHICS_MAP_RESOURCE_FLAGS_READ_ONLY = 0x01
- CU_GRAPHICS_MAP_RESOURCE_FLAGS_WRITE_DISCARD = 0x02
- enum CUgraphicsRegisterFlags
-
Flags to register a graphics resource
Values
- CU_GRAPHICS_REGISTER_FLAGS_NONE = 0x00
- CU_GRAPHICS_REGISTER_FLAGS_READ_ONLY = 0x01
- CU_GRAPHICS_REGISTER_FLAGS_WRITE_DISCARD = 0x02
- CU_GRAPHICS_REGISTER_FLAGS_SURFACE_LDST = 0x04
- CU_GRAPHICS_REGISTER_FLAGS_TEXTURE_GATHER = 0x08
- enum CUipcMem_flags
-
CUDA Ipc Mem Flags
Values
- CU_IPC_MEM_LAZY_ENABLE_PEER_ACCESS = 0x1
- Automatically enable peer access between remote devices as needed
- enum CUjitInputType
-
Device code formats
Values
- CU_JIT_INPUT_CUBIN = 0
- Compiled device-class-specific device code Applicable options: none
- CU_JIT_INPUT_PTX = 1
- PTX source code Applicable options: PTX compiler options
- CU_JIT_INPUT_FATBINARY = 2
- Bundle of multiple cubins and/or PTX of some device code Applicable options: PTX compiler options, CU_JIT_FALLBACK_STRATEGY
- CU_JIT_INPUT_OBJECT = 3
- Host object with embedded device code Applicable options: PTX compiler options, CU_JIT_FALLBACK_STRATEGY
- CU_JIT_INPUT_LIBRARY = 4
- Archive of host objects with embedded device code Applicable options: PTX compiler options, CU_JIT_FALLBACK_STRATEGY
- CU_JIT_INPUT_NVVM = 5
-
Deprecated
High-level intermediate code for link-time optimization Applicable options: NVVM compiler options, PTX compiler options
Only valid with LTO-IR compiled with toolkits prior to CUDA 12.0
- CU_JIT_NUM_INPUT_TYPES = 6
- enum CUjit_cacheMode
-
Caching modes for dlcm
Values
- CU_JIT_CACHE_OPTION_NONE = 0
- Compile with no -dlcm flag specified
- CU_JIT_CACHE_OPTION_CG
- Compile with L1 cache disabled
- CU_JIT_CACHE_OPTION_CA
- Compile with L1 cache enabled
- enum CUjit_fallback
-
Cubin matching fallback strategies
Values
- CU_PREFER_PTX = 0
- Prefer to compile ptx if exact binary match not found
- CU_PREFER_BINARY
- Prefer to fall back to compatible binary code if exact match not found
- enum CUjit_option
-
Online compiler and linker options
Values
- CU_JIT_MAX_REGISTERS = 0
- Max number of registers that a thread may use. Option type: unsigned int Applies to: compiler only
- CU_JIT_THREADS_PER_BLOCK = 1
- IN: Specifies minimum number of threads per block to target compilation for OUT: Returns the number of threads the compiler actually targeted. This restricts the resource utilization of the compiler (e.g. max registers) such that a block with the given number of threads should be able to launch based on register limitations. Note, this option does not currently take into account any other resource limitations, such as shared memory utilization. Cannot be combined with CU_JIT_TARGET. Option type: unsigned int Applies to: compiler only
- CU_JIT_WALL_TIME = 2
- Overwrites the option value with the total wall clock time, in milliseconds, spent in the compiler and linker Option type: float Applies to: compiler and linker
- CU_JIT_INFO_LOG_BUFFER = 3
- Pointer to a buffer in which to print any log messages that are informational in nature (the buffer size is specified via option CU_JIT_INFO_LOG_BUFFER_SIZE_BYTES) Option type: char * Applies to: compiler and linker
- CU_JIT_INFO_LOG_BUFFER_SIZE_BYTES = 4
- IN: Log buffer size in bytes. Log messages will be capped at this size (including null terminator) OUT: Amount of log buffer filled with messages Option type: unsigned int Applies to: compiler and linker
- CU_JIT_ERROR_LOG_BUFFER = 5
- Pointer to a buffer in which to print any log messages that reflect errors (the buffer size is specified via option CU_JIT_ERROR_LOG_BUFFER_SIZE_BYTES) Option type: char * Applies to: compiler and linker
- CU_JIT_ERROR_LOG_BUFFER_SIZE_BYTES = 6
- IN: Log buffer size in bytes. Log messages will be capped at this size (including null terminator) OUT: Amount of log buffer filled with messages Option type: unsigned int Applies to: compiler and linker
- CU_JIT_OPTIMIZATION_LEVEL = 7
- Level of optimizations to apply to generated code (0 - 4), with 4 being the default and highest level of optimizations. Option type: unsigned int Applies to: compiler only
- CU_JIT_TARGET_FROM_CUCONTEXT = 8
- No option value required. Determines the target based on the current attached context (default) Option type: No option value needed Applies to: compiler and linker
- CU_JIT_TARGET = 9
- Target is chosen based on supplied CUjit_target. Cannot be combined with CU_JIT_THREADS_PER_BLOCK. Option type: unsigned int for enumerated type CUjit_target Applies to: compiler and linker
- CU_JIT_FALLBACK_STRATEGY = 10
- Specifies choice of fallback strategy if matching cubin is not found. Choice is based on supplied CUjit_fallback. This option cannot be used with cuLink* APIs as the linker requires exact matches. Option type: unsigned int for enumerated type CUjit_fallback Applies to: compiler only
- CU_JIT_GENERATE_DEBUG_INFO = 11
- Specifies whether to create debug information in output (-g) (0: false, default) Option type: int Applies to: compiler and linker
- CU_JIT_LOG_VERBOSE = 12
- Generate verbose log messages (0: false, default) Option type: int Applies to: compiler and linker
- CU_JIT_GENERATE_LINE_INFO = 13
- Generate line number information (-lineinfo) (0: false, default) Option type: int Applies to: compiler only
- CU_JIT_CACHE_MODE = 14
- Specifies whether to enable caching explicitly (-dlcm) Choice is based on supplied CUjit_cacheMode_enum. Option type: unsigned int for enumerated type CUjit_cacheMode_enum Applies to: compiler only
- CU_JIT_NEW_SM3X_OPT = 15
-
Deprecated
This jit option is deprecated and should not be used.
- CU_JIT_FAST_COMPILE = 16
- This jit option is used for internal purpose only.
- CU_JIT_GLOBAL_SYMBOL_NAMES = 17
- Array of device symbol names that will be relocated to the corresponding host addresses stored in CU_JIT_GLOBAL_SYMBOL_ADDRESSES. Must contain CU_JIT_GLOBAL_SYMBOL_COUNT entries. When loading a device module, driver will relocate all encountered unresolved symbols to the host addresses. It is only allowed to register symbols that correspond to unresolved global variables. It is illegal to register the same device symbol at multiple addresses. Option type: const char ** Applies to: dynamic linker only
- CU_JIT_GLOBAL_SYMBOL_ADDRESSES = 18
- Array of host addresses that will be used to relocate corresponding device symbols stored in CU_JIT_GLOBAL_SYMBOL_NAMES. Must contain CU_JIT_GLOBAL_SYMBOL_COUNT entries. Option type: void ** Applies to: dynamic linker only
- CU_JIT_GLOBAL_SYMBOL_COUNT = 19
- Number of entries in CU_JIT_GLOBAL_SYMBOL_NAMES and CU_JIT_GLOBAL_SYMBOL_ADDRESSES arrays. Option type: unsigned int Applies to: dynamic linker only
- CU_JIT_LTO = 20
-
Deprecated
Enable link-time optimization (-dlto) for device code (Disabled by default). This option is not supported on 32-bit platforms. Option type: int Applies to: compiler and linker
Only valid with LTO-IR compiled with toolkits prior to CUDA 12.0
- CU_JIT_FTZ = 21
-
Deprecated
Control single-precision denormals (-ftz) support (0: false, default). 1 : flushes denormal values to zero 0 : preserves denormal values Option type: int Applies to: link-time optimization specified with CU_JIT_LTO
Only valid with LTO-IR compiled with toolkits prior to CUDA 12.0
- CU_JIT_PREC_DIV = 22
-
Deprecated
Control single-precision floating-point division and reciprocals (-prec-div) support (1: true, default). 1 : Enables the IEEE round-to-nearest mode 0 : Enables the fast approximation mode Option type: int Applies to: link-time optimization specified with CU_JIT_LTO
Only valid with LTO-IR compiled with toolkits prior to CUDA 12.0
- CU_JIT_PREC_SQRT = 23
-
Deprecated
Control single-precision floating-point square root (-prec-sqrt) support (1: true, default). 1 : Enables the IEEE round-to-nearest mode 0 : Enables the fast approximation mode Option type: int Applies to: link-time optimization specified with CU_JIT_LTO
Only valid with LTO-IR compiled with toolkits prior to CUDA 12.0
- CU_JIT_FMA = 24
-
Deprecated
Enable/Disable the contraction of floating-point multiplies and adds/subtracts into floating-point multiply-add (-fma) operations (1: Enable, default; 0: Disable). Option type: int Applies to: link-time optimization specified with CU_JIT_LTO
Only valid with LTO-IR compiled with toolkits prior to CUDA 12.0
- CU_JIT_REFERENCED_KERNEL_NAMES = 25
-
Deprecated
Array of kernel names that should be preserved at link time while others can be removed. Must contain CU_JIT_REFERENCED_KERNEL_COUNT entries. Note that kernel names can be mangled by the compiler in which case the mangled name needs to be specified. Wildcard "*" can be used to represent zero or more characters instead of specifying the full or mangled name. It is important to note that the wildcard "*" is also added implicitly. For example, specifying "foo" will match "foobaz", "barfoo", "barfoobaz" and thus preserve all kernels with those names. This can be avoided by providing a more specific name like "barfoobaz". Option type: const char ** Applies to: dynamic linker only
Only valid with LTO-IR compiled with toolkits prior to CUDA 12.0
- CU_JIT_REFERENCED_KERNEL_COUNT = 26
-
Deprecated
Number of entries in CU_JIT_REFERENCED_KERNEL_NAMES array. Option type: unsigned int Applies to: dynamic linker only
Only valid with LTO-IR compiled with toolkits prior to CUDA 12.0
- CU_JIT_REFERENCED_VARIABLE_NAMES = 27
-
Deprecated
Array of variable names (__device__ and/or __constant__) that should be preserved at link time while others can be removed. Must contain CU_JIT_REFERENCED_VARIABLE_COUNT entries. Note that variable names can be mangled by the compiler in which case the mangled name needs to be specified. Wildcard "*" can be used to represent zero or more characters instead of specifying the full or mangled name. It is important to note that the wildcard "*" is also added implicitly. For example, specifying "foo" will match "foobaz", "barfoo", "barfoobaz" and thus preserve all variables with those names. This can be avoided by providing a more specific name like "barfoobaz". Option type: const char ** Applies to: link-time optimization specified with CU_JIT_LTO
Only valid with LTO-IR compiled with toolkits prior to CUDA 12.0
- CU_JIT_REFERENCED_VARIABLE_COUNT = 28
-
Deprecated
Number of entries in CU_JIT_REFERENCED_VARIABLE_NAMES array. Option type: unsigned int Applies to: link-time optimization specified with CU_JIT_LTO
Only valid with LTO-IR compiled with toolkits prior to CUDA 12.0
- CU_JIT_OPTIMIZE_UNUSED_DEVICE_VARIABLES = 29
-
Deprecated
This option serves as a hint to enable the JIT compiler/linker to remove constant (__constant__) and device (__device__) variables unreferenced in device code (Disabled by default). Note that host references to constant and device variables using APIs like cuModuleGetGlobal() with this option specified may result in undefined behavior unless the variables are explicitly specified using CU_JIT_REFERENCED_VARIABLE_NAMES. Option type: int Applies to: link-time optimization specified with CU_JIT_LTO
Only valid with LTO-IR compiled with toolkits prior to CUDA 12.0
- CU_JIT_POSITION_INDEPENDENT_CODE = 30
- Generate position independent code (0: false) Option type: int Applies to: compiler only
- CU_JIT_MIN_CTA_PER_SM = 31
- This option hints to the JIT compiler the minimum number of CTAs from the kernel’s grid to be mapped to a SM. Optimizations based on this option need the maximum number of threads per block to be specified as well. This option is ignored when used together with CU_JIT_MAX_REGISTERS or CU_JIT_THREADS_PER_BLOCK. Option type: unsigned int Applies to: compiler only
- CU_JIT_NUM_OPTIONS
- enum CUjit_target
-
Online compilation targets
Values
- CU_TARGET_COMPUTE_30 = 30
- Compute device class 3.0
- CU_TARGET_COMPUTE_32 = 32
- Compute device class 3.2
- CU_TARGET_COMPUTE_35 = 35
- Compute device class 3.5
- CU_TARGET_COMPUTE_37 = 37
- Compute device class 3.7
- CU_TARGET_COMPUTE_50 = 50
- Compute device class 5.0
- CU_TARGET_COMPUTE_52 = 52
- Compute device class 5.2
- CU_TARGET_COMPUTE_53 = 53
- Compute device class 5.3
- CU_TARGET_COMPUTE_60 = 60
- Compute device class 6.0.
- CU_TARGET_COMPUTE_61 = 61
- Compute device class 6.1.
- CU_TARGET_COMPUTE_62 = 62
- Compute device class 6.2.
- CU_TARGET_COMPUTE_70 = 70
- Compute device class 7.0.
- CU_TARGET_COMPUTE_72 = 72
- Compute device class 7.2.
- CU_TARGET_COMPUTE_75 = 75
- Compute device class 7.5.
- CU_TARGET_COMPUTE_80 = 80
- Compute device class 8.0.
- CU_TARGET_COMPUTE_86 = 86
- Compute device class 8.6.
- CU_TARGET_COMPUTE_87 = 87
- Compute device class 8.7.
- CU_TARGET_COMPUTE_89 = 89
- Compute device class 8.9.
- CU_TARGET_COMPUTE_90 = 90
- Compute device class 9.0. Compute device class 9.0. with accelerated features.
- CU_TARGET_COMPUTE_90A = CU_COMPUTE_ACCELERATED_TARGET_BASE+CU_TARGET_COMPUTE_90
- enum CUlaunchAttributeID
-
Launch attributes enum; used as id field of CUlaunchAttribute
Values
- CU_LAUNCH_ATTRIBUTE_IGNORE = 0
- Ignored entry, for convenient composition
- CU_LAUNCH_ATTRIBUTE_ACCESS_POLICY_WINDOW = 1
- Valid for streams, graph nodes, launches. See CUlaunchAttributeValue::accessPolicyWindow.
- CU_LAUNCH_ATTRIBUTE_COOPERATIVE = 2
- Valid for graph nodes, launches. See CUlaunchAttributeValue::cooperative.
- CU_LAUNCH_ATTRIBUTE_SYNCHRONIZATION_POLICY = 3
- Valid for streams. See CUlaunchAttributeValue::syncPolicy.
- CU_LAUNCH_ATTRIBUTE_CLUSTER_DIMENSION = 4
- Valid for graph nodes, launches. See CUlaunchAttributeValue::clusterDim.
- CU_LAUNCH_ATTRIBUTE_CLUSTER_SCHEDULING_POLICY_PREFERENCE = 5
- Valid for graph nodes, launches. See CUlaunchAttributeValue::clusterSchedulingPolicyPreference.
- CU_LAUNCH_ATTRIBUTE_PROGRAMMATIC_STREAM_SERIALIZATION = 6
- Valid for launches. Setting CUlaunchAttributeValue::programmaticStreamSerializationAllowed to non-0 signals that the kernel will use programmatic means to resolve its stream dependency, so that the CUDA runtime should opportunistically allow the grid's execution to overlap with the previous kernel in the stream, if that kernel requests the overlap. The dependent launches can choose to wait on the dependency using the programmatic sync (cudaGridDependencySynchronize() or equivalent PTX instructions).
- CU_LAUNCH_ATTRIBUTE_PROGRAMMATIC_EVENT = 7
- Valid for launches. Set CUlaunchAttributeValue::programmaticEvent to record the event. Event recorded through this launch attribute is guaranteed to only trigger after all block in the associated kernel trigger the event. A block can trigger the event through PTX launchdep.release or CUDA builtin function cudaTriggerProgrammaticLaunchCompletion(). A trigger can also be inserted at the beginning of each block's execution if triggerAtBlockStart is set to non-0. The dependent launches can choose to wait on the dependency using the programmatic sync (cudaGridDependencySynchronize() or equivalent PTX instructions). Note that dependents (including the CPU thread calling cuEventSynchronize()) are not guaranteed to observe the release precisely when it is released. For example, cuEventSynchronize() may only observe the event trigger long after the associated kernel has completed. This recording type is primarily meant for establishing programmatic dependency between device tasks. Note also this type of dependency allows, but does not guarantee, concurrent execution of tasks. The event supplied must not be an interprocess or interop event. The event must disable timing (i.e. must be created with the CU_EVENT_DISABLE_TIMING flag set).
- CU_LAUNCH_ATTRIBUTE_PRIORITY = 8
- Valid for streams, graph nodes, launches. See CUlaunchAttributeValue::priority.
- CU_LAUNCH_ATTRIBUTE_MEM_SYNC_DOMAIN_MAP = 9
- Valid for streams, graph nodes, launches. See CUlaunchAttributeValue::memSyncDomainMap.
- CU_LAUNCH_ATTRIBUTE_MEM_SYNC_DOMAIN = 10
- Valid for streams, graph nodes, launches. See CUlaunchAttributeValue::memSyncDomain.
- CU_LAUNCH_ATTRIBUTE_LAUNCH_COMPLETION_EVENT = 12
- Valid for launches. Set CUlaunchAttributeValue::launchCompletionEvent to record the event. Nominally, the event is triggered once all blocks of the kernel have begun execution. Currently this is a best effort. If a kernel B has a launch completion dependency on a kernel A, B may wait until A is complete. Alternatively, blocks of B may begin before all blocks of A have begun, for example if B can claim execution resources unavailable to A (e.g. they run on different GPUs) or if B is a higher priority than A. Exercise caution if such an ordering inversion could lead to deadlock. A launch completion event is nominally similar to a programmatic event with triggerAtBlockStart set except that it is not visible to cudaGridDependencySynchronize() and can be used with compute capability less than 9.0. The event supplied must not be an interprocess or interop event. The event must disable timing (i.e. must be created with the CU_EVENT_DISABLE_TIMING flag set).
- enum CUlaunchMemSyncDomain
-
Memory Synchronization Domain
A kernel can be launched in a specified memory synchronization domain that affects all memory operations issued by that kernel. A memory barrier issued in one domain will only order memory operations in that domain, thus eliminating latency increase from memory barriers ordering unrelated traffic.
By default, kernels are launched in domain 0. Kernel launched with CU_LAUNCH_MEM_SYNC_DOMAIN_REMOTE will have a different domain ID. User may also alter the domain ID with CUlaunchMemSyncDomainMap for a specific stream / graph node / kernel launch. See CU_LAUNCH_ATTRIBUTE_MEM_SYNC_DOMAIN, cuStreamSetAttribute, cuLaunchKernelEx, cuGraphKernelNodeSetAttribute.
Memory operations done in kernels launched in different domains are considered system-scope distanced. In other words, a GPU scoped memory synchronization is not sufficient for memory order to be observed by kernels in another memory synchronization domain even if they are on the same GPU.
Values
- CU_LAUNCH_MEM_SYNC_DOMAIN_DEFAULT = 0
- Launch kernels in the default domain
- CU_LAUNCH_MEM_SYNC_DOMAIN_REMOTE = 1
- Launch kernels in the remote domain
- enum CUlibraryOption
-
Library options to be specified with cuLibraryLoadData() or cuLibraryLoadFromFile()
Values
- CU_LIBRARY_HOST_UNIVERSAL_FUNCTION_AND_DATA_TABLE = 0
- CU_LIBRARY_BINARY_IS_PRESERVED = 1
- Specifes that the argument code passed to cuLibraryLoadData() will be preserved. Specifying this option will let the driver know that code can be accessed at any point until cuLibraryUnload(). The default behavior is for the driver to allocate and maintain its own copy of code. Note that this is only a memory usage optimization hint and the driver can choose to ignore it if required. Specifying this option with cuLibraryLoadFromFile() is invalid and will return CUDA_ERROR_INVALID_VALUE.
- CU_LIBRARY_NUM_OPTIONS
- enum CUlimit
-
Limits
Values
- CU_LIMIT_STACK_SIZE = 0x00
- GPU thread stack size
- CU_LIMIT_PRINTF_FIFO_SIZE = 0x01
- GPU printf FIFO size
- CU_LIMIT_MALLOC_HEAP_SIZE = 0x02
- GPU malloc heap size
- CU_LIMIT_DEV_RUNTIME_SYNC_DEPTH = 0x03
- GPU device runtime launch synchronize depth
- CU_LIMIT_DEV_RUNTIME_PENDING_LAUNCH_COUNT = 0x04
- GPU device runtime pending launch count
- CU_LIMIT_MAX_L2_FETCH_GRANULARITY = 0x05
- A value between 0 and 128 that indicates the maximum fetch granularity of L2 (in Bytes). This is a hint
- CU_LIMIT_PERSISTING_L2_CACHE_SIZE = 0x06
- A size in bytes for L2 persisting lines cache size
- CU_LIMIT_MAX
- enum CUmemAccess_flags
-
Specifies the memory protection flags for mapping.
Values
- CU_MEM_ACCESS_FLAGS_PROT_NONE = 0x0
- Default, make the address range not accessible
- CU_MEM_ACCESS_FLAGS_PROT_READ = 0x1
- Make the address range read accessible
- CU_MEM_ACCESS_FLAGS_PROT_READWRITE = 0x3
- Make the address range read-write accessible
- CU_MEM_ACCESS_FLAGS_PROT_MAX = 0x7FFFFFFF
- enum CUmemAllocationCompType
-
Specifies compression attribute for an allocation.
Values
- CU_MEM_ALLOCATION_COMP_NONE = 0x0
- Allocating non-compressible memory
- CU_MEM_ALLOCATION_COMP_GENERIC = 0x1
- Allocating compressible memory
- enum CUmemAllocationGranularity_flags
-
Flag for requesting different optimal and required granularities for an allocation.
Values
- CU_MEM_ALLOC_GRANULARITY_MINIMUM = 0x0
- Minimum required granularity for allocation
- CU_MEM_ALLOC_GRANULARITY_RECOMMENDED = 0x1
- Recommended granularity for allocation for best performance
- enum CUmemAllocationHandleType
-
Flags for specifying particular handle types
Values
- CU_MEM_HANDLE_TYPE_NONE = 0x0
- Does not allow any export mechanism. >
- CU_MEM_HANDLE_TYPE_POSIX_FILE_DESCRIPTOR = 0x1
- Allows a file descriptor to be used for exporting. Permitted only on POSIX systems. (int)
- CU_MEM_HANDLE_TYPE_WIN32 = 0x2
- Allows a Win32 NT handle to be used for exporting. (HANDLE)
- CU_MEM_HANDLE_TYPE_WIN32_KMT = 0x4
- Allows a Win32 KMT handle to be used for exporting. (D3DKMT_HANDLE)
- CU_MEM_HANDLE_TYPE_FABRIC = 0x8
- Allows a fabric handle to be used for exporting. (CUmemFabricHandle)
- CU_MEM_HANDLE_TYPE_MAX = 0x7FFFFFFF
- enum CUmemAllocationType
-
Defines the allocation types available
Values
- CU_MEM_ALLOCATION_TYPE_INVALID = 0x0
- CU_MEM_ALLOCATION_TYPE_PINNED = 0x1
- This allocation type is 'pinned', i.e. cannot migrate from its current location while the application is actively using it
- CU_MEM_ALLOCATION_TYPE_MAX = 0x7FFFFFFF
- enum CUmemAttach_flags
-
CUDA Mem Attach Flags
Values
- CU_MEM_ATTACH_GLOBAL = 0x1
- Memory can be accessed by any stream on any device
- CU_MEM_ATTACH_HOST = 0x2
- Memory cannot be accessed by any stream on any device
- CU_MEM_ATTACH_SINGLE = 0x4
- Memory can only be accessed by a single stream on the associated device
- enum CUmemHandleType
-
Memory handle types
Values
- CU_MEM_HANDLE_TYPE_GENERIC = 0
- enum CUmemLocationType
-
Specifies the type of location
Values
- CU_MEM_LOCATION_TYPE_INVALID = 0x0
- CU_MEM_LOCATION_TYPE_DEVICE = 0x1
- Location is a device location, thus id is a device ordinal
- CU_MEM_LOCATION_TYPE_HOST = 0x2
- Location is host, id is ignored
- CU_MEM_LOCATION_TYPE_HOST_NUMA = 0x3
- Location is a host NUMA node, thus id is a host NUMA node id
- CU_MEM_LOCATION_TYPE_HOST_NUMA_CURRENT = 0x4
- Location is a host NUMA node of the current thread, id is ignored
- CU_MEM_LOCATION_TYPE_MAX = 0x7FFFFFFF
- enum CUmemOperationType
-
Memory operation types
Values
- CU_MEM_OPERATION_TYPE_MAP = 1
- CU_MEM_OPERATION_TYPE_UNMAP = 2
- enum CUmemPool_attribute
-
CUDA memory pool attributes
Values
- CU_MEMPOOL_ATTR_REUSE_FOLLOW_EVENT_DEPENDENCIES = 1
- (value type = int) Allow cuMemAllocAsync to use memory asynchronously freed in another streams as long as a stream ordering dependency of the allocating stream on the free action exists. Cuda events and null stream interactions can create the required stream ordered dependencies. (default enabled)
- CU_MEMPOOL_ATTR_REUSE_ALLOW_OPPORTUNISTIC
- (value type = int) Allow reuse of already completed frees when there is no dependency between the free and allocation. (default enabled)
- CU_MEMPOOL_ATTR_REUSE_ALLOW_INTERNAL_DEPENDENCIES
- (value type = int) Allow cuMemAllocAsync to insert new stream dependencies in order to establish the stream ordering required to reuse a piece of memory released by cuFreeAsync (default enabled).
- CU_MEMPOOL_ATTR_RELEASE_THRESHOLD
- (value type = cuuint64_t) Amount of reserved memory in bytes to hold onto before trying to release memory back to the OS. When more than the release threshold bytes of memory are held by the memory pool, the allocator will try to release memory back to the OS on the next call to stream, event or context synchronize. (default 0)
- CU_MEMPOOL_ATTR_RESERVED_MEM_CURRENT
- (value type = cuuint64_t) Amount of backing memory currently allocated for the mempool.
- CU_MEMPOOL_ATTR_RESERVED_MEM_HIGH
- (value type = cuuint64_t) High watermark of backing memory allocated for the mempool since the last time it was reset. High watermark can only be reset to zero.
- CU_MEMPOOL_ATTR_USED_MEM_CURRENT
- (value type = cuuint64_t) Amount of memory from the pool that is currently in use by the application.
- CU_MEMPOOL_ATTR_USED_MEM_HIGH
- (value type = cuuint64_t) High watermark of the amount of memory from the pool that was in use by the application since the last time it was reset. High watermark can only be reset to zero.
- enum CUmemRangeHandleType
-
Specifies the handle type for address range
Values
- CU_MEM_RANGE_HANDLE_TYPE_DMA_BUF_FD = 0x1
- CU_MEM_RANGE_HANDLE_TYPE_MAX = 0x7FFFFFFF
- enum CUmem_advise
-
Memory advise values
Values
- CU_MEM_ADVISE_SET_READ_MOSTLY = 1
- Data will mostly be read and only occasionally be written to
- CU_MEM_ADVISE_UNSET_READ_MOSTLY = 2
- Undo the effect of CU_MEM_ADVISE_SET_READ_MOSTLY
- CU_MEM_ADVISE_SET_PREFERRED_LOCATION = 3
- Set the preferred location for the data as the specified device
- CU_MEM_ADVISE_UNSET_PREFERRED_LOCATION = 4
- Clear the preferred location for the data
- CU_MEM_ADVISE_SET_ACCESSED_BY = 5
- Data will be accessed by the specified device, so prevent page faults as much as possible
- CU_MEM_ADVISE_UNSET_ACCESSED_BY = 6
- Let the Unified Memory subsystem decide on the page faulting policy for the specified device
- enum CUmemorytype
-
Memory types
Values
- CU_MEMORYTYPE_HOST = 0x01
- Host memory
- CU_MEMORYTYPE_DEVICE = 0x02
- Device memory
- CU_MEMORYTYPE_ARRAY = 0x03
- Array memory
- CU_MEMORYTYPE_UNIFIED = 0x04
- Unified device or host memory
- enum CUmulticastGranularity_flags
-
Flags for querying different granularities for a multicast object
Values
- CU_MULTICAST_GRANULARITY_MINIMUM = 0x0
- Minimum required granularity
- CU_MULTICAST_GRANULARITY_RECOMMENDED = 0x1
- Recommended granularity for best performance
- enum CUoccupancy_flags
-
Occupancy calculator flag
Values
- CU_OCCUPANCY_DEFAULT = 0x0
- Default behavior
- CU_OCCUPANCY_DISABLE_CACHING_OVERRIDE = 0x1
- Assume global caching is enabled and cannot be automatically turned off
- enum CUpointer_attribute
-
Pointer information
Values
- CU_POINTER_ATTRIBUTE_CONTEXT = 1
- The CUcontext on which a pointer was allocated or registered
- CU_POINTER_ATTRIBUTE_MEMORY_TYPE = 2
- The CUmemorytype describing the physical location of a pointer
- CU_POINTER_ATTRIBUTE_DEVICE_POINTER = 3
- The address at which a pointer's memory may be accessed on the device
- CU_POINTER_ATTRIBUTE_HOST_POINTER = 4
- The address at which a pointer's memory may be accessed on the host
- CU_POINTER_ATTRIBUTE_P2P_TOKENS = 5
- A pair of tokens for use with the nv-p2p.h Linux kernel interface
- CU_POINTER_ATTRIBUTE_SYNC_MEMOPS = 6
- Synchronize every synchronous memory operation initiated on this region
- CU_POINTER_ATTRIBUTE_BUFFER_ID = 7
- A process-wide unique ID for an allocated memory region
- CU_POINTER_ATTRIBUTE_IS_MANAGED = 8
- Indicates if the pointer points to managed memory
- CU_POINTER_ATTRIBUTE_DEVICE_ORDINAL = 9
- A device ordinal of a device on which a pointer was allocated or registered
- CU_POINTER_ATTRIBUTE_IS_LEGACY_CUDA_IPC_CAPABLE = 10
- 1 if this pointer maps to an allocation that is suitable for cudaIpcGetMemHandle, 0 otherwise
- CU_POINTER_ATTRIBUTE_RANGE_START_ADDR = 11
- Starting address for this requested pointer
- CU_POINTER_ATTRIBUTE_RANGE_SIZE = 12
- Size of the address range for this requested pointer
- CU_POINTER_ATTRIBUTE_MAPPED = 13
- 1 if this pointer is in a valid address range that is mapped to a backing allocation, 0 otherwise
- CU_POINTER_ATTRIBUTE_ALLOWED_HANDLE_TYPES = 14
- Bitmask of allowed CUmemAllocationHandleType for this allocation
- CU_POINTER_ATTRIBUTE_IS_GPU_DIRECT_RDMA_CAPABLE = 15
- 1 if the memory this pointer is referencing can be used with the GPUDirect RDMA API
- CU_POINTER_ATTRIBUTE_ACCESS_FLAGS = 16
- Returns the access flags the device associated with the current context has on the corresponding memory referenced by the pointer given
- CU_POINTER_ATTRIBUTE_MEMPOOL_HANDLE = 17
- Returns the mempool handle for the allocation if it was allocated from a mempool. Otherwise returns NULL.
- CU_POINTER_ATTRIBUTE_MAPPING_SIZE = 18
- Size of the actual underlying mapping that the pointer belongs to
- CU_POINTER_ATTRIBUTE_MAPPING_BASE_ADDR = 19
- The start address of the mapping that the pointer belongs to
- CU_POINTER_ATTRIBUTE_MEMORY_BLOCK_ID = 20
- A process-wide unique id corresponding to the physical allocation the pointer belongs to
- enum CUresourceViewFormat
-
Resource view format
Values
- CU_RES_VIEW_FORMAT_NONE = 0x00
- No resource view format (use underlying resource format)
- CU_RES_VIEW_FORMAT_UINT_1X8 = 0x01
- 1 channel unsigned 8-bit integers
- CU_RES_VIEW_FORMAT_UINT_2X8 = 0x02
- 2 channel unsigned 8-bit integers
- CU_RES_VIEW_FORMAT_UINT_4X8 = 0x03
- 4 channel unsigned 8-bit integers
- CU_RES_VIEW_FORMAT_SINT_1X8 = 0x04
- 1 channel signed 8-bit integers
- CU_RES_VIEW_FORMAT_SINT_2X8 = 0x05
- 2 channel signed 8-bit integers
- CU_RES_VIEW_FORMAT_SINT_4X8 = 0x06
- 4 channel signed 8-bit integers
- CU_RES_VIEW_FORMAT_UINT_1X16 = 0x07
- 1 channel unsigned 16-bit integers
- CU_RES_VIEW_FORMAT_UINT_2X16 = 0x08
- 2 channel unsigned 16-bit integers
- CU_RES_VIEW_FORMAT_UINT_4X16 = 0x09
- 4 channel unsigned 16-bit integers
- CU_RES_VIEW_FORMAT_SINT_1X16 = 0x0a
- 1 channel signed 16-bit integers
- CU_RES_VIEW_FORMAT_SINT_2X16 = 0x0b
- 2 channel signed 16-bit integers
- CU_RES_VIEW_FORMAT_SINT_4X16 = 0x0c
- 4 channel signed 16-bit integers
- CU_RES_VIEW_FORMAT_UINT_1X32 = 0x0d
- 1 channel unsigned 32-bit integers
- CU_RES_VIEW_FORMAT_UINT_2X32 = 0x0e
- 2 channel unsigned 32-bit integers
- CU_RES_VIEW_FORMAT_UINT_4X32 = 0x0f
- 4 channel unsigned 32-bit integers
- CU_RES_VIEW_FORMAT_SINT_1X32 = 0x10
- 1 channel signed 32-bit integers
- CU_RES_VIEW_FORMAT_SINT_2X32 = 0x11
- 2 channel signed 32-bit integers
- CU_RES_VIEW_FORMAT_SINT_4X32 = 0x12
- 4 channel signed 32-bit integers
- CU_RES_VIEW_FORMAT_FLOAT_1X16 = 0x13
- 1 channel 16-bit floating point
- CU_RES_VIEW_FORMAT_FLOAT_2X16 = 0x14
- 2 channel 16-bit floating point
- CU_RES_VIEW_FORMAT_FLOAT_4X16 = 0x15
- 4 channel 16-bit floating point
- CU_RES_VIEW_FORMAT_FLOAT_1X32 = 0x16
- 1 channel 32-bit floating point
- CU_RES_VIEW_FORMAT_FLOAT_2X32 = 0x17
- 2 channel 32-bit floating point
- CU_RES_VIEW_FORMAT_FLOAT_4X32 = 0x18
- 4 channel 32-bit floating point
- CU_RES_VIEW_FORMAT_UNSIGNED_BC1 = 0x19
- Block compressed 1
- CU_RES_VIEW_FORMAT_UNSIGNED_BC2 = 0x1a
- Block compressed 2
- CU_RES_VIEW_FORMAT_UNSIGNED_BC3 = 0x1b
- Block compressed 3
- CU_RES_VIEW_FORMAT_UNSIGNED_BC4 = 0x1c
- Block compressed 4 unsigned
- CU_RES_VIEW_FORMAT_SIGNED_BC4 = 0x1d
- Block compressed 4 signed
- CU_RES_VIEW_FORMAT_UNSIGNED_BC5 = 0x1e
- Block compressed 5 unsigned
- CU_RES_VIEW_FORMAT_SIGNED_BC5 = 0x1f
- Block compressed 5 signed
- CU_RES_VIEW_FORMAT_UNSIGNED_BC6H = 0x20
- Block compressed 6 unsigned half-float
- CU_RES_VIEW_FORMAT_SIGNED_BC6H = 0x21
- Block compressed 6 signed half-float
- CU_RES_VIEW_FORMAT_UNSIGNED_BC7 = 0x22
- Block compressed 7
- enum CUresourcetype
-
Resource types
Values
- CU_RESOURCE_TYPE_ARRAY = 0x00
- Array resource
- CU_RESOURCE_TYPE_MIPMAPPED_ARRAY = 0x01
- Mipmapped array resource
- CU_RESOURCE_TYPE_LINEAR = 0x02
- Linear resource
- CU_RESOURCE_TYPE_PITCH2D = 0x03
- Pitch 2D resource
- enum CUresult
-
Error codes
Values
- CUDA_SUCCESS = 0
- The API call returned with no errors. In the case of query calls, this also means that the operation being queried is complete (see cuEventQuery() and cuStreamQuery()).
- CUDA_ERROR_INVALID_VALUE = 1
- This indicates that one or more of the parameters passed to the API call is not within an acceptable range of values.
- CUDA_ERROR_OUT_OF_MEMORY = 2
- The API call failed because it was unable to allocate enough memory or other resources to perform the requested operation.
- CUDA_ERROR_NOT_INITIALIZED = 3
- This indicates that the CUDA driver has not been initialized with cuInit() or that initialization has failed.
- CUDA_ERROR_DEINITIALIZED = 4
- This indicates that the CUDA driver is in the process of shutting down.
- CUDA_ERROR_PROFILER_DISABLED = 5
- This indicates profiler is not initialized for this run. This can happen when the application is running with external profiling tools like visual profiler.
- CUDA_ERROR_PROFILER_NOT_INITIALIZED = 6
-
Deprecated
This error return is deprecated as of CUDA 5.0. It is no longer an error to attempt to enable/disable the profiling via cuProfilerStart or cuProfilerStop without initialization.
- CUDA_ERROR_PROFILER_ALREADY_STARTED = 7
-
Deprecated
This error return is deprecated as of CUDA 5.0. It is no longer an error to call cuProfilerStart() when profiling is already enabled.
- CUDA_ERROR_PROFILER_ALREADY_STOPPED = 8
-
Deprecated
This error return is deprecated as of CUDA 5.0. It is no longer an error to call cuProfilerStop() when profiling is already disabled.
- CUDA_ERROR_STUB_LIBRARY = 34
- This indicates that the CUDA driver that the application has loaded is a stub library. Applications that run with the stub rather than a real driver loaded will result in CUDA API returning this error.
- CUDA_ERROR_DEVICE_UNAVAILABLE = 46
- This indicates that requested CUDA device is unavailable at the current time. Devices are often unavailable due to use of CU_COMPUTEMODE_EXCLUSIVE_PROCESS or CU_COMPUTEMODE_PROHIBITED.
- CUDA_ERROR_NO_DEVICE = 100
- This indicates that no CUDA-capable devices were detected by the installed CUDA driver.
- CUDA_ERROR_INVALID_DEVICE = 101
- This indicates that the device ordinal supplied by the user does not correspond to a valid CUDA device or that the action requested is invalid for the specified device.
- CUDA_ERROR_DEVICE_NOT_LICENSED = 102
- This error indicates that the Grid license is not applied.
- CUDA_ERROR_INVALID_IMAGE = 200
- This indicates that the device kernel image is invalid. This can also indicate an invalid CUDA module.
- CUDA_ERROR_INVALID_CONTEXT = 201
- This most frequently indicates that there is no context bound to the current thread. This can also be returned if the context passed to an API call is not a valid handle (such as a context that has had cuCtxDestroy() invoked on it). This can also be returned if a user mixes different API versions (i.e. 3010 context with 3020 API calls). See cuCtxGetApiVersion() for more details.
- CUDA_ERROR_CONTEXT_ALREADY_CURRENT = 202
-
Deprecated
This error return is deprecated as of CUDA 3.2. It is no longer an error to attempt to push the active context via cuCtxPushCurrent().
This indicated that the context being supplied as a parameter to the API call was already the active context.
- CUDA_ERROR_MAP_FAILED = 205
- This indicates that a map or register operation has failed.
- CUDA_ERROR_UNMAP_FAILED = 206
- This indicates that an unmap or unregister operation has failed.
- CUDA_ERROR_ARRAY_IS_MAPPED = 207
- This indicates that the specified array is currently mapped and thus cannot be destroyed.
- CUDA_ERROR_ALREADY_MAPPED = 208
- This indicates that the resource is already mapped.
- CUDA_ERROR_NO_BINARY_FOR_GPU = 209
- This indicates that there is no kernel image available that is suitable for the device. This can occur when a user specifies code generation options for a particular CUDA source file that do not include the corresponding device configuration.
- CUDA_ERROR_ALREADY_ACQUIRED = 210
- This indicates that a resource has already been acquired.
- CUDA_ERROR_NOT_MAPPED = 211
- This indicates that a resource is not mapped.
- CUDA_ERROR_NOT_MAPPED_AS_ARRAY = 212
- This indicates that a mapped resource is not available for access as an array.
- CUDA_ERROR_NOT_MAPPED_AS_POINTER = 213
- This indicates that a mapped resource is not available for access as a pointer.
- CUDA_ERROR_ECC_UNCORRECTABLE = 214
- This indicates that an uncorrectable ECC error was detected during execution.
- CUDA_ERROR_UNSUPPORTED_LIMIT = 215
- This indicates that the CUlimit passed to the API call is not supported by the active device.
- CUDA_ERROR_CONTEXT_ALREADY_IN_USE = 216
- This indicates that the CUcontext passed to the API call can only be bound to a single CPU thread at a time but is already bound to a CPU thread.
- CUDA_ERROR_PEER_ACCESS_UNSUPPORTED = 217
- This indicates that peer access is not supported across the given devices.
- CUDA_ERROR_INVALID_PTX = 218
- This indicates that a PTX JIT compilation failed.
- CUDA_ERROR_INVALID_GRAPHICS_CONTEXT = 219
- This indicates an error with OpenGL or DirectX context.
- CUDA_ERROR_NVLINK_UNCORRECTABLE = 220
- This indicates that an uncorrectable NVLink error was detected during the execution.
- CUDA_ERROR_JIT_COMPILER_NOT_FOUND = 221
- This indicates that the PTX JIT compiler library was not found.
- CUDA_ERROR_UNSUPPORTED_PTX_VERSION = 222
- This indicates that the provided PTX was compiled with an unsupported toolchain.
- CUDA_ERROR_JIT_COMPILATION_DISABLED = 223
- This indicates that the PTX JIT compilation was disabled.
- CUDA_ERROR_UNSUPPORTED_EXEC_AFFINITY = 224
- This indicates that the CUexecAffinityType passed to the API call is not supported by the active device.
- CUDA_ERROR_UNSUPPORTED_DEVSIDE_SYNC = 225
- This indicates that the code to be compiled by the PTX JIT contains unsupported call to cudaDeviceSynchronize.
- CUDA_ERROR_INVALID_SOURCE = 300
- This indicates that the device kernel source is invalid. This includes compilation/linker errors encountered in device code or user error.
- CUDA_ERROR_FILE_NOT_FOUND = 301
- This indicates that the file specified was not found.
- CUDA_ERROR_SHARED_OBJECT_SYMBOL_NOT_FOUND = 302
- This indicates that a link to a shared object failed to resolve.
- CUDA_ERROR_SHARED_OBJECT_INIT_FAILED = 303
- This indicates that initialization of a shared object failed.
- CUDA_ERROR_OPERATING_SYSTEM = 304
- This indicates that an OS call failed.
- CUDA_ERROR_INVALID_HANDLE = 400
- This indicates that a resource handle passed to the API call was not valid. Resource handles are opaque types like CUstream and CUevent.
- CUDA_ERROR_ILLEGAL_STATE = 401
- This indicates that a resource required by the API call is not in a valid state to perform the requested operation.
- CUDA_ERROR_LOSSY_QUERY = 402
- This indicates an attempt was made to introspect an object in a way that would discard semantically important information. This is either due to the object using funtionality newer than the API version used to introspect it or omission of optional return arguments.
- CUDA_ERROR_NOT_FOUND = 500
- This indicates that a named symbol was not found. Examples of symbols are global/constant variable names, driver function names, texture names, and surface names.
- CUDA_ERROR_NOT_READY = 600
- This indicates that asynchronous operations issued previously have not completed yet. This result is not actually an error, but must be indicated differently than CUDA_SUCCESS (which indicates completion). Calls that may return this value include cuEventQuery() and cuStreamQuery().
- CUDA_ERROR_ILLEGAL_ADDRESS = 700
- While executing a kernel, the device encountered a load or store instruction on an invalid memory address. This leaves the process in an inconsistent state and any further CUDA work will return the same error. To continue using CUDA, the process must be terminated and relaunched.
- CUDA_ERROR_LAUNCH_OUT_OF_RESOURCES = 701
- This indicates that a launch did not occur because it did not have appropriate resources. This error usually indicates that the user has attempted to pass too many arguments to the device kernel, or the kernel launch specifies too many threads for the kernel's register count. Passing arguments of the wrong size (i.e. a 64-bit pointer when a 32-bit int is expected) is equivalent to passing too many arguments and can also result in this error.
- CUDA_ERROR_LAUNCH_TIMEOUT = 702
- This indicates that the device kernel took too long to execute. This can only occur if timeouts are enabled - see the device attribute CU_DEVICE_ATTRIBUTE_KERNEL_EXEC_TIMEOUT for more information. This leaves the process in an inconsistent state and any further CUDA work will return the same error. To continue using CUDA, the process must be terminated and relaunched.
- CUDA_ERROR_LAUNCH_INCOMPATIBLE_TEXTURING = 703
- This error indicates a kernel launch that uses an incompatible texturing mode.
- CUDA_ERROR_PEER_ACCESS_ALREADY_ENABLED = 704
- This error indicates that a call to cuCtxEnablePeerAccess() is trying to re-enable peer access to a context which has already had peer access to it enabled.
- CUDA_ERROR_PEER_ACCESS_NOT_ENABLED = 705
- This error indicates that cuCtxDisablePeerAccess() is trying to disable peer access which has not been enabled yet via cuCtxEnablePeerAccess().
- CUDA_ERROR_PRIMARY_CONTEXT_ACTIVE = 708
- This error indicates that the primary context for the specified device has already been initialized.
- CUDA_ERROR_CONTEXT_IS_DESTROYED = 709
- This error indicates that the context current to the calling thread has been destroyed using cuCtxDestroy, or is a primary context which has not yet been initialized.
- CUDA_ERROR_ASSERT = 710
- A device-side assert triggered during kernel execution. The context cannot be used anymore, and must be destroyed. All existing device memory allocations from this context are invalid and must be reconstructed if the program is to continue using CUDA.
- CUDA_ERROR_TOO_MANY_PEERS = 711
- This error indicates that the hardware resources required to enable peer access have been exhausted for one or more of the devices passed to cuCtxEnablePeerAccess().
- CUDA_ERROR_HOST_MEMORY_ALREADY_REGISTERED = 712
- This error indicates that the memory range passed to cuMemHostRegister() has already been registered.
- CUDA_ERROR_HOST_MEMORY_NOT_REGISTERED = 713
- This error indicates that the pointer passed to cuMemHostUnregister() does not correspond to any currently registered memory region.
- CUDA_ERROR_HARDWARE_STACK_ERROR = 714
- While executing a kernel, the device encountered a stack error. This can be due to stack corruption or exceeding the stack size limit. This leaves the process in an inconsistent state and any further CUDA work will return the same error. To continue using CUDA, the process must be terminated and relaunched.
- CUDA_ERROR_ILLEGAL_INSTRUCTION = 715
- While executing a kernel, the device encountered an illegal instruction. This leaves the process in an inconsistent state and any further CUDA work will return the same error. To continue using CUDA, the process must be terminated and relaunched.
- CUDA_ERROR_MISALIGNED_ADDRESS = 716
- While executing a kernel, the device encountered a load or store instruction on a memory address which is not aligned. This leaves the process in an inconsistent state and any further CUDA work will return the same error. To continue using CUDA, the process must be terminated and relaunched.
- CUDA_ERROR_INVALID_ADDRESS_SPACE = 717
- While executing a kernel, the device encountered an instruction which can only operate on memory locations in certain address spaces (global, shared, or local), but was supplied a memory address not belonging to an allowed address space. This leaves the process in an inconsistent state and any further CUDA work will return the same error. To continue using CUDA, the process must be terminated and relaunched.
- CUDA_ERROR_INVALID_PC = 718
- While executing a kernel, the device program counter wrapped its address space. This leaves the process in an inconsistent state and any further CUDA work will return the same error. To continue using CUDA, the process must be terminated and relaunched.
- CUDA_ERROR_LAUNCH_FAILED = 719
- An exception occurred on the device while executing a kernel. Common causes include dereferencing an invalid device pointer and accessing out of bounds shared memory. Less common cases can be system specific - more information about these cases can be found in the system specific user guide. This leaves the process in an inconsistent state and any further CUDA work will return the same error. To continue using CUDA, the process must be terminated and relaunched.
- CUDA_ERROR_COOPERATIVE_LAUNCH_TOO_LARGE = 720
- This error indicates that the number of blocks launched per grid for a kernel that was launched via either cuLaunchCooperativeKernel or cuLaunchCooperativeKernelMultiDevice exceeds the maximum number of blocks as allowed by cuOccupancyMaxActiveBlocksPerMultiprocessor or cuOccupancyMaxActiveBlocksPerMultiprocessorWithFlags times the number of multiprocessors as specified by the device attribute CU_DEVICE_ATTRIBUTE_MULTIPROCESSOR_COUNT.
- CUDA_ERROR_NOT_PERMITTED = 800
- This error indicates that the attempted operation is not permitted.
- CUDA_ERROR_NOT_SUPPORTED = 801
- This error indicates that the attempted operation is not supported on the current system or device.
- CUDA_ERROR_SYSTEM_NOT_READY = 802
- This error indicates that the system is not yet ready to start any CUDA work. To continue using CUDA, verify the system configuration is in a valid state and all required driver daemons are actively running. More information about this error can be found in the system specific user guide.
- CUDA_ERROR_SYSTEM_DRIVER_MISMATCH = 803
- This error indicates that there is a mismatch between the versions of the display driver and the CUDA driver. Refer to the compatibility documentation for supported versions.
- CUDA_ERROR_COMPAT_NOT_SUPPORTED_ON_DEVICE = 804
- This error indicates that the system was upgraded to run with forward compatibility but the visible hardware detected by CUDA does not support this configuration. Refer to the compatibility documentation for the supported hardware matrix or ensure that only supported hardware is visible during initialization via the CUDA_VISIBLE_DEVICES environment variable.
- CUDA_ERROR_MPS_CONNECTION_FAILED = 805
- This error indicates that the MPS client failed to connect to the MPS control daemon or the MPS server.
- CUDA_ERROR_MPS_RPC_FAILURE = 806
- This error indicates that the remote procedural call between the MPS server and the MPS client failed.
- CUDA_ERROR_MPS_SERVER_NOT_READY = 807
- This error indicates that the MPS server is not ready to accept new MPS client requests. This error can be returned when the MPS server is in the process of recovering from a fatal failure.
- CUDA_ERROR_MPS_MAX_CLIENTS_REACHED = 808
- This error indicates that the hardware resources required to create MPS client have been exhausted.
- CUDA_ERROR_MPS_MAX_CONNECTIONS_REACHED = 809
- This error indicates the the hardware resources required to support device connections have been exhausted.
- CUDA_ERROR_MPS_CLIENT_TERMINATED = 810
- This error indicates that the MPS client has been terminated by the server. To continue using CUDA, the process must be terminated and relaunched.
- CUDA_ERROR_CDP_NOT_SUPPORTED = 811
- This error indicates that the module is using CUDA Dynamic Parallelism, but the current configuration, like MPS, does not support it.
- CUDA_ERROR_CDP_VERSION_MISMATCH = 812
- This error indicates that a module contains an unsupported interaction between different versions of CUDA Dynamic Parallelism.
- CUDA_ERROR_STREAM_CAPTURE_UNSUPPORTED = 900
- This error indicates that the operation is not permitted when the stream is capturing.
- CUDA_ERROR_STREAM_CAPTURE_INVALIDATED = 901
- This error indicates that the current capture sequence on the stream has been invalidated due to a previous error.
- CUDA_ERROR_STREAM_CAPTURE_MERGE = 902
- This error indicates that the operation would have resulted in a merge of two independent capture sequences.
- CUDA_ERROR_STREAM_CAPTURE_UNMATCHED = 903
- This error indicates that the capture was not initiated in this stream.
- CUDA_ERROR_STREAM_CAPTURE_UNJOINED = 904
- This error indicates that the capture sequence contains a fork that was not joined to the primary stream.
- CUDA_ERROR_STREAM_CAPTURE_ISOLATION = 905
- This error indicates that a dependency would have been created which crosses the capture sequence boundary. Only implicit in-stream ordering dependencies are allowed to cross the boundary.
- CUDA_ERROR_STREAM_CAPTURE_IMPLICIT = 906
- This error indicates a disallowed implicit dependency on a current capture sequence from cudaStreamLegacy.
- CUDA_ERROR_CAPTURED_EVENT = 907
- This error indicates that the operation is not permitted on an event which was last recorded in a capturing stream.
- CUDA_ERROR_STREAM_CAPTURE_WRONG_THREAD = 908
- A stream capture sequence not initiated with the CU_STREAM_CAPTURE_MODE_RELAXED argument to cuStreamBeginCapture was passed to cuStreamEndCapture in a different thread.
- CUDA_ERROR_TIMEOUT = 909
- This error indicates that the timeout specified for the wait operation has lapsed.
- CUDA_ERROR_GRAPH_EXEC_UPDATE_FAILURE = 910
- This error indicates that the graph update was not performed because it included changes which violated constraints specific to instantiated graph update.
- CUDA_ERROR_EXTERNAL_DEVICE = 911
- This indicates that an async error has occurred in a device outside of CUDA. If CUDA was waiting for an external device's signal before consuming shared data, the external device signaled an error indicating that the data is not valid for consumption. This leaves the process in an inconsistent state and any further CUDA work will return the same error. To continue using CUDA, the process must be terminated and relaunched.
- CUDA_ERROR_INVALID_CLUSTER_SIZE = 912
- Indicates a kernel launch error due to cluster misconfiguration.
- CUDA_ERROR_UNKNOWN = 999
- This indicates that an unknown internal error has occurred.
- enum CUshared_carveout
-
Shared memory carveout configurations. These may be passed to cuFuncSetAttribute or cuKernelSetAttribute
Values
- CU_SHAREDMEM_CARVEOUT_DEFAULT = -1
- No preference for shared memory or L1 (default)
- CU_SHAREDMEM_CARVEOUT_MAX_SHARED = 100
- Prefer maximum available shared memory, minimum L1 cache
- CU_SHAREDMEM_CARVEOUT_MAX_L1 = 0
- Prefer maximum available L1 cache, minimum shared memory
- enum CUsharedconfig
-
Shared memory configurations
Values
- CU_SHARED_MEM_CONFIG_DEFAULT_BANK_SIZE = 0x00
- set default shared memory bank size
- CU_SHARED_MEM_CONFIG_FOUR_BYTE_BANK_SIZE = 0x01
- set shared memory bank width to four bytes
- CU_SHARED_MEM_CONFIG_EIGHT_BYTE_BANK_SIZE = 0x02
- set shared memory bank width to eight bytes
- enum CUstreamBatchMemOpType
-
Operations for cuStreamBatchMemOp
Values
- CU_STREAM_MEM_OP_WAIT_VALUE_32 = 1
- Represents a cuStreamWaitValue32 operation
- CU_STREAM_MEM_OP_WRITE_VALUE_32 = 2
- Represents a cuStreamWriteValue32 operation
- CU_STREAM_MEM_OP_WAIT_VALUE_64 = 4
- Represents a cuStreamWaitValue64 operation
- CU_STREAM_MEM_OP_WRITE_VALUE_64 = 5
- Represents a cuStreamWriteValue64 operation
- CU_STREAM_MEM_OP_BARRIER = 6
- Insert a memory barrier of the specified type
- CU_STREAM_MEM_OP_FLUSH_REMOTE_WRITES = 3
- This has the same effect as CU_STREAM_WAIT_VALUE_FLUSH, but as a standalone operation.
- enum CUstreamCaptureMode
-
Possible modes for stream capture thread interactions. For more details see cuStreamBeginCapture and cuThreadExchangeStreamCaptureMode
Values
- CU_STREAM_CAPTURE_MODE_GLOBAL = 0
- CU_STREAM_CAPTURE_MODE_THREAD_LOCAL = 1
- CU_STREAM_CAPTURE_MODE_RELAXED = 2
- enum CUstreamCaptureStatus
-
Possible stream capture statuses returned by cuStreamIsCapturing
Values
- CU_STREAM_CAPTURE_STATUS_NONE = 0
- Stream is not capturing
- CU_STREAM_CAPTURE_STATUS_ACTIVE = 1
- Stream is actively capturing
- CU_STREAM_CAPTURE_STATUS_INVALIDATED = 2
- Stream is part of a capture sequence that has been invalidated, but not terminated
- enum CUstreamMemoryBarrier_flags
-
Flags for cuStreamMemoryBarrier
Values
- CU_STREAM_MEMORY_BARRIER_TYPE_SYS = 0x0
- System-wide memory barrier.
- CU_STREAM_MEMORY_BARRIER_TYPE_GPU = 0x1
- Limit memory barrier scope to the GPU.
- enum CUstreamUpdateCaptureDependencies_flags
-
Flags for cuStreamUpdateCaptureDependencies
Values
- CU_STREAM_ADD_CAPTURE_DEPENDENCIES = 0x0
- Add new nodes to the dependency set
- CU_STREAM_SET_CAPTURE_DEPENDENCIES = 0x1
- Replace the dependency set with the new nodes
- enum CUstreamWaitValue_flags
-
Flags for cuStreamWaitValue32 and cuStreamWaitValue64
Values
- CU_STREAM_WAIT_VALUE_GEQ = 0x0
- Wait until (int32_t)(*addr - value) >= 0 (or int64_t for 64 bit values). Note this is a cyclic comparison which ignores wraparound. (Default behavior.)
- CU_STREAM_WAIT_VALUE_EQ = 0x1
- Wait until *addr == value.
- CU_STREAM_WAIT_VALUE_AND = 0x2
- Wait until (*addr & value) != 0.
- CU_STREAM_WAIT_VALUE_NOR = 0x3
- Wait until ~(*addr | value) != 0. Support for this operation can be queried with cuDeviceGetAttribute() and CU_DEVICE_ATTRIBUTE_CAN_USE_STREAM_WAIT_VALUE_NOR.
- CU_STREAM_WAIT_VALUE_FLUSH = 1<<30
- Follow the wait operation with a flush of outstanding remote writes. This means that, if a remote write operation is guaranteed to have reached the device before the wait can be satisfied, that write is guaranteed to be visible to downstream device work. The device is permitted to reorder remote writes internally. For example, this flag would be required if two remote writes arrive in a defined order, the wait is satisfied by the second write, and downstream work needs to observe the first write. Support for this operation is restricted to selected platforms and can be queried with CU_DEVICE_ATTRIBUTE_CAN_FLUSH_REMOTE_WRITES.
- enum CUstreamWriteValue_flags
-
Flags for cuStreamWriteValue32
Values
- CU_STREAM_WRITE_VALUE_DEFAULT = 0x0
- Default behavior
- CU_STREAM_WRITE_VALUE_NO_MEMORY_BARRIER = 0x1
- Permits the write to be reordered with writes which were issued before it, as a performance optimization. Normally, cuStreamWriteValue32 will provide a memory fence before the write, which has similar semantics to __threadfence_system() but is scoped to the stream rather than a CUDA thread. This flag is not supported in the v2 API.
- enum CUstream_flags
-
Stream creation flags
Values
- CU_STREAM_DEFAULT = 0x0
- Default stream flag
- CU_STREAM_NON_BLOCKING = 0x1
- Stream does not synchronize with stream 0 (the NULL stream)
- enum CUtensorMapDataType
-
Tensor map data type
Values
- CU_TENSOR_MAP_DATA_TYPE_UINT8 = 0
- CU_TENSOR_MAP_DATA_TYPE_UINT16
- CU_TENSOR_MAP_DATA_TYPE_UINT32
- CU_TENSOR_MAP_DATA_TYPE_INT32
- CU_TENSOR_MAP_DATA_TYPE_UINT64
- CU_TENSOR_MAP_DATA_TYPE_INT64
- CU_TENSOR_MAP_DATA_TYPE_FLOAT16
- CU_TENSOR_MAP_DATA_TYPE_FLOAT32
- CU_TENSOR_MAP_DATA_TYPE_FLOAT64
- CU_TENSOR_MAP_DATA_TYPE_BFLOAT16
- CU_TENSOR_MAP_DATA_TYPE_FLOAT32_FTZ
- CU_TENSOR_MAP_DATA_TYPE_TFLOAT32
- CU_TENSOR_MAP_DATA_TYPE_TFLOAT32_FTZ
- enum CUtensorMapFloatOOBfill
-
Tensor map out-of-bounds fill type
Values
- CU_TENSOR_MAP_FLOAT_OOB_FILL_NONE = 0
- CU_TENSOR_MAP_FLOAT_OOB_FILL_NAN_REQUEST_ZERO_FMA
- enum CUtensorMapInterleave
-
Tensor map interleave layout type
Values
- CU_TENSOR_MAP_INTERLEAVE_NONE = 0
- CU_TENSOR_MAP_INTERLEAVE_16B
- CU_TENSOR_MAP_INTERLEAVE_32B
- enum CUtensorMapL2promotion
-
Tensor map L2 promotion type
Values
- CU_TENSOR_MAP_L2_PROMOTION_NONE = 0
- CU_TENSOR_MAP_L2_PROMOTION_L2_64B
- CU_TENSOR_MAP_L2_PROMOTION_L2_128B
- CU_TENSOR_MAP_L2_PROMOTION_L2_256B
- enum CUtensorMapSwizzle
-
Tensor map swizzling mode of shared memory banks
Values
- CU_TENSOR_MAP_SWIZZLE_NONE = 0
- CU_TENSOR_MAP_SWIZZLE_32B
- CU_TENSOR_MAP_SWIZZLE_64B
- CU_TENSOR_MAP_SWIZZLE_128B
- enum CUuserObjectRetain_flags
-
Flags for retaining user object references for graphs
Values
- CU_GRAPH_USER_OBJECT_MOVE = 1
- Transfer references from the caller rather than creating new references.
- enum CUuserObject_flags
-
Flags for user objects for graphs
Values
- CU_USER_OBJECT_NO_DESTRUCTOR_SYNC = 1
- Indicates the destructor execution is not synchronized by any CUDA handle.
- enum cl_context_flags
-
NVCL context scheduling flags
Values
- NVCL_CTX_SCHED_AUTO = 0x00
- Automatic scheduling
- NVCL_CTX_SCHED_SPIN = 0x01
- Set spin as default scheduling
- NVCL_CTX_SCHED_YIELD = 0x02
- Set yield as default scheduling
- NVCL_CTX_SCHED_BLOCKING_SYNC = 0x04
- Set blocking synchronization as default scheduling
- enum cl_event_flags
-
NVCL event scheduling flags
Values
- NVCL_EVENT_SCHED_AUTO = 0x00
- Automatic scheduling
- NVCL_EVENT_SCHED_SPIN = 0x01
- Set spin as default scheduling
- NVCL_EVENT_SCHED_YIELD = 0x02
- Set yield as default scheduling
- NVCL_EVENT_SCHED_BLOCKING_SYNC = 0x04
- Set blocking synchronization as default scheduling