2.11. Enums and Macros

Defines

#define DCGM_CONFIG_COMPUTEMODE_DEFAULT 0
#define DCGM_CONFIG_COMPUTEMODE_EXCLUSIVE_PROCESS 2
#define DCGM_CONFIG_COMPUTEMODE_PROHIBITED 1
#define DCGM_FP64_BLANK 140737488355328.0
#define DCGM_FP64_IS_BLANK ( val )
#define DCGM_FP64_NOT_FOUND (DCGM_FP64_BLANK+1.0)
#define DCGM_FP64_NOT_PERMISSIONED (DCGM_FP64_BLANK+3.0)
#define DCGM_FP64_NOT_SUPPORTED (DCGM_FP64_BLANK+2.0)
#define DCGM_GRID_LICENSE_BUFFER_SIZE 128
#define DCGM_GROUP_ALL_GPUS 0x7fffffff
#define DCGM_GROUP_MAX_ENTITIES 64
#define DCGM_HE_PORT_NUMBER 5555
#define DCGM_INT32_BLANK 0x7ffffff0
#define DCGM_INT32_IS_BLANK ( val )
#define DCGM_INT32_NOT_FOUND (DCGM_INT32_BLANK+1)
#define DCGM_INT32_NOT_PERMISSIONED (DCGM_INT32_BLANK+3)
#define DCGM_INT32_NOT_SUPPORTED (DCGM_INT32_BLANK+2)
#define DCGM_INT64_BLANK 0x7ffffffffffffff0
#define DCGM_INT64_IS_BLANK ( val )
#define DCGM_INT64_NOT_FOUND (DCGM_INT64_BLANK+1)
#define DCGM_INT64_NOT_PERMISSIONED (DCGM_INT64_BLANK+3)
#define DCGM_INT64_NOT_SUPPORTED (DCGM_INT64_BLANK+2)
#define DCGM_MAX_CLOCKS 256
#define DCGM_MAX_NUM_DEVICES 16
#define DCGM_MAX_NUM_GROUPS 64
#define DCGM_MAX_NUM_SWITCHES 12
#define DCGM_MAX_NUM_VGPU_DEVICES
#define DCGM_MAX_STR_LENGTH 256
#define DCGM_MAX_VGPU_INSTANCES_PER_PGPU 32
#define DCGM_STR_BLANK "<<<NULL>>>"
#define DCGM_STR_IS_BLANK ( val )
#define DCGM_STR_NOT_FOUND "<<<NOT_FOUND>>>"
#define DCGM_STR_NOT_PERMISSIONED "<<<NOT_PERM>>>"
#define DCGM_STR_NOT_SUPPORTED "<<<NOT_SUPPORTED>>>"
#define DCGM_VGPU_NAME_BUFFER_SIZE 64
#define MAKE_DCGM_VERSION ( typeName, ver )

Enumerations

enum dcgmConfigPowerLimitType_t
enum dcgmConfigType_t
enum dcgmGroupType_t
enum dcgmOperationMode_t
enum dcgmOrder_t
enum dcgmReturn_t

Defines

#define DCGM_CONFIG_COMPUTEMODE_DEFAULT 0

Default compute mode -- multiple contexts per device

#define DCGM_CONFIG_COMPUTEMODE_EXCLUSIVE_PROCESS 2

Compute-exclusive-process mode -- only one context per device, usable from multiple threads at a time

#define DCGM_CONFIG_COMPUTEMODE_PROHIBITED 1

Compute-prohibited mode -- no contexts per device

#define DCGM_FP64_BLANK 140737488355328.0

Base value for double blank: 2 ** 47. FP64 has 52 bits of mantissa, so values at this magnitude can still be incremented by 1 exactly, representing each offset from 0-15

#define DCGM_FP64_IS_BLANK ( val )

Macro to check if an FP64 value is blank or not

Value

(((val) >= DCGM_FP64_BLANK ? 1 : 0))

#define DCGM_FP64_NOT_FOUND (DCGM_FP64_BLANK+1.0)

Represents an error where FP64 data was not found

#define DCGM_FP64_NOT_PERMISSIONED (DCGM_FP64_BLANK+3.0)

Represents an error where fetching the FP64 value is not allowed with our current credentials

#define DCGM_FP64_NOT_SUPPORTED (DCGM_FP64_BLANK+2.0)

Represents an error where fetching the FP64 value is not supported

#define DCGM_GRID_LICENSE_BUFFER_SIZE 128

Represents the size of a buffer that holds a vGPU license string

#define DCGM_GROUP_ALL_GPUS 0x7fffffff

Identifiers for special DCGM groups

#define DCGM_GROUP_MAX_ENTITIES 64

Maximum number of entities per entity group

#define DCGM_HE_PORT_NUMBER 5555

Default Port Number for DCGM Host Engine

#define DCGM_INT32_BLANK 0x7ffffff0

Represents the value of a field that can be returned by the Host Engine when an operation is not successful. Base value for 32-bit integer blank. Can be used as an unspecified blank

#define DCGM_INT32_IS_BLANK ( val )

Macro to check if an INT32 value is blank or not

Value

(((val) >= DCGM_INT32_BLANK) ? 1 : 0)

#define DCGM_INT32_NOT_FOUND (DCGM_INT32_BLANK+1)

Represents an error where INT32 data was not found

#define DCGM_INT32_NOT_PERMISSIONED (DCGM_INT32_BLANK+3)

Represents an error where fetching the INT32 value is not allowed with our current credentials

#define DCGM_INT32_NOT_SUPPORTED (DCGM_INT32_BLANK+2)

Represents an error where fetching the INT32 value is not supported

#define DCGM_INT64_BLANK 0x7ffffffffffffff0

Base value for 64-bit integer blank. Can be used as an unspecified blank

#define DCGM_INT64_IS_BLANK ( val )

Macro to check if an INT64 value is blank or not

Value

(((val) >= DCGM_INT64_BLANK) ? 1 : 0)

#define DCGM_INT64_NOT_FOUND (DCGM_INT64_BLANK+1)

Represents an error where INT64 data was not found

#define DCGM_INT64_NOT_PERMISSIONED (DCGM_INT64_BLANK+3)

Represents an error where fetching the INT64 value is not allowed with our current credentials

#define DCGM_INT64_NOT_SUPPORTED (DCGM_INT64_BLANK+2)

Represents an error where fetching the INT64 value is not supported

#define DCGM_MAX_CLOCKS 256

Max number of clocks supported for a device

#define DCGM_MAX_NUM_DEVICES 16

Max number of GPUs supported by DCGM

#define DCGM_MAX_NUM_GROUPS 64

Max limit on the number of groups supported by DCGM

#define DCGM_MAX_NUM_SWITCHES 12

Max number of NvSwitches supported by DCGM

#define DCGM_MAX_NUM_VGPU_DEVICES

Max number of vGPUs supported on DCGM

Value

DCGM_MAX_NUM_DEVICES * DCGM_MAX_VGPU_INSTANCES_PER_PGPU

#define DCGM_MAX_STR_LENGTH 256

Max length of the DCGM string field

#define DCGM_MAX_VGPU_INSTANCES_PER_PGPU 32

Maximum number of vGPU instances per physical GPU

#define DCGM_STR_BLANK "<<<NULL>>>"

Base value for string blank.

#define DCGM_STR_IS_BLANK ( val )

Macro to check if a STR value is blank or not. Works on (char *). Looks for <<< at first position and >>> inside string

Value

(val == strstr(val, "<<<") && strstr(val, ">>>"))

#define DCGM_STR_NOT_FOUND "<<<NOT_FOUND>>>"

Represents an error where STR data was not found

#define DCGM_STR_NOT_PERMISSIONED "<<<NOT_PERM>>>"

Represents an error where fetching the STR value is not allowed with our current credentials

#define DCGM_STR_NOT_SUPPORTED "<<<NOT_SUPPORTED>>>"

Represents an error where fetching the STR value is not supported

#define DCGM_VGPU_NAME_BUFFER_SIZE 64

Represents the size of a buffer that holds a vGPU type Name or vGPU class type or name of process running on vGPU instance.

#define MAKE_DCGM_VERSION ( typeName, ver )

Creates a unique version number for each struct

Value

(unsigned int)(sizeof(typeName) | ((ver)<<24))

Enumerations

enum dcgmConfigPowerLimitType_t

Represents the power cap for each member of the group.

Values
DCGM_CONFIG_POWER_CAP_INDIVIDUAL = 0
Represents the power cap to be applied for each member of the group.
DCGM_CONFIG_POWER_BUDGET_GROUP = 1
Represents the power budget for the entire group.
enum dcgmConfigType_t

Represents the type of configuration to be fetched from the GPUs

Values
DCGM_CONFIG_TARGET_STATE = 0
The target configuration values to be applied.
DCGM_CONFIG_CURRENT_STATE = 1
The current configuration state.
enum dcgmGroupType_t

Type of GPU groups

Values
DCGM_GROUP_DEFAULT = 0
All the GPUs on the node are added to the group.
DCGM_GROUP_EMPTY = 1
Creates an empty group.
DCGM_GROUP_DEFAULT_NVSWITCHES = 2
All NvSwitches of the node are added to the group.
enum dcgmOperationMode_t

Operation mode for DCGM

DCGM can run in auto-mode where it runs additional threads in the background to collect any metrics of interest and auto manages any operations needed for policy management.

DCGM can also operate in manual-mode where its execution is controlled by the user. In this mode, the user has to periodically call APIs such as dcgmPolicyTrigger and dcgmUpdateAllFields which tell DCGM to wake up and perform data collection and operations needed for policy management.

Values
DCGM_OPERATION_MODE_AUTO = 1
DCGM_OPERATION_MODE_MANUAL = 2
enum dcgmOrder_t

When more than one value is returned from a query, which order should it be returned in?

Values
DCGM_ORDER_ASCENDING = 1
Data with earliest (lowest) timestamps returned first.
DCGM_ORDER_DESCENDING = 2
Data with latest (highest) timestamps returned first.
enum dcgmReturn_t

Return values for DCGM API calls.

Values
DCGM_ST_OK = 0
Success.
DCGM_ST_BADPARAM = -1
A bad parameter was passed to a function.
DCGM_ST_GENERIC_ERROR = -3
A generic, unspecified error.
DCGM_ST_MEMORY = -4
An out of memory error occurred.
DCGM_ST_NOT_CONFIGURED = -5
Setting not configured.
DCGM_ST_NOT_SUPPORTED = -6
Feature not supported.
DCGM_ST_INIT_ERROR = -7
DCGM Init error.
DCGM_ST_NVML_ERROR = -8
When NVML returns error.
DCGM_ST_PENDING = -9
Object is in pending state of something else.
DCGM_ST_UNINITIALIZED = -10
Object is in undefined state.
DCGM_ST_TIMEOUT = -11
Requested operation timed out.
DCGM_ST_VER_MISMATCH = -12
Version mismatch between received and understood API.
DCGM_ST_UNKNOWN_FIELD = -13
Unknown field id.
DCGM_ST_NO_DATA = -14
No data is available.
DCGM_ST_STALE_DATA = -15
Data is considered stale.
DCGM_ST_NOT_WATCHED = -16
The given field id is not being updated by the cache manager.
DCGM_ST_NO_PERMISSION = -17
Do not have permission to perform the desired action.
DCGM_ST_GPU_IS_LOST = -18
GPU is no longer reachable.
DCGM_ST_RESET_REQUIRED = -19
GPU requires a reset.
DCGM_ST_FUNCTION_NOT_FOUND = -20
The function that was requested was not found (bindings only error).
DCGM_ST_CONNECTION_NOT_VALID = -21
The connection to the host engine is not valid any longer.
DCGM_ST_GPU_NOT_SUPPORTED = -22
This GPU is not supported by DCGM.
DCGM_ST_GROUP_INCOMPATIBLE = -23
The GPUs of the provided group are not compatible with each other for the requested operation.
DCGM_ST_MAX_LIMIT = -24
Max limit reached for the object.
DCGM_ST_LIBRARY_NOT_FOUND = -25
DCGM library could not be found.
DCGM_ST_DUPLICATE_KEY = -26
Duplicate key passed to a function.
DCGM_ST_GPU_IN_SYNC_BOOST_GROUP = -27
GPU is already a part of a sync boost group.
DCGM_ST_GPU_NOT_IN_SYNC_BOOST_GROUP = -28
GPU is not a part of a sync boost group.
DCGM_ST_REQUIRES_ROOT = -29
This operation cannot be performed when the host engine is running as non-root.
DCGM_ST_NVVS_ERROR = -30
DCGM GPU Diagnostic was successfully executed, but reported an error.
DCGM_ST_INSUFFICIENT_SIZE = -31
An input argument is not large enough.
DCGM_ST_FIELD_UNSUPPORTED_BY_API = -32
The given field ID is not supported by the API being called.