2.15. Structure definitions

Classes

struct 
struct 
struct 
struct 
struct 
struct 
struct 
struct 
struct 
struct 
struct 
struct 
struct 
struct 
struct 
struct 
struct 
struct 
struct 
struct 
struct 
struct 
struct 
struct 
struct 
struct 
struct 
struct 
struct 
struct 
struct 
struct 
struct 
struct 
struct 
struct 
struct 
struct 
struct 
struct 
struct 
struct 
struct 
struct 
struct 
struct 
struct 
struct 
struct 
struct 
struct 
struct 
struct 
struct 
struct 
struct 
struct 
struct 
struct 
struct 
struct 
struct 
struct 
struct 
struct 
struct 
struct 
struct 
struct 
struct 
struct 
struct 
struct 
struct 

Defines

#define DCGM_DEVICE_UUID_BUFFER_SIZE 80
#define DCGM_FV_FLAG_LIVE_DATA 0x00000001
#define DCGM_GEGE_FLAG_ONLY_SUPPORTED 0x00000001
This mimics the behavior of dcgmGetAllSupportedDevices().
#define DCGM_HEALTH_WATCH_COUNT_V1 10
For iterating through the dcgmHealthSystems_v1 enum.
#define DCGM_HEALTH_WATCH_COUNT_V2 12
For iterating through the dcgmHealthSystems_v2 enum.
#define DCGM_MAX_BLOB_LENGTH 4096
Set above the size of the largest blob entry. Currently this is dcgmDeviceVgpuTypeInfo_v1.
#define DCGM_MAX_FIELD_IDS_PER_FIELD_GROUP 128
#define DCGM_MAX_NUM_FIELD_GROUPS 64
#define DCGM_MAX_VGPU_TYPES_PER_PGPU 32
#define DCGM_MODULE_STATUSES_CAPACITY 16
This is larger than DcgmModuleIdCount so we can add modules without versioning this request.
#define DCGM_PROF_MAX_FIELD_IDS_PER_GROUP 8
Maximum number of field IDs that can be in a single DCGM profiling metric group.
#define DCGM_PROF_MAX_NUM_GROUPS 10
Maximum number of metric ID groups that can exist in DCGM.
#define dcgmAllFieldGroup_version
#define dcgmAllFieldGroup_version1
#define dcgmClockSet_version dcgmClockSet_version1
#define dcgmClockSet_version1
#define dcgmConfig_version dcgmConfig_version1
#define dcgmConfig_version1
#define dcgmConnectV2Params_version
#define dcgmConnectV2Params_version1
#define dcgmConnectV2Params_version2
#define dcgmDeviceAttributes_version
#define dcgmDeviceAttributes_version1
#define dcgmDeviceEncStats_version
#define dcgmDeviceEncStats_version1
#define dcgmDeviceFbcSessionInfo_version
#define dcgmDeviceFbcSessionInfo_version1
#define dcgmDeviceFbcSessions_version
#define dcgmDeviceFbcSessions_version1
#define dcgmDeviceFbcStats_version
#define dcgmDeviceFbcStats_version1
#define dcgmDeviceIdentifiers_version
#define dcgmDeviceIdentifiers_version1
#define dcgmDeviceMemoryUsage_version
#define dcgmDeviceMemoryUsage_version1
#define dcgmDevicePidAccountingStats_version
#define dcgmDevicePidAccountingStats_version1
#define dcgmDevicePowerLimits_version
#define dcgmDevicePowerLimits_version1
#define dcgmDeviceSupportedClockSets_version
#define dcgmDeviceSupportedClockSets_version1
#define dcgmDeviceThermals_version
#define dcgmDeviceThermals_version1
#define dcgmDeviceTopology_version
#define dcgmDeviceTopology_version1
#define dcgmDeviceVgpuEncSessions_version
#define dcgmDeviceVgpuEncSessions_version1
#define dcgmDeviceVgpuIds_version
#define dcgmDeviceVgpuIds_version1
#define dcgmDeviceVgpuProcessUtilInfo_version
#define dcgmDeviceVgpuProcessUtilInfo_version1
#define dcgmDeviceVgpuTypeInfo_version
#define dcgmDeviceVgpuTypeInfo_version1
#define dcgmDeviceVgpuUtilInfo_version
#define dcgmDeviceVgpuUtilInfo_version1
#define dcgmDiagResponse_version
#define dcgmDiagResponse_version3
#define dcgmDiagResponse_version4
#define dcgmDiagResponse_version5
#define dcgmFieldGroupInfo_version
#define dcgmFieldGroupInfo_version1
#define dcgmFieldValue_version1
#define dcgmFieldValue_version2
#define dcgmGroupInfo_version dcgmGroupInfo_version2
#define dcgmGroupInfo_version1
#define dcgmGroupInfo_version2
#define dcgmGroupTopology_version
#define dcgmGroupTopology_version1
#define dcgmHealthResponse_version
#define dcgmHealthResponse_version1
#define dcgmHealthResponse_version2
#define dcgmHealthResponse_version3
#define dcgmIntrospectContext_version
#define dcgmIntrospectContext_version1
#define dcgmIntrospectCpuUtil_version
#define dcgmIntrospectCpuUtil_version1
#define dcgmIntrospectFieldsExecTime_version
#define dcgmIntrospectFieldsExecTime_version1
#define dcgmIntrospectFullFieldsExecTime_version
#define dcgmIntrospectFullFieldsExecTime_version1
#define dcgmIntrospectFullMemory_version
#define dcgmIntrospectFullMemory_version1
#define dcgmIntrospectMemory_version
#define dcgmIntrospectMemory_version1
#define dcgmJobInfo_version dcgmJobInfo_version2
#define dcgmJobInfo_version2
#define dcgmModuleGetStatuses_version1
#define dcgmNvLinkStatus_version1
#define dcgmPidInfo_version dcgmPidInfo_version1
#define dcgmPidInfo_version1
#define dcgmPolicyCallbackResponse_version
#define dcgmPolicyCallbackResponse_version1
#define dcgmPolicy_version dcgmPolicy_version1
#define dcgmPolicy_version1
#define dcgmProfGetMetricGroups_version2
#define dcgmProfUnwatchFields_version1
#define dcgmProfWatchFields_version1
#define dcgmRunDiag_version dcgmRunDiag_version5
#define dcgmRunDiag_version1
#define dcgmRunDiag_version2
#define dcgmRunDiag_version3
#define dcgmRunDiag_version4
#define dcgmRunDiag_version5
#define dcgmRunningProcess_version
#define dcgmRunningProcess_version1
#define dcgmVersionInfo_version1
#define dcgmVgpuConfig_version dcgmVgpuConfig_version1
#define dcgmVgpuConfig_version1
#define dcgmVgpuDeviceAttributes_version
#define dcgmVgpuDeviceAttributes_version6
#define dcgmVgpuInstanceAttributes_version
#define dcgmVgpuInstanceAttributes_version1

Typedefs

typedef struct dcgmClockSet_v1 dcgmClockSet_t
typedef struct dcgmConfig_v1 dcgmConfig_t
typedef struct dcgmConnectV2Params_v2 dcgmConnectV2Params_t
typedef struct dcgmDeviceAttributes_v1 dcgmDeviceAttributes_t
typedef struct dcgmDeviceEncStats_v1 dcgmDeviceEncStats_t
typedef struct dcgmDeviceFbcSessionInfo_v1 dcgmDeviceFbcSessionInfo_t
typedef struct dcgmDeviceFbcSessions_v1 dcgmDeviceFbcSessions_t
typedef struct dcgmDeviceFbcStats_v1 dcgmDeviceFbcStats_t
typedef struct dcgmDeviceIdentifiers_v1 dcgmDeviceIdentifiers_t
typedef struct dcgmDeviceMemoryUsage_v1 dcgmDeviceMemoryUsage_t
typedef struct dcgmDevicePidAccountingStats_v1 dcgmDevicePidAccountingStats_t
typedef struct dcgmDevicePowerLimits_v1 dcgmDevicePowerLimits_t
typedef struct dcgmDeviceSupportedClockSets_v1 dcgmDeviceSupportedClockSets_t
typedef struct dcgmDeviceThermals_v1 dcgmDeviceThermals_t
typedef struct dcgmDeviceTopology_v1 dcgmDeviceTopology_t
typedef struct dcgmDeviceVgpuEncSessions_v1 dcgmDeviceVgpuEncSessions_t
typedef struct dcgmDeviceVgpuIds_v1 dcgmDeviceVgpuIds_t
typedef struct dcgmDeviceVgpuProcessUtilInfo_v1 dcgmDeviceVgpuProcessUtilInfo_t
typedef struct dcgmDeviceVgpuTypeInfo_v1 dcgmDeviceVgpuTypeInfo_t
typedef struct dcgmDeviceVgpuUtilInfo_v1 dcgmDeviceVgpuUtilInfo_t
typedef dcgmDiagResponse_v5  dcgmDiagResponse_t
typedef void *  dcgmFieldGrp_t
Identifier for a group of fields.
typedef int (*dcgmFieldValueEntityEnumeration_f)(dcgm_field_entity_group_t entityGroupId, dcgm_field_eid_t entityId, dcgmFieldValue_v1 *values, int numValues, void *userData)
typedef int (*dcgmFieldValueEnumeration_f)(unsigned int gpuId, dcgmFieldValue_v1 *values, int numValues, void *userData)
typedef void *  dcgmGpuGrp_t
Identifier for a group of GPUs. A group can have one or more GPUs.
typedef struct dcgmGroupInfo_v2 dcgmGroupInfo_t
typedef struct dcgmGroupTopology_v1 dcgmGroupTopology_t
typedef void *  dcgmHandle_t
Identifier for DCGM Handle.
typedef struct dcgmHealthResponse_v3 dcgmHealthResponse_t
typedef struct dcgmIntrospectContext_v1 dcgmIntrospectContext_t
typedef struct dcgmIntrospectCpuUtil_v1 dcgmIntrospectCpuUtil_t
typedef struct dcgmIntrospectFieldsExecTime_v1 dcgmIntrospectFieldsExecTime_t
typedef struct dcgmIntrospectFullFieldsExecTime_v1 dcgmIntrospectFullFieldsExecTime_t
typedef struct dcgmIntrospectFullMemory_v1 dcgmIntrospectFullMemory_t
typedef struct dcgmIntrospectMemory_v1 dcgmIntrospectMemory_t
typedef struct dcgmJobInfo_v2 dcgmJobInfo_t
typedef struct dcgmPidInfo_v1 dcgmPidInfo_t
typedef struct dcgmPolicyCallbackResponse_v1 dcgmPolicyCallbackResponse_t
typedef struct dcgmPolicy_v1 dcgmPolicy_t
typedef dcgmRunDiag_v5  dcgmRunDiag_t
typedef struct dcgmRunningProcess_v1 dcgmRunningProcess_t
typedef void *  dcgmStatus_t
Identifier for list of status codes.
typedef struct dcgmVgpuConfig_v1 dcgmVgpuConfig_t
typedef struct dcgmVgpuDeviceAttributes_v6 dcgmVgpuDeviceAttributes_t
typedef struct dcgmVgpuInstanceAttributes_v1 dcgmVgpuInstanceAttributes_t
typedef int (*fpRecvUpdates)(void *userData)

Enumerations

enum dcgmDiagResult_t
enum dcgmDiagnosticLevel_t
enum dcgmGpuNVLinkErrorType_t
enum dcgmGpuTopologyLevel_t
enum dcgmHealthSystems_t
enum dcgmHealthWatchResults_t
enum dcgmIntrospectLevel_t
enum dcgmIntrospectState_t
enum dcgmModuleId_t
enum dcgmModuleStatus_t
enum dcgmNvLinkLinkState_t
enum dcgmPerGpuTestIndices_t
enum dcgmPolicyAction_t
enum dcgmPolicyCondition_t
enum dcgmPolicyFailureResp_t
enum dcgmPolicyIsolation_t
enum dcgmPolicyMode_t
enum dcgmPolicyValidation_t

Defines

#define DCGM_DEVICE_UUID_BUFFER_SIZE 80

Represents the size of a buffer that holds a string related to attributes specific to a vGPU instance.

#define DCGM_FV_FLAG_LIVE_DATA 0x00000001

Field value flags used by dcgmEntitiesGetLatestValues

#define DCGM_GEGE_FLAG_ONLY_SUPPORTED 0x00000001

Flags for dcgmGetEntityGroupEntities's flags parameter. Only return entities that are supported by DCGM.

#define DCGM_HEALTH_WATCH_COUNT_V1 10

#define DCGM_HEALTH_WATCH_COUNT_V2 12

#define DCGM_MAX_BLOB_LENGTH 4096

#define DCGM_MAX_FIELD_IDS_PER_FIELD_GROUP 128

Maximum number of field IDs that can be in a single field group

#define DCGM_MAX_NUM_FIELD_GROUPS 64

Maximum number of field groups that can exist

#define DCGM_MAX_VGPU_TYPES_PER_PGPU 32

Maximum number of vGPU types per physical GPU

#define DCGM_MODULE_STATUSES_CAPACITY 16

#define DCGM_PROF_MAX_FIELD_IDS_PER_GROUP 8

#define DCGM_PROF_MAX_NUM_GROUPS 10

Structure to return all of the profiling metric groups that are available for the given groupId.

#define dcgmAllFieldGroup_version

Latest version for dcgmAllFieldGroup_t

Value

dcgmAllFieldGroup_version1

#define dcgmAllFieldGroup_version1

Version 1 for dcgmAllFieldGroup_v1

Value

MAKE_DCGM_VERSION(dcgmAllFieldGroup_v1, 1)

#define dcgmClockSet_version dcgmClockSet_version1

Latest version for dcgmClockSet_t

#define dcgmClockSet_version1

Version 1 for dcgmClockSet_v1

Value

MAKE_DCGM_VERSION(dcgmClockSet_v1, 1)

#define dcgmConfig_version dcgmConfig_version1

Latest version for dcgmConfig_t

#define dcgmConfig_version1

Version 1 for dcgmConfig_v1

Value

MAKE_DCGM_VERSION(dcgmConfig_v1, 1)

#define dcgmConnectV2Params_version

Latest version for dcgmConnectV2Params_t

Value

dcgmConnectV2Params_version2

#define dcgmConnectV2Params_version1

Version 1 for dcgmConnectV2Params_v1

Value

MAKE_DCGM_VERSION(dcgmConnectV2Params_v1, 1)

#define dcgmConnectV2Params_version2

Version 2 for dcgmConnectV2Params_v2

Value

MAKE_DCGM_VERSION(dcgmConnectV2Params_v2, 2)

#define dcgmDeviceAttributes_version

Latest version for dcgmDeviceAttributes_t

Value

dcgmDeviceAttributes_version1

#define dcgmDeviceAttributes_version1

Version 1 for dcgmDeviceAttributes_v1

Value

MAKE_DCGM_VERSION(dcgmDeviceAttributes_v1, 1)

#define dcgmDeviceEncStats_version

Latest version for dcgmDeviceEncStats_t

Value

dcgmDeviceEncStats_version1

#define dcgmDeviceEncStats_version1

Version 1 for dcgmDeviceEncStats_v1

Value

MAKE_DCGM_VERSION(dcgmDeviceEncStats_v1, 1)

#define dcgmDeviceFbcSessionInfo_version

Latest version for dcgmDeviceFbcSessionInfo_t

Value

dcgmDeviceFbcSessionInfo_version1

#define dcgmDeviceFbcSessionInfo_version1
Value

MAKE_DCGM_VERSION(dcgmDeviceFbcSessionInfo_v1, 1)

#define dcgmDeviceFbcSessions_version

Latest version for dcgmDeviceFbcSessions_t

Value

dcgmDeviceFbcSessions_version1

#define dcgmDeviceFbcSessions_version1

Version 1 for dcgmDeviceFbcSessions_v1

Value

MAKE_DCGM_VERSION(dcgmDeviceFbcSessions_v1, 1)

#define dcgmDeviceFbcStats_version

Latest version for dcgmDeviceFbcStats_t

Value

dcgmDeviceFbcStats_version1

#define dcgmDeviceFbcStats_version1

Version 1 for dcgmDeviceFbcStats_v1

Value

MAKE_DCGM_VERSION(dcgmDeviceFbcStats_v1, 1)

#define dcgmDeviceIdentifiers_version

Latest version for dcgmDeviceIdentifiers_t

Value

dcgmDeviceIdentifiers_version1

#define dcgmDeviceIdentifiers_version1

Version 1 for dcgmDeviceIdentifiers_v1

Value

MAKE_DCGM_VERSION(dcgmDeviceIdentifiers_v1, 1)

#define dcgmDeviceMemoryUsage_version

Latest version for dcgmDeviceMemoryUsage_t

Value

dcgmDeviceMemoryUsage_version1

#define dcgmDeviceMemoryUsage_version1

Version 1 for dcgmDeviceMemoryUsage_v1

Value

MAKE_DCGM_VERSION(dcgmDeviceMemoryUsage_v1, 1)

#define dcgmDevicePidAccountingStats_version

Latest version for dcgmDevicePidAccountingStats_t

Value

dcgmDevicePidAccountingStats_version1

#define dcgmDevicePidAccountingStats_version1
Value

MAKE_DCGM_VERSION(dcgmDevicePidAccountingStats_v1, 1)

#define dcgmDevicePowerLimits_version

Latest version for dcgmDevicePowerLimits_t

Value

dcgmDevicePowerLimits_version1

#define dcgmDevicePowerLimits_version1

Version 1 for dcgmDevicePowerLimits_v1

Value

MAKE_DCGM_VERSION(dcgmDevicePowerLimits_v1, 1)

#define dcgmDeviceSupportedClockSets_version

Latest version for dcgmDeviceSupportedClockSets_t

Value

dcgmDeviceSupportedClockSets_version1

#define dcgmDeviceSupportedClockSets_version1
Value

MAKE_DCGM_VERSION(dcgmDeviceSupportedClockSets_v1, 1)

#define dcgmDeviceThermals_version

Latest version for dcgmDeviceThermals_t

Value

dcgmDeviceThermals_version1

#define dcgmDeviceThermals_version1

Version 1 for dcgmDeviceThermals_v1

Value

MAKE_DCGM_VERSION(dcgmDeviceThermals_v1, 1)

#define dcgmDeviceTopology_version

Latest version for dcgmDeviceTopology_t

Value

dcgmDeviceTopology_version1

#define dcgmDeviceTopology_version1

Version 1 for dcgmDeviceTopology_v1

Value

MAKE_DCGM_VERSION(dcgmDeviceTopology_v1, 1)

#define dcgmDeviceVgpuEncSessions_version

Latest version for dcgmDeviceVgpuEncSessions_t

Value

dcgmDeviceVgpuEncSessions_version1

#define dcgmDeviceVgpuEncSessions_version1
Value

MAKE_DCGM_VERSION(dcgmDeviceVgpuEncSessions_v1, 1)

#define dcgmDeviceVgpuIds_version

Latest version for dcgmDeviceVgpuIds_t

Value

dcgmDeviceVgpuIds_version1

#define dcgmDeviceVgpuIds_version1

Version 1 for dcgmDeviceVgpuIds_v1

Value

MAKE_DCGM_VERSION(dcgmDeviceVgpuIds_v1, 1)

#define dcgmDeviceVgpuProcessUtilInfo_version

Latest version for dcgmDeviceVgpuProcessUtilInfo_t

Value

dcgmDeviceVgpuProcessUtilInfo_version1

#define dcgmDeviceVgpuProcessUtilInfo_version1
Value

MAKE_DCGM_VERSION(dcgmDeviceVgpuProcessUtilInfo_v1, 1)

#define dcgmDeviceVgpuTypeInfo_version

Latest version for dcgmDeviceVgpuTypeInfo_t

Value

dcgmDeviceVgpuTypeInfo_version1

#define dcgmDeviceVgpuTypeInfo_version1
Value

MAKE_DCGM_VERSION(dcgmDeviceVgpuTypeInfo_v1, 1)

#define dcgmDeviceVgpuUtilInfo_version

Latest version for dcgmDeviceVgpuUtilInfo_t

Value

dcgmDeviceVgpuUtilInfo_version1

#define dcgmDeviceVgpuUtilInfo_version1
Value

MAKE_DCGM_VERSION(dcgmDeviceVgpuUtilInfo_v1, 1)

#define dcgmDiagResponse_version

Latest version for dcgmDiagResponse_t

Value

dcgmDiagResponse_version5

#define dcgmDiagResponse_version3

Version 3 for dcgmDiagResponse_v3

Value

MAKE_DCGM_VERSION(dcgmDiagResponse_v3, 3)

#define dcgmDiagResponse_version4

Version 4 for dcgmDiagResponse_v4

Value

MAKE_DCGM_VERSION(dcgmDiagResponse_v4, 4)

#define dcgmDiagResponse_version5

Version 5 for dcgmDiagResponse_v5

Value

MAKE_DCGM_VERSION(dcgmDiagResponse_v5, 5)

#define dcgmFieldGroupInfo_version

Latest version for dcgmFieldGroupInfo_t

Value

dcgmFieldGroupInfo_version1

#define dcgmFieldGroupInfo_version1

Version 1 for dcgmFieldGroupInfo_v1

Value

MAKE_DCGM_VERSION(dcgmFieldGroupInfo_v1, 1)

#define dcgmFieldValue_version1

Version 1 for dcgmFieldValue_v1

Value

MAKE_DCGM_VERSION(dcgmFieldValue_v1, 1)

#define dcgmFieldValue_version2

Version 2 for dcgmFieldValue_v2

Value

MAKE_DCGM_VERSION(dcgmFieldValue_v2, 2)

#define dcgmGroupInfo_version dcgmGroupInfo_version2

Latest version for dcgmGroupInfo_t

#define dcgmGroupInfo_version1

Version 1 for dcgmGroupInfo_v1

Value

MAKE_DCGM_VERSION(dcgmGroupInfo_v1, 1)

#define dcgmGroupInfo_version2

Version 2 for dcgmGroupInfo_v2

Value

MAKE_DCGM_VERSION(dcgmGroupInfo_v2, 2)

#define dcgmGroupTopology_version

Latest version for dcgmGroupTopology_t

Value

dcgmGroupTopology_version1

#define dcgmGroupTopology_version1

Version 1 for dcgmGroupTopology_v1

Value

MAKE_DCGM_VERSION(dcgmGroupTopology_v1, 1)

#define dcgmHealthResponse_version

Latest version for dcgmHealthResponse_t

Value

dcgmHealthResponse_version3

#define dcgmHealthResponse_version1

Version 1 for dcgmHealthResponse_v1

Value

MAKE_DCGM_VERSION(dcgmHealthResponse_v1, 1)

#define dcgmHealthResponse_version2

Version 2 for dcgmHealthResponse_v2

Value

MAKE_DCGM_VERSION(dcgmHealthResponse_v2, 2)

#define dcgmHealthResponse_version3

Version 3 for dcgmHealthResponse_v3

Value

MAKE_DCGM_VERSION(dcgmHealthResponse_v3, 3)

#define dcgmIntrospectContext_version

Latest version for dcgmIntrospectContext_t

Value

dcgmIntrospectContext_version1

#define dcgmIntrospectContext_version1

Version 1 for dcgmIntrospectContext_t

Value

MAKE_DCGM_VERSION(dcgmIntrospectContext_v1, 1)

#define dcgmIntrospectCpuUtil_version

Latest version for dcgmIntrospectCpuUtil_t

Value

dcgmIntrospectCpuUtil_version1

#define dcgmIntrospectCpuUtil_version1

Version 1 for dcgmIntrospectCpuUtil_t

Value

MAKE_DCGM_VERSION(dcgmIntrospectCpuUtil_v1, 1)

#define dcgmIntrospectFieldsExecTime_version

Latest version for dcgmIntrospectFieldsExecTime_t

Value

dcgmIntrospectFieldsExecTime_version1

#define dcgmIntrospectFieldsExecTime_version1
Value

MAKE_DCGM_VERSION(dcgmIntrospectFieldsExecTime_v1, 1)

#define dcgmIntrospectFullFieldsExecTime_version
Value

dcgmIntrospectFullFieldsExecTime_version1

#define dcgmIntrospectFullFieldsExecTime_version1
Value

MAKE_DCGM_VERSION(dcgmIntrospectFullFieldsExecTime_v1, 1)

#define dcgmIntrospectFullMemory_version

Latest version for dcgmIntrospectFullMemory_t

Value

dcgmIntrospectFullMemory_version1

#define dcgmIntrospectFullMemory_version1
Value

MAKE_DCGM_VERSION(dcgmIntrospectFullMemory_v1, 1)

#define dcgmIntrospectMemory_version

Latest version for dcgmIntrospectMemory_t

Value

dcgmIntrospectMemory_version1

#define dcgmIntrospectMemory_version1

Version 1 for dcgmIntrospectMemory_t

Value

MAKE_DCGM_VERSION(dcgmIntrospectMemory_v1, 1)

#define dcgmJobInfo_version dcgmJobInfo_version2

Latest version for dcgmJobInfo_t

#define dcgmJobInfo_version2

Version 2 for dcgmJobInfo_v2

Value

MAKE_DCGM_VERSION(dcgmJobInfo_v2, 2)

#define dcgmModuleGetStatuses_version1

Version 1 of dcgmModuleGetStatuses

Value

MAKE_DCGM_VERSION(dcgmModuleGetStatuses_v1, 1)

#define dcgmNvLinkStatus_version1

Version 1 of dcgmNvLinkStatus

Value

MAKE_DCGM_VERSION(dcgmNvLinkStatus_v1, 1)

#define dcgmPidInfo_version dcgmPidInfo_version1

Latest version for dcgmPidInfo_t

#define dcgmPidInfo_version1

Version 1 for dcgmPidInfo_v1

Value

MAKE_DCGM_VERSION(dcgmPidInfo_v1, 1)

#define dcgmPolicyCallbackResponse_version

Latest version for dcgmPolicyCallbackResponse_t

Value

dcgmPolicyCallbackResponse_version1

#define dcgmPolicyCallbackResponse_version1
Value

MAKE_DCGM_VERSION(dcgmPolicyCallbackResponse_v1, 1)

#define dcgmPolicy_version dcgmPolicy_version1

Latest version for dcgmPolicy_t

#define dcgmPolicy_version1

Version 1 for dcgmPolicy_v1

Value

MAKE_DCGM_VERSION(dcgmPolicy_v1, 1)

#define dcgmProfGetMetricGroups_version2

Version 2 of dcgmProfGetMetricGroups_v2

Value

MAKE_DCGM_VERSION(dcgmProfGetMetricGroups_v2, 2)

#define dcgmProfUnwatchFields_version1
Value

MAKE_DCGM_VERSION(dcgmProfUnwatchFields_v1, 1)

#define dcgmProfWatchFields_version1

Version 1 of dcgmProfWatchFields_v1

Value

MAKE_DCGM_VERSION(dcgmProfWatchFields_v1, 1)

#define dcgmRunDiag_version dcgmRunDiag_version5

Latest version for dcgmRunDiag_t

#define dcgmRunDiag_version1

Version 1 for dcgmRunDiag_v1

Value

MAKE_DCGM_VERSION(dcgmRunDiag_v1, 1)

#define dcgmRunDiag_version2

Version 2 for dcgmRunDiag_v2

Value

MAKE_DCGM_VERSION(dcgmRunDiag_v2, 2)

#define dcgmRunDiag_version3

Version 3 for dcgmRunDiag_v3

Value

MAKE_DCGM_VERSION(dcgmRunDiag_v3, 3)

#define dcgmRunDiag_version4

Version 4 for dcgmRunDiag_v4

Value

MAKE_DCGM_VERSION(dcgmRunDiag_v4, 4)

#define dcgmRunDiag_version5

Version 5 for dcgmRunDiag_v5

Value

MAKE_DCGM_VERSION(dcgmRunDiag_v5, 5)

#define dcgmRunningProcess_version

Latest version for dcgmRunningProcess_t

Value

dcgmRunningProcess_version1

#define dcgmRunningProcess_version1

Version 1 for dcgmRunningProcess_v1

Value

MAKE_DCGM_VERSION(dcgmRunningProcess_v1, 1)

#define dcgmVersionInfo_version1

Version 1 of dcgmVersionInfo_v1

Value

MAKE_DCGM_VERSION(dcgmVersionInfo_v1, 1)

#define dcgmVgpuConfig_version dcgmVgpuConfig_version1

Latest version for dcgmVgpuConfig_t

#define dcgmVgpuConfig_version1

Version 1 for dcgmVgpuConfig_v1

Value

MAKE_DCGM_VERSION(dcgmVgpuConfig_v1, 1)

#define dcgmVgpuDeviceAttributes_version

Latest version for dcgmVgpuDeviceAttributes_t

Value

dcgmVgpuDeviceAttributes_version6

#define dcgmVgpuDeviceAttributes_version6
Value

MAKE_DCGM_VERSION(dcgmVgpuDeviceAttributes_v6, 1)

#define dcgmVgpuInstanceAttributes_version

Latest version for dcgmVgpuInstanceAttributes_t

Value

dcgmVgpuInstanceAttributes_version1

#define dcgmVgpuInstanceAttributes_version1
Value

MAKE_DCGM_VERSION(dcgmVgpuInstanceAttributes_v1, 1)

Typedefs

typedef struct dcgmClockSet_v1 dcgmClockSet_t

Typedef for dcgmClockSet_v1

typedef struct dcgmConfig_v1 dcgmConfig_t

Typedef for dcgmConfig_v1

typedef struct dcgmConnectV2Params_v2 dcgmConnectV2Params_t
typedef struct dcgmDeviceAttributes_v1 dcgmDeviceAttributes_t
typedef struct dcgmDeviceEncStats_v1 dcgmDeviceEncStats_t

Typedef for dcgmDeviceEncStats_v1

typedef struct dcgmDeviceFbcSessionInfo_v1 dcgmDeviceFbcSessionInfo_t
typedef struct dcgmDeviceFbcSessions_v1 dcgmDeviceFbcSessions_t
typedef struct dcgmDeviceFbcStats_v1 dcgmDeviceFbcStats_t

Typedef for dcgmDeviceFbcStats_v1

typedef struct dcgmDeviceIdentifiers_v1 dcgmDeviceIdentifiers_t
typedef struct dcgmDeviceMemoryUsage_v1 dcgmDeviceMemoryUsage_t
typedef struct dcgmDevicePidAccountingStats_v1 dcgmDevicePidAccountingStats_t
typedef struct dcgmDevicePowerLimits_v1 dcgmDevicePowerLimits_t
typedef struct dcgmDeviceSupportedClockSets_v1 dcgmDeviceSupportedClockSets_t
typedef struct dcgmDeviceThermals_v1 dcgmDeviceThermals_t

Typedef for dcgmDeviceThermals_v1

typedef struct dcgmDeviceTopology_v1 dcgmDeviceTopology_t

Typedef for dcgmDeviceTopology_v1

typedef struct dcgmDeviceVgpuEncSessions_v1 dcgmDeviceVgpuEncSessions_t
typedef struct dcgmDeviceVgpuIds_v1 dcgmDeviceVgpuIds_t

Typedef for dcgmDeviceVgpuIds_v1

typedef struct dcgmDeviceVgpuProcessUtilInfo_v1 dcgmDeviceVgpuProcessUtilInfo_t
typedef struct dcgmDeviceVgpuTypeInfo_v1 dcgmDeviceVgpuTypeInfo_t
typedef struct dcgmDeviceVgpuUtilInfo_v1 dcgmDeviceVgpuUtilInfo_t
typedef dcgmDiagResponse_v5 dcgmDiagResponse_t

Typedef for dcgmDiagResponse_v5

typedef void * dcgmFieldGrp_t

Identifier for a group of fields.

typedef int (*dcgmFieldValueEntityEnumeration_f)(dcgm_field_entity_group_t entityGroupId, dcgm_field_eid_t entityId, dcgmFieldValue_v1 *values, int numValues, void *userData)

User callback function for processing one or more field updates. This callback will be invoked one or more times per field until all of the expected field values have been enumerated. It is up to the callee to detect when the field id changes.

Returns 0 if OK, <0 if enumeration should stop. This allows the callee to abort field value enumeration.

Parameters
entityGroupId
IN: entityGroup of the entity this field value set belongs to
dcgm_field_eid_t entityId
values
IN: Field values. These values must be copied as they will be destroyed as soon as this call returns.
int numValues
userData
IN: User data pointer passed to the update function that generated this callback
typedef int (*dcgmFieldValueEnumeration_f)(unsigned int gpuId, dcgmFieldValue_v1 *values, int numValues, void *userData)

User callback function for processing one or more field updates. This callback will be invoked one or more times per field until all of the expected field values have been enumerated. It is up to the callee to detect when the field id changes.

Returns 0 if OK, <0 if enumeration should stop. This allows the callee to abort field value enumeration.

Parameters
int gpuId
values
IN: Field values. These values must be copied as they will be destroyed as soon as this call returns.
int numValues
userData
IN: User data pointer passed to the update function that generated this callback
typedef void * dcgmGpuGrp_t

Identifier for a group of GPUs. A group can have one or more GPUs.

typedef struct dcgmGroupInfo_v2 dcgmGroupInfo_t

Typedef for dcgmGroupInfo_v2

typedef struct dcgmGroupTopology_v1 dcgmGroupTopology_t

Typedef for dcgmGroupTopology_v1

typedef void * dcgmHandle_t

Identifier for DCGM Handle.

typedef struct dcgmHealthResponse_v3 dcgmHealthResponse_t

Typedef for dcgmHealthResponse_v3

typedef struct dcgmIntrospectContext_v1 dcgmIntrospectContext_t
typedef struct dcgmIntrospectCpuUtil_v1 dcgmIntrospectCpuUtil_t
typedef struct dcgmIntrospectFieldsExecTime_v1 dcgmIntrospectFieldsExecTime_t
typedef struct dcgmIntrospectFullFieldsExecTime_v1 dcgmIntrospectFullFieldsExecTime_t
typedef struct dcgmIntrospectFullMemory_v1 dcgmIntrospectFullMemory_t
typedef struct dcgmIntrospectMemory_v1 dcgmIntrospectMemory_t
typedef struct dcgmJobInfo_v2 dcgmJobInfo_t

Typedef for dcgmJobInfo_v2

typedef struct dcgmPidInfo_v1 dcgmPidInfo_t

Typedef for dcgmPidInfo_v1

typedef struct dcgmPolicyCallbackResponse_v1 dcgmPolicyCallbackResponse_t
typedef struct dcgmPolicy_v1 dcgmPolicy_t

Typedef for dcgmPolicy_v1

typedef dcgmRunDiag_v5 dcgmRunDiag_t

Typedef for dcgmRunDiag_v5

typedef struct dcgmRunningProcess_v1 dcgmRunningProcess_t

Typedef for dcgmRunningProcess_v1

typedef void * dcgmStatus_t

Identifier for list of status codes.

typedef struct dcgmVgpuConfig_v1 dcgmVgpuConfig_t

Typedef for dcgmVgpuConfig_v1

typedef struct dcgmVgpuDeviceAttributes_v6 dcgmVgpuDeviceAttributes_t
typedef struct dcgmVgpuInstanceAttributes_v1 dcgmVgpuInstanceAttributes_t
typedef int (*fpRecvUpdates)(void *userData)

Represents a callback to receive updates from asynchronous functions. Currently the only implemented callback function is dcgmPolicyRegister and the void * data will be a pointer to dcgmPolicyCallbackResponse_t. Ex. dcgmPolicyCallbackResponse_t *callbackResponse = (dcgmPolicyCallbackResponse_t *) userData;

Enumerations

enum dcgmDiagResult_t

Diagnostic test results

Values
DCGM_DIAG_RESULT_PASS = 0
This test passed the diagnostics.
DCGM_DIAG_RESULT_SKIP = 1
This test was skipped.
DCGM_DIAG_RESULT_WARN = 2
This test passed with warnings.
DCGM_DIAG_RESULT_FAIL = 3
This test failed the diagnostics.
DCGM_DIAG_RESULT_NOT_RUN = 4
This test wasn't executed.
enum dcgmDiagnosticLevel_t

Enumeration for diagnostic levels

Values
DCGM_DIAG_LVL_INVALID = 0
Uninitialized.
DCGM_DIAG_LVL_SHORT = 10
run a very basic health check on the system
DCGM_DIAG_LVL_MED = 20
run a medium-length diagnostic (a few minutes)
DCGM_DIAG_LVL_LONG = 30
run an extensive diagnostic (several minutes)
enum dcgmGpuNVLinkErrorType_t

Identifies a GPU NVLink error type returned by DCGM_FI_DEV_GPU_NVLINK_ERRORS

Values
DCGM_GPU_NVLINK_ERROR_RECOVERY_REQUIRED = 1
NVLink link recovery error occurred.
DCGM_GPU_NVLINK_ERROR_FATAL
NVLink link fatal error occurred.
enum dcgmGpuTopologyLevel_t

Represents level relationships within a system between two GPUs. The enums are spaced to allow for future relationships. These match the definitions in nvml.h.

Values
DCGM_TOPOLOGY_BOARD = 0x1
multi-GPU board
DCGM_TOPOLOGY_SINGLE = 0x2
all devices that only need traverse a single PCIe switch
DCGM_TOPOLOGY_MULTIPLE = 0x4
all devices that need not traverse a host bridge
DCGM_TOPOLOGY_HOSTBRIDGE = 0x8
all devices that are connected to the same host bridge
DCGM_TOPOLOGY_CPU = 0x10
all devices that are connected to the same CPU but possibly multiple host bridges
DCGM_TOPOLOGY_SYSTEM = 0x20
all devices in the system
DCGM_TOPOLOGY_NVLINK1 = 0x0100
GPUs connected via a single NVLINK link.
DCGM_TOPOLOGY_NVLINK2 = 0x0200
GPUs connected via two NVLINK links.
DCGM_TOPOLOGY_NVLINK3 = 0x0400
GPUs connected via three NVLINK links.
DCGM_TOPOLOGY_NVLINK4 = 0x0800
GPUs connected via four NVLINK links.
DCGM_TOPOLOGY_NVLINK5 = 0x1000
GPUs connected via five NVLINK links.
DCGM_TOPOLOGY_NVLINK6 = 0x2000
GPUs connected via six NVLINK links.
enum dcgmHealthSystems_t

Systems structure used to enable or disable health watch systems

Values
DCGM_HEALTH_WATCH_PCIE = 0x1
PCIe system watches (must have 1m of data before query).
DCGM_HEALTH_WATCH_NVLINK = 0x2
NVLINK system watches.
DCGM_HEALTH_WATCH_PMU = 0x4
Power management unit watches.
DCGM_HEALTH_WATCH_MCU = 0x8
Microcontroller unit watches.
DCGM_HEALTH_WATCH_MEM = 0x10
Memory watches.
DCGM_HEALTH_WATCH_SM = 0x20
Streaming multiprocessor watches.
DCGM_HEALTH_WATCH_INFOROM = 0x40
Inforom watches.
DCGM_HEALTH_WATCH_THERMAL = 0x80
Temperature watches (must have 1m of data before query).
DCGM_HEALTH_WATCH_POWER = 0x100
Power watches (must have 1m of data before query).
DCGM_HEALTH_WATCH_DRIVER = 0x200
Driver-related watches.
DCGM_HEALTH_WATCH_NVSWITCH_NONFATAL = 0x400
Non-fatal errors in NvSwitch.
DCGM_HEALTH_WATCH_NVSWITCH_FATAL = 0x800
Fatal errors in NvSwitch.
DCGM_HEALTH_WATCH_ALL = 0xFFFFFFFF
All watches enabled.
enum dcgmHealthWatchResults_t

Health Watch test results

Values
DCGM_HEALTH_RESULT_PASS = 0
All results within this system are reporting normal.
DCGM_HEALTH_RESULT_WARN = 10
A warning has been issued, refer to the response for more information.
DCGM_HEALTH_RESULT_FAIL = 20
A failure has been issued, refer to the response for more information.
enum dcgmIntrospectLevel_t

Identifies a level to retrieve field introspection info for

Values
DCGM_INTROSPECT_LVL_INVALID = 0
Invalid value.
DCGM_INTROSPECT_LVL_FIELD = 1
Introspection data is grouped by field ID.
DCGM_INTROSPECT_LVL_FIELD_GROUP = 2
Introspection data is grouped by field group.
DCGM_INTROSPECT_LVL_ALL_FIELDS
Introspection data is aggregated for all fields.
enum dcgmIntrospectState_t

State of DCGM metadata gathering. If it is set to DISABLED then "Metadata" API calls to DCGM are not supported.

Values
DCGM_INTROSPECT_STATE_DISABLED = 0
DCGM_INTROSPECT_STATE_ENABLED = 1
enum dcgmModuleId_t

Module IDs

Values
DcgmModuleIdCore = 0
Core DCGM - always loaded.
DcgmModuleIdNvSwitch = 1
NvSwitch Module.
DcgmModuleIdVGPU = 2
VGPU Module.
DcgmModuleIdIntrospect = 3
Introspection Module.
DcgmModuleIdHealth = 4
Health Module.
DcgmModuleIdPolicy = 5
Policy Module.
DcgmModuleIdConfig = 6
Config Module.
DcgmModuleIdDiag = 7
GPU Diagnostic Module.
DcgmModuleIdProfiling = 8
Profiling Module.
DcgmModuleIdCount
Always last. 1 greater than largest value above.
enum dcgmModuleStatus_t

Module Status. Modules are lazy loaded, so they will be in status DcgmModuleStatusNotLoaded until they are used. Once modules are used, they will move to another status.

Values
DcgmModuleStatusNotLoaded = 0
Module has not been loaded yet.
DcgmModuleStatusBlacklisted = 1
Module has been blacklisted from being loaded.
DcgmModuleStatusFailed = 2
Loading the module failed.
DcgmModuleStatusLoaded = 3
Module has been loaded.
enum dcgmNvLinkLinkState_t

NvLink link states

Values
DcgmNvLinkLinkStateNotSupported = 0
NvLink is unsupported by this GPU (Default for GPUs).
DcgmNvLinkLinkStateDisabled = 1
NvLink is supported for this link but this link is disabled (Default for NvSwitches).
DcgmNvLinkLinkStateDown = 2
This NvLink link is down (inactive).
DcgmNvLinkLinkStateUp = 3
This NvLink link is up (active).
enum dcgmPerGpuTestIndices_t

Diagnostic per gpu tests - fixed indices for dcgmDiagResponsePerGpu_t.results[]

Values
DCGM_MEMORY_INDEX = 0
Memory test index.
DCGM_DIAGNOSTIC_INDEX = 1
Diagnostic test index.
DCGM_PCI_INDEX = 2
PCIe test index.
DCGM_SM_PERF_INDEX = 3
SM Stress test index.
DCGM_TARGETED_PERF_INDEX = 4
Targeted Stress test index.
DCGM_TARGETED_POWER_INDEX = 5
Targeted Power test index.
DCGM_MEMORY_BANDWIDTH_INDEX = 6
Memory bandwidth test index.
enum dcgmPolicyAction_t

Enumeration for policy actions

Values
DCGM_POLICY_ACTION_NONE = 0
no action
DCGM_POLICY_ACTION_GPURESET = 1
perform a GPU reset on violation
enum dcgmPolicyCondition_t

Enumeration for policy conditions. When used as part of dcgmPolicy_t these have corresponding parameters to allow them to be switched on/off or set specific violation thresholds

Values
DCGM_POLICY_COND_DBE = 0x1
Double bit errors -- boolean in dcgmPolicyConditionParms_t.
DCGM_POLICY_COND_PCI = 0x2
PCI events/errors -- boolean in dcgmPolicyConditionParms_t.
DCGM_POLICY_COND_MAX_PAGES_RETIRED = 0x4
Maximum number of retired pages -- number required in dcgmPolicyConditionParms_t.
DCGM_POLICY_COND_THERMAL = 0x8
Thermal violation -- number required in dcgmPolicyConditionParms_t.
DCGM_POLICY_COND_POWER = 0x10
Power violation -- number required in dcgmPolicyConditionParms_t.
DCGM_POLICY_COND_NVLINK = 0x20
NVLINK errors -- boolean in dcgmPolicyConditionParms_t.
DCGM_POLICY_COND_XID = 0x40
XID errors -- number required in dcgmPolicyConditionParms_t.
enum dcgmPolicyFailureResp_t

Enumeration for policy failure responses

Values
DCGM_POLICY_FAILURE_NONE = 0
on failure of validation perform no action
enum dcgmPolicyIsolation_t

Enumeration for policy isolation modes

Values
DCGM_POLICY_ISOLATION_NONE = 0
no isolation of GPUs on error
enum dcgmPolicyMode_t

Enumeration for policy modes

Values
DCGM_POLICY_MODE_AUTOMATED = 0
automatic mode
DCGM_POLICY_MODE_MANUAL = 1
manual mode
enum dcgmPolicyValidation_t

Enumeration for policy validation actions

Values
DCGM_POLICY_VALID_NONE = 0
no validation after an action is performed
DCGM_POLICY_VALID_SV_SHORT = 1
run a short System Validation on the system after failure
DCGM_POLICY_VALID_SV_MED = 2
run a medium System Validation test after failure
DCGM_POLICY_VALID_SV_LONG = 3
run an extensive System Validation test after failure