Structure Definitions¶
- group dcgmStructs
Unnamed Group
-
DCGM_HOME_DIR_VAR_NAME "DCGM_HOME_DIR"
Flags options for running the GPU diagnostic.
-
DCGM_RUN_FLAGS_VERBOSE 0x0001
Output in verbose mode; include information as well as warnings.
-
DCGM_RUN_FLAGS_STATSONFAIL 0x0002
Output stats only on failure.
-
DCGM_RUN_FLAGS_TRAIN 0x0004
UNUSED Train DCGM diagnostic and output a configuration file with golden values.
-
DCGM_RUN_FLAGS_FORCE_TRAIN 0x0008
UNUSED Ignore warnings against training the diagnostic and train anyway.
-
DCGM_RUN_FLAGS_FAIL_EARLY 0x0010
Enable fail early checks for the Targeted Stress, Targeted Power, SM Stress, and Diagnostic tests.
Unnamed Group
-
DCGM_TOPO_HINT_F_NONE 0x00000000
Topology hints for dcgmSelectGpusByTopology()
No hints specified
-
DCGM_TOPO_HINT_F_IGNOREHEALTH 0x00000001
Ignore the health of the GPUs when picking GPUs for job execution.
By default, only healthy GPUs are considered.
Defines
-
dcgmConnectV2Params_version1 MAKE_DCGM_VERSION(dcgmConnectV2Params_v1, 1)
Version 1 for dcgmConnectV2Params_v1.
-
dcgmConnectV2Params_version2 MAKE_DCGM_VERSION(dcgmConnectV2Params_v2, 2)
Version 2 for dcgmConnectV2Params_v2.
-
dcgmConnectV2Params_version dcgmConnectV2Params_version2
Latest version for dcgmConnectV2Params_t.
-
dcgmHostengineHealth_version1 MAKE_DCGM_VERSION(dcgmHostengineHealth_v1, 1)
-
dcgmHostengineHealth_version dcgmHostengineHealth_version1
Latest version for dcgmHostengineHealth_t.
-
dcgmGroupInfo_version2 MAKE_DCGM_VERSION(dcgmGroupInfo_v2, 2)
Version 2 for dcgmGroupInfo_v2.
-
dcgmGroupInfo_version dcgmGroupInfo_version2
Latest version for dcgmGroupInfo_t.
-
DCGM_MAX_INSTANCES_PER_GPU 8
-
DCGM_MAX_COMPUTE_INSTANCES_PER_GPU DCGM_MAX_INSTANCES_PER_GPU
-
DCGM_MAX_TOTAL_INSTANCES_PER_GPU 14
-
DCGM_MAX_HIERARCHY_INFO DCGM_MAX_NUM_DEVICES *DCGM_MAX_TOTAL_INSTANCES_PER_GPU
-
DCGM_MAX_INSTANCES DCGM_MAX_NUM_DEVICES *DCGM_MAX_INSTANCES_PER_GPU
-
DCGM_MAX_COMPUTE_INSTANCES DCGM_MAX_INSTANCES
-
dcgmMigHierarchy_version2 MAKE_DCGM_VERSION(dcgmMigHierarchy_v2, 2)
-
dcgmMigHierarchy_version dcgmMigHierarchy_version2
-
DCGM_CPU_CORE_BITMASK_COUNT_V1 (DCGM_MAX_NUM_CPU_CORES / sizeof(uint64_t) / CHAR_BIT)
Bitmask indicating which cores are owned by this CPU.
-
dcgmCpuHierarchyOwnedCores_version1 MAKE_DCGM_VERSION(dcgmCpuHierarchyOwnedCores_v1, 1)
-
dcgmCpuHierarchy_version1 MAKE_DCGM_VERSION(dcgmCpuHierarchy_v1, 1)
Version 1 for dcgmCpuHierarchy_t.
-
DCGM_MAX_NUM_FIELD_GROUPS 64
Maximum number of field groups that can exist.
-
DCGM_MAX_FIELD_IDS_PER_FIELD_GROUP 128
Maximum number of field IDs that can be in a single field group.
-
dcgmFieldGroupInfo_version1 MAKE_DCGM_VERSION(dcgmFieldGroupInfo_v1, 1)
Version 1 for dcgmFieldGroupInfo_v1.
-
dcgmFieldGroupInfo_version dcgmFieldGroupInfo_version1
Latest version for dcgmFieldGroupInfo_t.
-
dcgmAllFieldGroup_version1 MAKE_DCGM_VERSION(dcgmAllFieldGroup_v1, 1)
Version 1 for dcgmAllFieldGroup_v1.
-
dcgmAllFieldGroup_version dcgmAllFieldGroup_version1
Latest version for dcgmAllFieldGroup_t.
-
dcgmClockSet_version1 MAKE_DCGM_VERSION(dcgmClockSet_v1, 1)
Version 1 for dcgmClockSet_v1.
-
dcgmClockSet_version dcgmClockSet_version1
Latest version for dcgmClockSet_t.
-
dcgmDeviceSupportedClockSets_version1 MAKE_DCGM_VERSION(dcgmDeviceSupportedClockSets_v1, 1)
Version 1 for dcgmDeviceSupportedClockSets_v1.
-
dcgmDeviceSupportedClockSets_version dcgmDeviceSupportedClockSets_version1
Latest version for dcgmDeviceSupportedClockSets_t.
-
dcgmDevicePidAccountingStats_version1 MAKE_DCGM_VERSION(dcgmDevicePidAccountingStats_v1, 1)
Version 1 for dcgmDevicePidAccountingStats_v1.
-
dcgmDevicePidAccountingStats_version dcgmDevicePidAccountingStats_version1
Latest version for dcgmDevicePidAccountingStats_t.
-
dcgmDeviceThermals_version1 MAKE_DCGM_VERSION(dcgmDeviceThermals_v1, 1)
Version 1 for dcgmDeviceThermals_v1.
-
dcgmDeviceThermals_version dcgmDeviceThermals_version1
Latest version for dcgmDeviceThermals_t.
-
dcgmDevicePowerLimits_version1 MAKE_DCGM_VERSION(dcgmDevicePowerLimits_v1, 1)
Version 1 for dcgmDevicePowerLimits_v1.
-
dcgmDevicePowerLimits_version dcgmDevicePowerLimits_version1
Latest version for dcgmDevicePowerLimits_t.
-
dcgmDeviceIdentifiers_version1 MAKE_DCGM_VERSION(dcgmDeviceIdentifiers_v1, 1)
Version 1 for dcgmDeviceIdentifiers_v1.
-
dcgmDeviceIdentifiers_version dcgmDeviceIdentifiers_version1
Latest version for dcgmDeviceIdentifiers_t.
-
dcgmDeviceMemoryUsage_version1 MAKE_DCGM_VERSION(dcgmDeviceMemoryUsage_v1, 1)
Version 1 for dcgmDeviceMemoryUsage_v1.
-
dcgmDeviceMemoryUsage_version dcgmDeviceMemoryUsage_version1
Latest version for dcgmDeviceMemoryUsage_t.
-
dcgmDeviceVgpuUtilInfo_version1 MAKE_DCGM_VERSION(dcgmDeviceVgpuUtilInfo_v1, 1)
Version 1 for dcgmDeviceVgpuUtilInfo_v1.
-
dcgmDeviceVgpuUtilInfo_version dcgmDeviceVgpuUtilInfo_version1
Latest version for dcgmDeviceVgpuUtilInfo_t.
-
dcgmDeviceEncStats_version1 MAKE_DCGM_VERSION(dcgmDeviceEncStats_v1, 1)
Version 1 for dcgmDeviceEncStats_v1.
-
dcgmDeviceEncStats_version dcgmDeviceEncStats_version1
Latest version for dcgmDeviceEncStats_t.
-
dcgmDeviceFbcStats_version1 MAKE_DCGM_VERSION(dcgmDeviceFbcStats_v1, 1)
Version 1 for dcgmDeviceFbcStats_v1.
-
dcgmDeviceFbcStats_version dcgmDeviceFbcStats_version1
Latest version for dcgmDeviceFbcStats_t.
-
dcgmDeviceFbcSessionInfo_version1 MAKE_DCGM_VERSION(dcgmDeviceFbcSessionInfo_v1, 1)
Version 1 for dcgmDeviceFbcSessionInfo_v1.
-
dcgmDeviceFbcSessionInfo_version dcgmDeviceFbcSessionInfo_version1
Latest version for dcgmDeviceFbcSessionInfo_t.
-
dcgmDeviceFbcSessions_version1 MAKE_DCGM_VERSION(dcgmDeviceFbcSessions_v1, 1)
Version 1 for dcgmDeviceFbcSessions_v1.
-
dcgmDeviceFbcSessions_version dcgmDeviceFbcSessions_version1
Latest version for dcgmDeviceFbcSessions_t.
-
dcgmDeviceVgpuEncSessions_version1 MAKE_DCGM_VERSION(dcgmDeviceVgpuEncSessions_v1, 1)
Version 1 for dcgmDeviceVgpuEncSessions_v1.
-
dcgmDeviceVgpuEncSessions_version dcgmDeviceVgpuEncSessions_version1
Latest version for dcgmDeviceVgpuEncSessions_t.
-
dcgmDeviceVgpuProcessUtilInfo_version1 MAKE_DCGM_VERSION(dcgmDeviceVgpuProcessUtilInfo_v1, 1)
Version 1 for dcgmDeviceVgpuProcessUtilInfo_v1.
-
dcgmDeviceVgpuProcessUtilInfo_version dcgmDeviceVgpuProcessUtilInfo_version1
Latest version for dcgmDeviceVgpuProcessUtilInfo_t.
-
dcgmDeviceVgpuTypeInfo_version1 MAKE_DCGM_VERSION(dcgmDeviceVgpuTypeInfo_v1, 1)
Version 1 for dcgmDeviceVgpuTypeInfo_v1.
-
dcgmDeviceVgpuTypeInfo_version2 MAKE_DCGM_VERSION(dcgmDeviceVgpuTypeInfo_v2, 2)
Version 2 for dcgmDeviceVgpuTypeInfo_v2.
-
dcgmDeviceVgpuTypeInfo_version dcgmDeviceVgpuTypeInfo_version2
Latest version for dcgmDeviceVgpuTypeInfo_t.
-
dcgmDeviceSupportedVgpuTypeInfo_version1 MAKE_DCGM_VERSION(dcgmDeviceSupportedVgpuTypeInfo_v1, 1)
Version 1 for dcgmDeviceSupportedVgpuTypeInfo_v1.
-
dcgmDeviceSupportedVgpuTypeInfo_version dcgmDeviceSupportedVgpuTypeInfo_version1
Latest version for dcgmDeviceSupportedVgpuTypeInfo_t.
-
dcgmDeviceSettings_version2 MAKE_DCGM_VERSION(dcgmDeviceSettings_v2, 2)
-
dcgmDeviceSettings_version dcgmDeviceSettings_version2
-
dcgmDeviceAttributes_version3 MAKE_DCGM_VERSION(dcgmDeviceAttributes_v3, 3)
Version 3 for dcgmDeviceAttributes_v3.
-
dcgmDeviceAttributes_version dcgmDeviceAttributes_version3
Latest version for dcgmDeviceAttributes_t.
-
dcgmDeviceMigAttributesInfo_version1 MAKE_DCGM_VERSION(dcgmDeviceMigAttributesInfo_v1, 1)
Version 1 for dcgmDeviceMigAttributesInfo_v1.
-
dcgmDeviceMigAttributesInfo_version dcgmDeviceMigAttributesInfo_version1
Latest version for dcgmDeviceMigAttributesInfo_t.
-
dcgmDeviceMigAttributes_version1 MAKE_DCGM_VERSION(dcgmDeviceMigAttributes_v1, 1)
Version 1 for dcgmDeviceMigAttributes_v1.
-
dcgmDeviceMigAttributes_version dcgmDeviceMigAttributes_version1
Latest version for dcgmDeviceMigAttributes_t.
-
dcgmGpuInstanceProfileInfo_version1 MAKE_DCGM_VERSION(dcgmGpuInstanceProfileInfo_v1, 1)
Version 1 for dcgmGpuInstanceProfileInfo_v1.
-
dcgmGpuInstanceProfileInfo_version dcgmGpuInstanceProfileInfo_version1
Latest version for dcgmGpuInstanceProfileInfo_t.
-
dcgmGpuInstanceProfiles_version1 MAKE_DCGM_VERSION(dcgmGpuInstanceProfiles_v1, 1)
Version 1 for dcgmGpuInstanceProfiles_v1.
-
dcgmGpuInstanceProfiles_version dcgmGpuInstanceProfiles_version1
Latest version for dcgmGpuInstanceProfiles_t.
-
dcgmComputeInstanceProfileInfo_version1 MAKE_DCGM_VERSION(dcgmComputeInstanceProfileInfo_v1, 1)
Version 1 for dcgmComputeInstanceProfileInfo_v1.
-
dcgmComputeInstanceProfileInfo_version dcgmComputeInstanceProfileInfo_version1
Latest version for dcgmComputeInstanceProfileInfo_t.
-
dcgmComputeInstanceProfiles_version1 MAKE_DCGM_VERSION(dcgmComputeInstanceProfiles_v1, 1)
Version 1 for dcgmComputeInstanceProfiles_v1.
-
dcgmComputeInstanceProfiles_version dcgmComputeInstanceProfiles_version1
Latest version for dcgmComputeInstanceProfiles_t.
-
DCGM_MAX_VGPU_TYPES_PER_PGPU 32
Maximum number of vGPU types per physical GPU.
-
DCGM_DEVICE_UUID_BUFFER_SIZE 80
Represents the size of a buffer that holds a string related to attributes specific to a vGPU instance.
-
dcgmConfig_version1 MAKE_DCGM_VERSION(dcgmConfig_v1, 1)
Version 1 for dcgmConfig_v1.
-
dcgmConfig_version dcgmConfig_version1
Latest version for dcgmConfig_t.
-
dcgmPolicyViolation_version1 MAKE_DCGM_VERSION(dcgmPolicyViolation_v1, 1)
-
dcgmPolicyViolation_version dcgmPolicyViolation_version1
-
DCGM_POLICY_COND_IDX_MAX 7
-
DCGM_POLICY_COND_MAX DCGM_POLICY_COND_IDX_MAX
-
dcgmPolicy_version1 MAKE_DCGM_VERSION(dcgmPolicy_v1, 1)
Version 1 for dcgmPolicy_v1.
-
dcgmPolicy_version dcgmPolicy_version1
Latest version for dcgmPolicy_t.
-
dcgmPolicyCallbackResponse_version1 MAKE_DCGM_VERSION(dcgmPolicyCallbackResponse_v1, 1)
Version 1 for dcgmPolicyCallbackResponse_v1.
-
dcgmPolicyCallbackResponse_version dcgmPolicyCallbackResponse_version1
Latest version for dcgmPolicyCallbackResponse_t.
-
DCGM_MAX_BLOB_LENGTH 4096
Set above size of largest blob entry.
Currently this is dcgmDeviceVgpuTypeInfo_v1
-
dcgmFieldValue_version1 MAKE_DCGM_VERSION(dcgmFieldValue_v1, 1)
Version 1 for dcgmFieldValue_v1.
-
dcgmFieldValue_version2 MAKE_DCGM_VERSION(dcgmFieldValue_v2, 2)
Version 2 for dcgmFieldValue_v2.
-
DCGM_FV_FLAG_LIVE_DATA 0x00000001
Field value flags used by dcgmEntitiesGetLatestValues.
Retrieve live data from the driver rather than cached data. Warning: Setting this flag will result in multiple calls to the NVIDIA driver that will be much slower than retrieving a cached value.
-
DCGM_HEALTH_WATCH_COUNT_V1 10
For iterating through the dcgmHealthSystems_v1 enum
-
DCGM_HEALTH_WATCH_COUNT_V2 12
For iterating through the dcgmHealthSystems_v2 enum
-
DCGM_ERR_MSG_LENGTH 512
-
DCGM_HEALTH_WATCH_MAX_INCIDENTS DCGM_GROUP_MAX_ENTITIES
-
dcgmHealthResponse_version4 MAKE_DCGM_VERSION(dcgmHealthResponse_v4, 4)
Version 4 for dcgmHealthResponse_v4.
-
dcgmHealthResponse_version dcgmHealthResponse_version4
Latest version for dcgmHealthResponse_t.
-
dcgmHealthSetParams_version2 MAKE_DCGM_VERSION(dcgmHealthSetParams_v2, 2)
Version 2 for dcgmHealthSetParams_v2.
-
DCGM_MAX_PID_INFO_NUM 16
-
dcgmPidInfo_version2 MAKE_DCGM_VERSION(dcgmPidInfo_v2, 2)
Version 2 for dcgmPidInfo_v2.
-
dcgmPidInfo_version dcgmPidInfo_version2
Latest version for dcgmPidInfo_t.
-
dcgmJobInfo_version3 MAKE_DCGM_VERSION(dcgmJobInfo_v3, 3)
Version 3 for dcgmJobInfo_v3.
-
dcgmJobInfo_version dcgmJobInfo_version3
Latest version for dcgmJobInfo_t.
-
dcgmRunningProcess_version1 MAKE_DCGM_VERSION(dcgmRunningProcess_v1, 1)
Version 1 for dcgmRunningProcess_v1.
-
dcgmRunningProcess_version dcgmRunningProcess_version1
Latest version for dcgmRunningProcess_t.
-
DCGM_MAX_ERRORS 5
-
DCGM_SM_PERF_INDEX DCGM_SM_STRESS_INDEX
-
DCGM_TARGETED_PERF_INDEX DCGM_TARGETED_PERF_INDEX
-
DCGM_PER_GPU_TEST_COUNT_V8 13
-
DCGM_PER_GPU_TEST_COUNT_V7 9
-
DCGM_DIAG_AUX_DATA_LEN 2048
-
dcgmDiagTestAuxData_version1 MAKE_DCGM_VERSION(dcgmDiagTestAuxData_v1, 1)
Version 1 for dcgmDiagTestAuxData_v1.
-
dcgmDiagTestAuxData_version dcgmDiagTestAuxData_version1
-
DCGM_SWTEST_COUNT 10
-
LEVEL_ONE_MAX_RESULTS 16
-
DCGM_DEVICE_ID_LEN 5
-
DCGM_VERSION_LEN 12
-
dcgmDiagResponse_version10 MAKE_DCGM_VERSION(dcgmDiagResponse_v10, 10)
Version 10 for dcgmDiagResponse_v10.
-
dcgmDiagResponse_version9 MAKE_DCGM_VERSION(dcgmDiagResponse_v9, 9)
Version 9 for dcgmDiagResponse_v9.
-
dcgmDiagResponse_version8 MAKE_DCGM_VERSION(dcgmDiagResponse_v8, 8)
Version 8 for dcgmDiagResponse_v8.
-
dcgmDiagResponse_version7 MAKE_DCGM_VERSION(dcgmDiagResponse_v7, 7)
Version 7 for dcgmDiagResponse_v7.
-
DCGM_TOPOLOGY_PATH_PCI(x) (dcgmGpuTopologyLevel_t)((unsigned int)(x)&0xFF)
-
DCGM_TOPOLOGY_PATH_NVLINK(x) (dcgmGpuTopologyLevel_t)((unsigned int)(x)&0xFFFFFF00)
-
DCGM_AFFINITY_BITMASK_ARRAY_SIZE 8
-
dcgmDeviceTopology_version1 MAKE_DCGM_VERSION(dcgmDeviceTopology_v1, 1)
Version 1 for dcgmDeviceTopology_v1.
-
dcgmDeviceTopology_version dcgmDeviceTopology_version1
Latest version for dcgmDeviceTopology_t.
-
dcgmGroupTopology_version1 MAKE_DCGM_VERSION(dcgmGroupTopology_v1, 1)
Version 1 for dcgmGroupTopology_v1.
-
dcgmGroupTopology_version dcgmGroupTopology_version1
Latest version for dcgmGroupTopology_t.
-
dcgmIntrospectMemory_version1 MAKE_DCGM_VERSION(dcgmIntrospectMemory_v1, 1)
Version 1 for dcgmIntrospectMemory_v1.
-
dcgmIntrospectMemory_version dcgmIntrospectMemory_version1
Latest version for dcgmIntrospectMemory_t.
-
dcgmIntrospectCpuUtil_version1 MAKE_DCGM_VERSION(dcgmIntrospectCpuUtil_v1, 1)
Version 1 for dcgmIntrospectCpuUtil_v1.
-
dcgmIntrospectCpuUtil_version dcgmIntrospectCpuUtil_version1
Latest version for dcgmIntrospectCpuUtil_t.
-
DCGM_MAX_CONFIG_FILE_LEN 10000
-
DCGM_MAX_TEST_NAMES 20
-
DCGM_MAX_TEST_NAMES_LEN 50
-
DCGM_MAX_TEST_PARMS 100
-
DCGM_MAX_TEST_PARMS_LEN 100
-
DCGM_MAX_TEST_PARMS_LEN_V2 1050
-
DCGM_GPU_LIST_LEN 50
-
DCGM_EXPECTED_ENTITIES_LEN 50
-
DCGM_FILE_LEN 30
-
DCGM_PATH_LEN 128
-
DCGM_THROTTLE_MASK_LEN 50
-
dcgmRunDiag_version8 MAKE_DCGM_VERSION(dcgmRunDiag_v8, 8)
Version 8 for dcgmRunDiag_v8.
-
dcgmRunDiag_version7 MAKE_DCGM_VERSION(dcgmRunDiag_v7, 7)
Version 7 for dcgmRunDiag_v7.
-
DCGM_GEGE_FLAG_ONLY_SUPPORTED 0x00000001
Flags for dcgmGetEntityGroupEntities’s flags parameter.
Only return entities that are supported by DCGM. This mimics the behavior of dcgmGetAllSupportedDevices().
-
dcgmTopoSchedHint_version1 MAKE_DCGM_VERSION(dcgmTopoSchedHint_v1, 1)
-
dcgmNvLinkStatus_version3 MAKE_DCGM_VERSION(dcgmNvLinkStatus_v3, 3)
Version 3 of dcgmNvLinkStatus.
-
DCGM_SUMMARY_MIN 0x00000001
-
DCGM_SUMMARY_MAX 0x00000002
-
DCGM_SUMMARY_AVG 0x00000004
-
DCGM_SUMMARY_SUM 0x00000008
-
DCGM_SUMMARY_COUNT 0x00000010
-
DCGM_SUMMARY_INTEGRAL 0x00000020
-
DCGM_SUMMARY_DIFF 0x00000040
-
DCGM_SUMMARY_SIZE 7
-
dcgmFieldSummaryRequest_version1 MAKE_DCGM_VERSION(dcgmFieldSummaryRequest_v1, 1)
-
DCGM_MODULE_STATUSES_CAPACITY 16
-
dcgmModuleGetStatuses_version1 MAKE_DCGM_VERSION(dcgmModuleGetStatuses_v1, 1)
Version 1 of dcgmModuleGetStatuses.
-
dcgmModuleGetStatuses_version dcgmModuleGetStatuses_version1
-
dcgmStartEmbeddedV2Params_version1 MAKE_DCGM_VERSION(dcgmStartEmbeddedV2Params_v1, 1)
Version 1 for dcgmStartEmbeddedV2Params_v1.
-
dcgmStartEmbeddedV2Params_version2 MAKE_DCGM_VERSION(dcgmStartEmbeddedV2Params_v2, 2)
Version 2 for dcgmStartEmbeddedV2Params_v2.
-
DCGM_PROF_MAX_NUM_GROUPS_V2 10
Maximum number of metric ID groups that can exist in DCGM.
-
DCGM_PROF_MAX_FIELD_IDS_PER_GROUP_V2 64
Maximum number of field IDs that can be in a single DCGM profiling metric group.
-
dcgmProfGetMetricGroups_version3 MAKE_DCGM_VERSION(dcgmProfGetMetricGroups_v3, 3)
Version 3 of dcgmProfGetMetricGroups_t.
See dcgm_structs_24.h for v2
-
dcgmProfGetMetricGroups_version dcgmProfGetMetricGroups_version3
-
dcgmProfWatchFields_version2 MAKE_DCGM_VERSION(dcgmProfWatchFields_v2, 2)
Version 2 of dcgmProfWatchFields_v2.
-
dcgmProfWatchFields_version dcgmProfWatchFields_version2
-
dcgmProfUnwatchFields_version1 MAKE_DCGM_VERSION(dcgmProfUnwatchFields_v1, 1)
Version 1 of dcgmProfUnwatchFields_v1.
-
dcgmProfUnwatchFields_version dcgmProfUnwatchFields_version1
-
dcgmSettingsSetLoggingSeverity_version1 MAKE_DCGM_VERSION(dcgmSettingsSetLoggingSeverity_v1, 1)
-
dcgmSettingsSetLoggingSeverity_version dcgmSettingsSetLoggingSeverity_version1
-
dcgmVersionInfo_version2 MAKE_DCGM_VERSION(dcgmVersionInfo_v2, 2)
Version 2 of dcgmVersionInfo_v2.
-
dcgmVersionInfo_version dcgmVersionInfo_version2
Typedefs
-
typedef uintptr_t dcgmHandle_t
Identifier for DCGM Handle.
-
typedef uintptr_t dcgmGpuGrp_t
Identifier for a group of GPUs. A group can have one or more GPUs.
-
typedef uintptr_t dcgmFieldGrp_t
Identifier for a group of fields.
-
typedef uintptr_t dcgmStatus_t
Identifier for list of status codes.
-
typedef struct dcgm_link_s dcgm_link_t
Represents a link object.
type should be one of DCGM_FE_GPU or DCGM_FE_SWITCH; gpuId or switchID is the associated gpu or switch; and index is the link index, 0-based, with TX (even) coming before RX (odd).
-
typedef dcgmConnectV2Params_v2 dcgmConnectV2Params_t
Typedef for dcgmConnectV2Params_v2.
-
typedef dcgmHostengineHealth_v1 dcgmHostengineHealth_t
Typedef for dcgmHostengineHealth_v1.
-
typedef dcgmGroupInfo_v2 dcgmGroupInfo_t
Typedef for dcgmGroupInfo_v2.
-
typedef dcgmCpuHierarchyOwnedCores_v1 dcgmCpuHierarchyOwnedCores_t
-
typedef dcgmCpuHierarchy_v1 dcgmCpuHierarchy_t
-
typedef dcgmFieldGroupInfo_v1 dcgmFieldGroupInfo_t
-
typedef dcgmAllFieldGroup_v1 dcgmAllFieldGroup_t
-
typedef dcgmClockSet_v1 dcgmClockSet_t
Typedef for dcgmClockSet_v1.
-
typedef dcgmDeviceSupportedClockSets_v1 dcgmDeviceSupportedClockSets_t
Typedef for dcgmDeviceSupportedClockSets_v1.
-
typedef dcgmDevicePidAccountingStats_v1 dcgmDevicePidAccountingStats_t
Typedef for dcgmDevicePidAccountingStats_v1.
-
typedef dcgmDeviceThermals_v1 dcgmDeviceThermals_t
Typedef for dcgmDeviceThermals_v1.
-
typedef dcgmDevicePowerLimits_v1 dcgmDevicePowerLimits_t
Typedef for dcgmDevicePowerLimits_v1.
-
typedef dcgmDeviceIdentifiers_v1 dcgmDeviceIdentifiers_t
Typedef for dcgmDeviceIdentifiers_v1.
-
typedef dcgmDeviceMemoryUsage_v1 dcgmDeviceMemoryUsage_t
Typedef for dcgmDeviceMemoryUsage_v1.
-
typedef dcgmDeviceVgpuUtilInfo_v1 dcgmDeviceVgpuUtilInfo_t
Typedef for dcgmDeviceVgpuUtilInfo_v1.
-
typedef dcgmDeviceEncStats_v1 dcgmDeviceEncStats_t
Typedef for dcgmDeviceEncStats_v1.
-
typedef dcgmDeviceFbcStats_v1 dcgmDeviceFbcStats_t
Typedef for dcgmDeviceFbcStats_v1.
-
typedef enum dcgmFBCSessionType_enum dcgmFBCSessionType_t
-
typedef dcgmDeviceFbcSessionInfo_v1 dcgmDeviceFbcSessionInfo_t
Typedef for dcgmDeviceFbcSessionInfo_v1.
-
typedef dcgmDeviceFbcSessions_v1 dcgmDeviceFbcSessions_t
Typedef for dcgmDeviceFbcSessions_v1.
-
typedef enum dcgmEncoderQueryType_enum dcgmEncoderType_t
-
typedef dcgmDeviceVgpuEncSessions_v1 dcgmDeviceVgpuEncSessions_t
Typedef for dcgmDeviceVgpuEncSessions_v1.
-
typedef dcgmDeviceVgpuProcessUtilInfo_v1 dcgmDeviceVgpuProcessUtilInfo_t
Typedef for dcgmDeviceVgpuProcessUtilInfo_v1.
-
typedef dcgmDeviceVgpuTypeInfo_v2 dcgmDeviceVgpuTypeInfo_t
Typedef for dcgmDeviceVgpuTypeInfo_v2.
-
typedef dcgmDeviceSupportedVgpuTypeInfo_v1 dcgmDeviceSupportedVgpuTypeInfo_t
Typedef for dcgmDeviceSupportedVgpuTypeInfo_v1.
-
typedef dcgmDeviceSettings_v2 dcgmDeviceSettings_t
-
typedef dcgmDeviceAttributes_v3 dcgmDeviceAttributes_t
Typedef for dcgmDeviceAttributes_v3.
-
typedef dcgmDeviceMigAttributesInfo_v1 dcgmDeviceMigAttributesInfo_t
Typedef for dcgmDeviceMigAttributesInfo_v1.
-
typedef dcgmDeviceMigAttributes_v1 dcgmDeviceMigAttributes_t
Typedef for dcgmDeviceMigAttributes_v1.
-
typedef dcgmGpuInstanceProfileInfo_v1 dcgmGpuInstanceProfileInfo_t
Typedef for dcgmGpuInstanceProfileInfo_v1.
-
typedef dcgmGpuInstanceProfiles_v1 dcgmGpuInstanceProfiles_t
Typedef for dcgmGpuInstanceProfiles_v1.
-
typedef dcgmComputeInstanceProfileInfo_v1 dcgmComputeInstanceProfileInfo_t
Typedef for dcgmComputeInstanceProfileInfo_v1.
-
typedef dcgmComputeInstanceProfiles_v1 dcgmComputeInstanceProfiles_t
Typedef for dcgmComputeInstanceProfiles_v1.
-
typedef dcgmConfig_v1 dcgmConfig_t
Typedef for dcgmConfig_v1.
-
typedef int (*fpRecvUpdates)(void *userData)
Represents a callback to receive updates from asynchronous functions.
Currently the only implemented callback function is dcgmPolicyRegister and the void * data will be a pointer to dcgmPolicyCallbackResponse_t. Ex. dcgmPolicyCallbackResponse_t *callbackResponse = (dcgmPolicyCallbackResponse_t *) userData;
-
typedef dcgmPolicyViolation_v1 dcgmPolicyViolation_t
-
typedef enum dcgmPolicyConditionIdx_enum dcgmPolicyConditionIdx_t
Enumeration for policy conditions.
When used as part of dcgmPolicy_t these have corresponding parameters to allow them to be switched on/off or set specific violation thresholds
-
typedef enum dcgmPolicyCondition_enum dcgmPolicyCondition_t
Bitmask enumeration for policy conditions.
When used as part of dcgmPolicy_t these have corresponding parameters to allow them to be switched on/off or set specific violation thresholds
-
typedef struct dcgmPolicyConditionParams_st dcgmPolicyConditionParams_t
Structure for policy condition parameters.
This structure contains a tag that represents the type of the value being passed as well as a “val” which is a union of the possible value types. For example, to pass a true boolean: tag = BOOL, val.boolean = 1.
-
typedef enum dcgmPolicyMode_enum dcgmPolicyMode_t
Enumeration for policy modes.
-
typedef enum dcgmPolicyIsolation_enum dcgmPolicyIsolation_t
Enumeration for policy isolation modes.
-
typedef enum dcgmPolicyAction_enum dcgmPolicyAction_t
Enumeration for policy actions.
-
typedef enum dcgmPolicyValidation_enum dcgmPolicyValidation_t
Enumeration for policy validation actions.
-
typedef enum dcgmPolicyFailureResp_enum dcgmPolicyFailureResp_t
Enumeration for policy failure responses.
-
typedef dcgmPolicy_v1 dcgmPolicy_t
Typedef for dcgmPolicy_v1.
-
typedef dcgmPolicyCallbackResponse_v1 dcgmPolicyCallbackResponse_t
Typedef for dcgmPolicyCallbackResponse_v1.
-
typedef int (*dcgmFieldValueEnumeration_f)(unsigned int gpuId, dcgmFieldValue_v1 *values, int numValues, void *userData)
User callback function for processing one or more field updates.
This callback will be invoked one or more times per field until all of the expected field values have been enumerated. It is up to the callee to detect when the field id changes.
- Param gpuId:
IN: GPU ID of the GPU this field value set belongs to
- Param values:
IN: Field values. These values must be copied as they will be destroyed as soon as this call returns.
- Param numValues:
IN: Number of entries that are valid in values[]
- Param userData:
IN: User data pointer passed to the update function that generated this callback
- Return:
0 if OK, <0 if enumeration should stop. This allows the callee to abort field value enumeration.
-
typedef int (*dcgmFieldValueEntityEnumeration_f)(dcgm_field_entity_group_t entityGroupId, dcgm_field_eid_t entityId, dcgmFieldValue_v1 *values, int numValues, void *userData)
User callback function for processing one or more field updates.
This callback will be invoked one or more times per field until all of the expected field values have been enumerated. It is up to the callee to detect when the field id changes.
- Param entityGroupId:
IN: entityGroup of the entity this field value set belongs to
- Param entityId:
IN: Entity this field value set belongs to
- Param values:
IN: Field values. These values must be copied as they will be destroyed as soon as this call returns.
- Param numValues:
IN: Number of entries that are valid in values[]
- Param userData:
IN: User data pointer passed to the update function that generated this callback
- Return:
0 if OK, <0 if enumeration should stop. This allows the callee to abort field value enumeration.
-
typedef enum dcgmHealthSystems_enum dcgmHealthSystems_t
Systems structure used to enable or disable health watch systems.
-
typedef enum dcgmHealthWatchResult_enum dcgmHealthWatchResults_t
Health Watch test results.
-
typedef dcgmHealthResponse_v4 dcgmHealthResponse_t
Typedef for dcgmHealthResponse_v4.
-
typedef dcgmPidInfo_v2 dcgmPidInfo_t
Typedef for dcgmPidInfo_v2.
-
typedef dcgmJobInfo_v3 dcgmJobInfo_t
Typedef for dcgmJobInfo_v3.
-
typedef dcgmRunningProcess_v1 dcgmRunningProcess_t
Typedef for dcgmRunningProcess_v1.
-
typedef enum dcgmDiagResult_enum dcgmDiagResult_t
Diagnostic test results.
-
typedef enum dcgmPerGpuTestIndices_enum dcgmPerGpuTestIndices_t
Diagnostic per gpu tests - fixed indices for dcgmDiagResponsePerGpu_t.results[].
-
typedef enum dcgmSoftwareTest_enum dcgmSoftwareTest_t
-
typedef enum dcgmGpuLevel_enum dcgmGpuTopologyLevel_t
Represents level relationships within a system between two GPUs. The enums are spaced to allow for future relationships.
These match the definitions in nvml.h
-
typedef dcgmDeviceTopology_v1 dcgmDeviceTopology_t
Typedef for dcgmDeviceTopology_v1.
-
typedef dcgmGroupTopology_v1 dcgmGroupTopology_t
Typedef for dcgmGroupTopology_v1.
-
typedef dcgmIntrospectMemory_v1 dcgmIntrospectMemory_t
Typedef for dcgmIntrospectMemory_v1.
-
typedef dcgmIntrospectCpuUtil_v1 dcgmIntrospectCpuUtil_t
Typedef for dcgmIntrospectCpuUtil_v1.
-
typedef enum dcgmGpuNVLinkErrorType_enum dcgmGpuNVLinkErrorType_t
Identifies a GPU NVLink error type returned by DCGM_FI_DEV_GPU_NVLINK_ERRORS.
-
typedef dcgmTopoSchedHint_v1 dcgmTopoSchedHint_t
-
typedef enum dcgmNvLinkLinkState_enum dcgmNvLinkLinkState_t
NvLink link states.
-
typedef dcgmNvLinkStatus_v3 dcgmNvLinkStatus_t
-
typedef dcgmFieldSummaryRequest_v1 dcgmFieldSummaryRequest_t
-
typedef dcgmModuleGetStatuses_v1 dcgmModuleGetStatuses_t
-
typedef dcgmProfGetMetricGroups_v3 dcgmProfGetMetricGroups_t
-
typedef dcgmProfWatchFields_v2 dcgmProfWatchFields_t
-
typedef dcgmProfUnwatchFields_v1 dcgmProfUnwatchFields_t
-
typedef dcgmSettingsSetLoggingSeverity_v1 dcgmSettingsSetLoggingSeverity_t
-
typedef dcgmVersionInfo_v2 dcgmVersionInfo_t
Enums
-
enum DcgmLoggingSeverity_t
DCGM Logging Severities.
These match up with plog severities defined in Severity.h. Each level includes all of the levels above it. For instance, level 4 includes 3, 2, and 1 as well.
Values:
-
enumerator DcgmLoggingSeverityUnspecified
Don’t care/inherit from the environment
-
enumerator DcgmLoggingSeverityNone
No logging
-
enumerator DcgmLoggingSeverityFatal
Fatal Errors
-
enumerator DcgmLoggingSeverityError
Errors
-
enumerator DcgmLoggingSeverityWarning
Warnings
-
enumerator DcgmLoggingSeverityInfo
Informative
-
enumerator DcgmLoggingSeverityDebug
Debug information (will generate large logs)
-
enumerator DcgmLoggingSeverityVerbose
Verbose debugging information
-
enumerator DcgmLoggingSeverityUnspecified
-
enum dcgmMigProfile_t
Enum for the different kinds of MIG profiles.
Values:
-
enumerator DcgmMigProfileNone
No profile (for GPUs)
-
enumerator DcgmMigProfileGpuInstanceSlice1
GPU instance slice 1
-
enumerator DcgmMigProfileGpuInstanceSlice2
GPU instance slice 2
-
enumerator DcgmMigProfileGpuInstanceSlice3
GPU instance slice 3
-
enumerator DcgmMigProfileGpuInstanceSlice4
GPU instance slice 4
-
enumerator DcgmMigProfileGpuInstanceSlice7
GPU instance slice 7
-
enumerator DcgmMigProfileGpuInstanceSlice8
GPU instance slice 8
-
enumerator DcgmMigProfileGpuInstanceSlice6
GPU instance slice 6
-
enumerator DcgmMigProfileGpuInstanceSlice1Rev1
GPU instance slice 1 revision 1
-
enumerator DcgmMigProfileGpuInstanceSlice2Rev1
GPU instance slice 2 revision 1
-
enumerator DcgmMigProfileGpuInstanceSlice1Rev2
GPU instance slice 1 revision 2
-
enumerator DcgmMigProfileComputeInstanceSlice1
compute instance slice 1
-
enumerator DcgmMigProfileComputeInstanceSlice2
compute instance slice 2
-
enumerator DcgmMigProfileComputeInstanceSlice3
compute instance slice 3
-
enumerator DcgmMigProfileComputeInstanceSlice4
compute instance slice 4
-
enumerator DcgmMigProfileComputeInstanceSlice7
compute instance slice 7
-
enumerator DcgmMigProfileComputeInstanceSlice8
compute instance slice 8
-
enumerator DcgmMigProfileComputeInstanceSlice6
compute instance slice 6
-
enumerator DcgmMigProfileComputeInstanceSlice1Rev1
compute instance slice 1 revision 1
-
enumerator DcgmMigProfileNone
-
enum dcgmFBCSessionType_enum
Values:
-
enumerator DCGM_FBC_SESSION_TYPE_UNKNOWN
Unknown.
-
enumerator DCGM_FBC_SESSION_TYPE_TOSYS
FB capture for a system buffer.
-
enumerator DCGM_FBC_SESSION_TYPE_CUDA
FB capture for a cuda buffer.
-
enumerator DCGM_FBC_SESSION_TYPE_VID
FB capture for a Vid buffer.
-
enumerator DCGM_FBC_SESSION_TYPE_HWENC
FB capture for a NVENC HW buffer.
-
enumerator DCGM_FBC_SESSION_TYPE_UNKNOWN
-
enum dcgmEncoderQueryType_enum
Values:
-
enumerator DCGM_ENCODER_QUERY_H264
-
enumerator DCGM_ENCODER_QUERY_HEVC
-
enumerator DCGM_ENCODER_QUERY_H264
-
enum dcgmPolicyConditionIdx_enum
Enumeration for policy conditions.
When used as part of dcgmPolicy_t these have corresponding parameters to allow them to be switched on/off or set specific violation thresholds
Values:
-
enumerator DCGM_POLICY_COND_IDX_DBE
Double bit errors — boolean in dcgmPolicyConditionParams_t.
-
enumerator DCGM_POLICY_COND_IDX_PCI
PCI events/errors — boolean in dcgmPolicyConditionParams_t.
-
enumerator DCGM_POLICY_COND_IDX_MAX_PAGES_RETIRED
Maximum number of retired pages — number required in dcgmPolicyConditionParams_t.
-
enumerator DCGM_POLICY_COND_IDX_THERMAL
Thermal violation — number required in dcgmPolicyConditionParams_t.
-
enumerator DCGM_POLICY_COND_IDX_POWER
Power violation — number required in dcgmPolicyConditionParams_t.
-
enumerator DCGM_POLICY_COND_IDX_NVLINK
NVLINK errors — boolean in dcgmPolicyConditionParams_t.
-
enumerator DCGM_POLICY_COND_IDX_XID
XID errors — number required in dcgmPolicyConditionParams_t.
-
enumerator DCGM_POLICY_COND_IDX_DBE
-
enum dcgmPolicyCondition_enum
Bitmask enumeration for policy conditions.
When used as part of dcgmPolicy_t these have corresponding parameters to allow them to be switched on/off or set specific violation thresholds
Values:
-
enumerator DCGM_POLICY_COND_DBE
Double bit errors — boolean in dcgmPolicyConditionParams_t.
-
enumerator DCGM_POLICY_COND_PCI
PCI events/errors — boolean in dcgmPolicyConditionParams_t.
-
enumerator DCGM_POLICY_COND_MAX_PAGES_RETIRED
Maximum number of retired pages — number required in dcgmPolicyConditionParams_t.
-
enumerator DCGM_POLICY_COND_THERMAL
Thermal violation — number required in dcgmPolicyConditionParams_t.
-
enumerator DCGM_POLICY_COND_POWER
Power violation — number required in dcgmPolicyConditionParams_t.
-
enumerator DCGM_POLICY_COND_NVLINK
NVLINK errors — boolean in dcgmPolicyConditionParams_t.
-
enumerator DCGM_POLICY_COND_XID
XID errors — number required in dcgmPolicyConditionParams_t.
-
enumerator DCGM_POLICY_COND_DBE
-
enum dcgmPolicyMode_enum
Enumeration for policy modes.
Values:
-
enumerator DCGM_POLICY_MODE_AUTOMATED
automatic mode
-
enumerator DCGM_POLICY_MODE_MANUAL
manual mode
-
enumerator DCGM_POLICY_MODE_AUTOMATED
-
enum dcgmPolicyIsolation_enum
Enumeration for policy isolation modes.
Values:
-
enumerator DCGM_POLICY_ISOLATION_NONE
no isolation of GPUs on error
-
enumerator DCGM_POLICY_ISOLATION_NONE
-
enum dcgmPolicyAction_enum
Enumeration for policy actions.
Values:
-
enumerator DCGM_POLICY_ACTION_NONE
no action
-
enumerator DCGM_POLICY_ACTION_GPURESET
Deprecated - perform a GPU reset on violation.
-
enumerator DCGM_POLICY_ACTION_NONE
-
enum dcgmPolicyValidation_enum
Enumeration for policy validation actions.
Values:
-
enumerator DCGM_POLICY_VALID_NONE
no validation after an action is performed
-
enumerator DCGM_POLICY_VALID_SV_SHORT
run a short System Validation on the system after failure
-
enumerator DCGM_POLICY_VALID_SV_MED
run a medium System Validation test after failure
-
enumerator DCGM_POLICY_VALID_SV_LONG
run an extensive System Validation test after failure
-
enumerator DCGM_POLICY_VALID_SV_XLONG
run a more extensive System Validation test after failure
-
enumerator DCGM_POLICY_VALID_NONE
-
enum dcgmPolicyFailureResp_enum
Enumeration for policy failure responses.
Values:
-
enumerator DCGM_POLICY_FAILURE_NONE
on failure of validation perform no action
-
enumerator DCGM_POLICY_FAILURE_NONE
-
enum dcgmHealthSystems_enum
Systems structure used to enable or disable health watch systems.
Values:
-
enumerator DCGM_HEALTH_WATCH_PCIE
PCIe system watches (must have 1m of data before query)
-
enumerator DCGM_HEALTH_WATCH_NVLINK
NVLINK system watches.
-
enumerator DCGM_HEALTH_WATCH_PMU
Power management unit watches.
-
enumerator DCGM_HEALTH_WATCH_MCU
Micro-controller unit watches.
-
enumerator DCGM_HEALTH_WATCH_MEM
Memory watches.
-
enumerator DCGM_HEALTH_WATCH_SM
Streaming multiprocessor watches.
-
enumerator DCGM_HEALTH_WATCH_INFOROM
Inforom watches.
-
enumerator DCGM_HEALTH_WATCH_THERMAL
Temperature watches (must have 1m of data before query)
-
enumerator DCGM_HEALTH_WATCH_POWER
Power watches (must have 1m of data before query)
-
enumerator DCGM_HEALTH_WATCH_DRIVER
Driver-related watches.
-
enumerator DCGM_HEALTH_WATCH_NVSWITCH_NONFATAL
Non-fatal errors in NvSwitch.
-
enumerator DCGM_HEALTH_WATCH_NVSWITCH_FATAL
Fatal errors in NvSwitch.
-
enumerator DCGM_HEALTH_WATCH_ALL
All watches enabled.
-
enumerator DCGM_HEALTH_WATCH_PCIE
-
enum dcgmHealthWatchResult_enum
Health Watch test results.
Values:
-
enumerator DCGM_HEALTH_RESULT_PASS
All results within this system are reporting normal.
-
enumerator DCGM_HEALTH_RESULT_WARN
A warning has been issued, refer to the response for more information.
-
enumerator DCGM_HEALTH_RESULT_FAIL
A failure has been issued, refer to the response for more information.
-
enumerator DCGM_HEALTH_RESULT_PASS
-
enum dcgmDiagnosticLevel_t
Enumeration for diagnostic levels.
Values:
-
enumerator DCGM_DIAG_LVL_INVALID
Uninitialized.
-
enumerator DCGM_DIAG_LVL_SHORT
run a very basic health check on the system
-
enumerator DCGM_DIAG_LVL_MED
run a medium-length diagnostic (a few minutes)
-
enumerator DCGM_DIAG_LVL_LONG
run an extensive diagnostic (several minutes)
-
enumerator DCGM_DIAG_LVL_XLONG
run a very extensive diagnostic (many minutes)
-
enumerator DCGM_DIAG_LVL_INVALID
-
enum dcgmDiagResult_enum
Diagnostic test results.
Values:
-
enumerator DCGM_DIAG_RESULT_PASS
This test passed the diagnostics.
-
enumerator DCGM_DIAG_RESULT_SKIP
This test was skipped.
-
enumerator DCGM_DIAG_RESULT_WARN
This test passed with warnings.
-
enumerator DCGM_DIAG_RESULT_FAIL
This test failed the diagnostics.
-
enumerator DCGM_DIAG_RESULT_NOT_RUN
This test wasn’t executed.
-
enumerator DCGM_DIAG_RESULT_PASS
-
enum dcgmPerGpuTestIndices_enum
Diagnostic per-GPU tests - fixed indices for dcgmDiagResponsePerGpu_t.results[].
Values:
-
enumerator DCGM_MEMORY_INDEX
Memory test index.
-
enumerator DCGM_DIAGNOSTIC_INDEX
Diagnostic test index.
-
enumerator DCGM_PCI_INDEX
PCIe test index.
-
enumerator DCGM_SM_STRESS_INDEX
SM Stress test index.
-
enumerator DCGM_TARGETED_STRESS_INDEX
Targeted Stress test index.
-
enumerator DCGM_TARGETED_POWER_INDEX
Targeted Power test index.
-
enumerator DCGM_MEMORY_BANDWIDTH_INDEX
Memory bandwidth test index.
-
enumerator DCGM_MEMTEST_INDEX
Memtest test index.
-
enumerator DCGM_PULSE_TEST_INDEX
Pulse test index.
-
enumerator DCGM_EUD_TEST_INDEX
EUD test index.
-
enumerator DCGM_UNUSED2_TEST_INDEX
-
enumerator DCGM_CPU_EUD_TEST_INDEX
CPU EUD test index.
-
enumerator DCGM_UNUSED4_TEST_INDEX
-
enumerator DCGM_UNUSED5_TEST_INDEX
-
enumerator DCGM_SOFTWARE_INDEX
Software test index.
-
enumerator DCGM_CONTEXT_CREATE_INDEX
Context create test index.
-
enumerator DCGM_UNKNOWN_INDEX
Unknown test.
-
enumerator DCGM_MEMORY_INDEX
-
enum dcgmSoftwareTest_enum
Values:
-
enumerator DCGM_SWTEST_DENYLIST
test for presence of drivers on the denylist (e.g. nouveau)
-
enumerator DCGM_SWTEST_NVML_LIBRARY
test for presence (and version) of NVML lib
-
enumerator DCGM_SWTEST_CUDA_MAIN_LIBRARY
test for presence (and version) of CUDA lib
-
enumerator DCGM_SWTEST_CUDA_RUNTIME_LIBRARY
test for presence (and version) of CUDA RT lib
-
enumerator DCGM_SWTEST_PERMISSIONS
test for character device permissions
-
enumerator DCGM_SWTEST_PERSISTENCE_MODE
test for persistence mode enabled
-
enumerator DCGM_SWTEST_ENVIRONMENT
test for CUDA environment vars that may slow tests
-
enumerator DCGM_SWTEST_PAGE_RETIREMENT
test for pending frame buffer page retirement
-
enumerator DCGM_SWTEST_GRAPHICS_PROCESSES
test for graphics processes running
-
enumerator DCGM_SWTEST_INFOROM
test for inforom corruption
-
enumerator DCGM_SWTEST_DENYLIST
-
enum dcgmGpuLevel_enum
Represents level relationships within a system between two GPUs. The enums are spaced to allow for future relationships.
These match the definitions in nvml.h
Values:
-
enumerator DCGM_TOPOLOGY_UNINITIALIZED
-
enumerator DCGM_TOPOLOGY_BOARD
multi-GPU board
-
enumerator DCGM_TOPOLOGY_SINGLE
all devices that only need traverse a single PCIe switch
-
enumerator DCGM_TOPOLOGY_MULTIPLE
all devices that need not traverse a host bridge
-
enumerator DCGM_TOPOLOGY_HOSTBRIDGE
all devices that are connected to the same host bridge
-
enumerator DCGM_TOPOLOGY_CPU
all devices that are connected to the same CPU but possibly multiple host bridges
-
enumerator DCGM_TOPOLOGY_SYSTEM
all devices in the system
-
enumerator DCGM_TOPOLOGY_NVLINK1
GPUs connected via a single NVLINK link.
-
enumerator DCGM_TOPOLOGY_NVLINK2
GPUs connected via two NVLINK links.
-
enumerator DCGM_TOPOLOGY_NVLINK3
GPUs connected via three NVLINK links.
-
enumerator DCGM_TOPOLOGY_NVLINK4
GPUs connected via four NVLINK links.
-
enumerator DCGM_TOPOLOGY_NVLINK5
GPUs connected via five NVLINK links.
-
enumerator DCGM_TOPOLOGY_NVLINK6
GPUs connected via six NVLINK links.
-
enumerator DCGM_TOPOLOGY_NVLINK7
GPUs connected via seven NVLINK links.
-
enumerator DCGM_TOPOLOGY_NVLINK8
GPUs connected via eight NVLINK links.
-
enumerator DCGM_TOPOLOGY_NVLINK9
GPUs connected via nine NVLINK links.
-
enumerator DCGM_TOPOLOGY_NVLINK10
GPUs connected via ten NVLINK links.
-
enumerator DCGM_TOPOLOGY_NVLINK11
GPUs connected via eleven NVLINK links.
-
enumerator DCGM_TOPOLOGY_NVLINK12
GPUs connected via twelve NVLINK links.
-
enumerator DCGM_TOPOLOGY_NVLINK13
GPUs connected via thirteen NVLINK links.
-
enumerator DCGM_TOPOLOGY_NVLINK14
GPUs connected via fourteen NVLINK links.
-
enumerator DCGM_TOPOLOGY_NVLINK15
GPUs connected via fifteen NVLINK links.
-
enumerator DCGM_TOPOLOGY_NVLINK16
GPUs connected via sixteen NVLINK links.
-
enumerator DCGM_TOPOLOGY_NVLINK17
GPUs connected via seventeen NVLINK links.
-
enumerator DCGM_TOPOLOGY_NVLINK18
GPUs connected via eighteen NVLINK links.
-
enumerator DCGM_TOPOLOGY_UNINITIALIZED
-
enum dcgmGpuNVLinkErrorType_enum
Identifies a GPU NVLink error type returned by DCGM_FI_DEV_GPU_NVLINK_ERRORS.
Values:
-
enumerator DCGM_GPU_NVLINK_ERROR_RECOVERY_REQUIRED
NVLink link recovery error occurred.
-
enumerator DCGM_GPU_NVLINK_ERROR_FATAL
NVLink link fatal error occurred.
-
enumerator DCGM_GPU_NVLINK_ERROR_RECOVERY_REQUIRED
-
enum dcgmNvLinkLinkState_enum
NvLink link states.
Values:
-
enumerator DcgmNvLinkLinkStateNotSupported
NvLink is unsupported by this GPU (Default for GPUs)
-
enumerator DcgmNvLinkLinkStateDisabled
NvLink is supported for this link but this link is disabled (Default for NvSwitches)
-
enumerator DcgmNvLinkLinkStateDown
This NvLink link is down (inactive)
-
enumerator DcgmNvLinkLinkStateUp
This NvLink link is up (active)
-
enumerator DcgmNvLinkLinkStateNotSupported
-
enum dcgmModuleId_t
Module IDs.
Values:
-
enumerator DcgmModuleIdCore
Core DCGM - always loaded.
-
enumerator DcgmModuleIdNvSwitch
NvSwitch Module.
-
enumerator DcgmModuleIdVGPU
VGPU Module.
-
enumerator DcgmModuleIdIntrospect
Introspection Module.
-
enumerator DcgmModuleIdHealth
Health Module.
-
enumerator DcgmModuleIdPolicy
Policy Module.
-
enumerator DcgmModuleIdConfig
Config Module.
-
enumerator DcgmModuleIdDiag
GPU Diagnostic Module.
-
enumerator DcgmModuleIdProfiling
Profiling Module.
-
enumerator DcgmModuleIdSysmon
System Monitoring Module.
-
enumerator DcgmModuleIdCount
Always last. 1 greater than largest value above.
-
enumerator DcgmModuleIdCore
-
enum dcgmModuleStatus_t
Module Status.
Modules are lazy loaded, so they will be in status DcgmModuleStatusNotLoaded until they are used. Once modules are used, they will move to another status.
Values:
-
enumerator DcgmModuleStatusNotLoaded
Module has not been loaded yet.
-
enumerator DcgmModuleStatusDenylisted
Module is on the denylist; can’t be loaded.
-
enumerator DcgmModuleStatusFailed
Loading the module failed.
-
enumerator DcgmModuleStatusLoaded
Module has been loaded.
-
enumerator DcgmModuleStatusUnloaded
Module has been unloaded, happens during shutdown.
-
enumerator DcgmModuleStatusPaused
Module has been paused. This is a temporary state that will move to DcgmModuleStatusLoaded once the module is resumed. This status implies that the module is loaded.
-
enumerator DcgmModuleStatusNotLoaded
-
struct dcgm_link_s
- #include <dcgm_structs.h>
Represents a link object.
type should be one of DCGM_FE_GPU or DCGM_FE_SWITCH; gpuId or switchId is the associated gpu or switch; and index is the link index, 0-based, with TX (even) coming before RX (odd).
Public Members
-
dcgm_field_entity_group_t type
Entity Group
-
uint8_t index
Link Index Tx before Rx
-
dcgm_field_eid_t gpuId
Physical GPU ID
-
dcgm_field_eid_t switchId
Physical Switch ID
-
struct dcgm_link_s::[anonymous]::[anonymous] parsed
Broken out Link identifier GPU/SW:[GPU|SW]:Index
-
dcgm_field_eid_t raw
Raw Link ID
-
dcgm_field_entity_group_t type
-
struct dcgmConnectV2Params_v1
- #include <dcgm_structs.h>
Connection options for dcgmConnect_v2 (v1)
NOTE: This version is deprecated. use dcgmConnectV2Params_v2
Public Members
-
unsigned int version
Version number. Use dcgmConnectV2Params_version
-
unsigned int persistAfterDisconnect
Whether to persist DCGM state modified by this connection once the connection is terminated. Normally, all field watches created by a connection are removed once a connection goes away. 1 = do not clean up after this connection. 0 = clean up after this connection
-
unsigned int version
-
struct dcgmConnectV2Params_v2
- #include <dcgm_structs.h>
Connection options for dcgmConnect_v2 (v2)
Public Members
-
unsigned int version
Version number. Use dcgmConnectV2Params_version
-
unsigned int persistAfterDisconnect
Whether to persist DCGM state modified by this connection once the connection is terminated. Normally, all field watches created by a connection are removed once a connection goes away. 1 = do not clean up after this connection. 0 = clean up after this connection
-
unsigned int timeoutMs
When attempting to connect to the specified host engine, how long should we wait in milliseconds before giving up
-
unsigned int addressIsUnixSocket
Whether or not the passed-in address is a unix socket filename (1) or a TCP/IP address (0)
-
unsigned int version
-
struct dcgmHostengineHealth_v1
- #include <dcgm_structs.h>
Typedef for dcgmHostengineHealth_v1.
Public Members
-
unsigned int version
The version of this request.
-
unsigned int overallHealth
0 to indicate healthy, or a code to indicate the error
-
unsigned int version
-
struct dcgmGroupEntityPair_t
- #include <dcgm_structs.h>
Represents an entityGroupId + entityId pair to uniquely identify a given entityId inside a group of entities.
Added in DCGM 1.5.0
Public Members
-
dcgm_field_entity_group_t entityGroupId
Entity Group ID entity belongs to.
-
dcgm_field_eid_t entityId
Entity ID of the entity.
-
dcgm_field_entity_group_t entityGroupId
-
struct dcgmGroupInfo_v2
- #include <dcgm_structs.h>
Structure to store information for DCGM group.
Added in DCGM 1.5.0
Public Members
-
unsigned int version
Version Number (use dcgmGroupInfo_version2)
-
unsigned int count
count of entityIds returned in entityList
-
char groupName[256]
Group Name.
-
dcgmGroupEntityPair_t entityList[64]
List of the entities that are in this group.
-
unsigned int version
-
struct dcgmMigHierarchyInfo_t
- #include <dcgm_structs.h>
Represents a pair of entity pairings to uniquely identify an entity and its place in the hierarchy.
Public Members
-
dcgmGroupEntityPair_t entity
Entity id and type for the entity in question.
-
dcgmGroupEntityPair_t parent
Entity id and type for the parent of the entity in question.
-
dcgmMigProfile_t sliceProfile
Entity MIG profile identifier.
-
dcgmGroupEntityPair_t entity
-
struct dcgmMigEntityInfo_t
- #include <dcgm_structs.h>
Provides additional information about location of MIG entities.
Public Members
-
char gpuUuid[128]
GPU UUID
-
unsigned int nvmlGpuIndex
GPU index from NVML
-
unsigned int nvmlInstanceId
GPU instance index within GPU. 0 to N. -1 for GPU entities
-
unsigned int nvmlComputeInstanceId
GPU Compute instance index within GPU instance. 0 to N. -1 for GPU Instance and GPU entities
-
unsigned int nvmlMigProfileId
Unique profile ID for GPU or Compute instances. -1 for GPU entities
See also
nvmlComputeInstanceProfileInfo_st
See also
nvmlGpuInstanceProfileInfo_st
-
unsigned int nvmlProfileSlices
Number of slices in the MIG profile
-
char gpuUuid[128]
-
struct dcgmMigHierarchyInfo_v2
-
struct dcgmMigHierarchy_v2
-
struct dcgmCpuHierarchyOwnedCores_v1
-
struct dcgmCpuHierarchy_v1
- #include <dcgm_structs.h>
Hierarchy of CPUs and their cores.
-
struct dcgmCpuHierarchyCpu_v1
-
struct dcgmCpuHierarchyCpu_v1
-
struct dcgmFieldGroupInfo_v1
- #include <dcgm_structs.h>
Structure to represent information about a field group.
Public Members
-
unsigned int version
Version number (dcgmFieldGroupInfo_version)
-
unsigned int numFieldIds
Number of entries in fieldIds[] that are valid.
-
dcgmFieldGrp_t fieldGroupId
ID of this field group.
-
char fieldGroupName[256]
Field Group Name.
-
unsigned short fieldIds[128]
Field ids that belong to this group.
-
unsigned int version
-
struct dcgmAllFieldGroup_v1
Public Members
-
unsigned int version
Version number (dcgmAllFieldGroupInfo_version)
-
unsigned int numFieldGroups
Number of entries in fieldGroups[] that are populated.
-
dcgmFieldGroupInfo_t fieldGroups[64]
Info about each field group.
-
unsigned int version
-
struct dcgmErrorInfo_t
- #include <dcgm_structs.h>
Structure to represent error attributes.
Public Members
-
unsigned int gpuId
Represents GPU ID.
-
short fieldId
One of DCGM_FI_?
-
int status
One of DCGM_ST_?
-
unsigned int gpuId
-
struct dcgmClockSet_v1
- #include <dcgm_structs.h>
Represents a set of memory, SM, and video clocks for a device.
This can be current values or target values, depending on context
Public Members
-
int version
Version Number (dcgmClockSet_version)
-
unsigned int memClock
Memory Clock (Memory Clock value OR DCGM_INT32_BLANK to Ignore/Use compatible value with smClk)
-
unsigned int smClock
SM Clock (SM Clock value OR DCGM_INT32_BLANK to Ignore/Use compatible value with memClk)
-
int version
-
struct dcgmDeviceSupportedClockSets_v1
- #include <dcgm_structs.h>
Represents list of supported clock sets for a device.
Public Members
-
unsigned int version
Version Number (dcgmDeviceSupportedClockSets_version)
-
unsigned int count
Number of supported clocks.
-
dcgmClockSet_t clockSet[256]
Valid clock sets for the device. Up to count entries are filled.
-
unsigned int version
-
struct dcgmDevicePidAccountingStats_v1
- #include <dcgm_structs.h>
Represents accounting data for one process.
Public Members
-
unsigned int version
Version Number. Should match dcgmDevicePidAccountingStats_version.
-
unsigned int pid
Process id of the process these stats are for.
-
unsigned int gpuUtilization
Percent of time over the process’s lifetime during which one or more kernels was executing on the GPU.
Set to DCGM_INT32_NOT_SUPPORTED if it is not supported
-
unsigned int memoryUtilization
Percent of time over the process’s lifetime during which global (device) memory was being read or written.
Set to DCGM_INT32_NOT_SUPPORTED if it is not supported
-
unsigned long long maxMemoryUsage
Maximum total memory in bytes that was ever allocated by the process.
Set to DCGM_INT64_NOT_SUPPORTED if it is not supported
-
unsigned long long startTimestamp
CPU Timestamp in usec representing start time for the process.
-
unsigned long long activeTimeUsec
Amount of time in usec during which the compute context was active.
Note that this does not mean the context was being used. endTimestamp can be computed as startTimestamp + activeTimeUsec
-
unsigned int version
-
struct dcgmDeviceThermals_v1
- #include <dcgm_structs.h>
Represents thermal information.
Public Members
-
unsigned int version
Version Number.
-
unsigned int slowdownTemp
Slowdown temperature.
-
unsigned int shutdownTemp
Shutdown temperature.
-
unsigned int version
-
struct dcgmDevicePowerLimits_v1
- #include <dcgm_structs.h>
Represents various power limits.
Public Members
-
unsigned int version
Version Number.
-
unsigned int curPowerLimit
Power management limit associated with this device (in W)
-
unsigned int defaultPowerLimit
Power management limit effective at device boot (in W)
-
unsigned int enforcedPowerLimit
Effective power limit that the driver enforces after taking into account all limiters (in W)
-
unsigned int minPowerLimit
Minimum power management limit (in W)
-
unsigned int maxPowerLimit
Maximum power management limit (in W)
-
unsigned int version
-
struct dcgmDeviceIdentifiers_v1
- #include <dcgm_structs.h>
Represents device identifiers.
Public Members
-
unsigned int version
Version Number (dcgmDeviceIdentifiers_version)
-
char brandName[256]
Brand Name.
-
char deviceName[256]
Name of the device.
-
char pciBusId[256]
PCI Bus ID.
-
char serial[256]
Serial for the device.
-
char uuid[256]
UUID for the device.
-
char vbios[256]
VBIOS version.
-
char inforomImageVersion[256]
Inforom Image version.
-
unsigned int pciDeviceId
The combined 16-bit device id and 16-bit vendor id.
-
unsigned int pciSubSystemId
The 32-bit Sub System Device ID.
-
char driverVersion[256]
Driver Version.
-
unsigned int virtualizationMode
Virtualization Mode.
-
unsigned int version
-
struct dcgmDeviceMemoryUsage_v1
- #include <dcgm_structs.h>
Represents device memory and usage.
Public Members
-
unsigned int version
Version Number (dcgmDeviceMemoryUsage_version)
-
unsigned int bar1Total
Total BAR1 size in megabytes.
-
unsigned int fbTotal
Total framebuffer memory in megabytes.
-
unsigned int fbUsed
Used framebuffer memory in megabytes.
-
unsigned int fbFree
Free framebuffer memory in megabytes.
-
unsigned int version
-
struct dcgmDeviceVgpuUtilInfo_v1
- #include <dcgm_structs.h>
Represents utilization values for vGPUs running on the device.
Public Members
-
unsigned int version
Version Number (dcgmDeviceVgpuUtilInfo_version)
-
unsigned int vgpuId
vGPU instance ID
-
unsigned int smUtil
GPU utilization for vGPU.
-
unsigned int memUtil
Memory utilization for vGPU.
-
unsigned int encUtil
Encoder utilization for vGPU.
-
unsigned int decUtil
Decoder utilization for vGPU.
-
unsigned int version
-
struct dcgmDeviceEncStats_v1
- #include <dcgm_structs.h>
Represents current encoder statistics for the given device/vGPU instance.
Public Members
-
unsigned int version
Version Number (dcgmDeviceEncStats_version)
-
unsigned int sessionCount
Count of active encoder sessions.
-
unsigned int averageFps
Trailing average FPS of all active sessions.
-
unsigned int averageLatency
Encode latency in milliseconds.
-
unsigned int version
-
struct dcgmDeviceFbcStats_v1
- #include <dcgm_structs.h>
Represents current frame buffer capture sessions statistics for the given device/vGPU instance.
Public Members
-
unsigned int version
Version Number (dcgmDeviceFbcStats_version)
-
unsigned int sessionCount
Count of active FBC sessions.
-
unsigned int averageFps
Moving average new frames captured per second.
-
unsigned int averageLatency
Moving average new frame capture latency in microseconds.
-
unsigned int version
-
struct dcgmDeviceFbcSessionInfo_v1
- #include <dcgm_structs.h>
Represents information about active FBC session on the given device/vGPU instance.
Public Members
-
unsigned int version
Version Number (dcgmDeviceFbcSessionInfo_version)
-
unsigned int sessionId
Unique session ID.
-
unsigned int pid
Owning process ID.
-
unsigned int vgpuId
vGPU instance ID (only valid on vGPU hosts, otherwise zero)
-
unsigned int displayOrdinal
Display identifier.
-
dcgmFBCSessionType_t sessionType
Type of frame buffer capture session.
-
unsigned int sessionFlags
Session flags.
-
unsigned int hMaxResolution
Max horizontal resolution supported by the capture session.
-
unsigned int vMaxResolution
Max vertical resolution supported by the capture session.
-
unsigned int hResolution
Horizontal resolution requested by caller in capture call.
-
unsigned int vResolution
Vertical resolution requested by caller in capture call.
-
unsigned int averageFps
Moving average new frames captured per second.
-
unsigned int averageLatency
Moving average new frame capture latency in microseconds.
-
unsigned int version
-
struct dcgmDeviceFbcSessions_v1
- #include <dcgm_structs.h>
Represents all the active FBC sessions on the given device/vGPU instance.
Public Members
-
unsigned int version
Version Number (dcgmDeviceFbcSessions_version)
-
unsigned int sessionCount
Count of active FBC sessions.
-
dcgmDeviceFbcSessionInfo_t sessionInfo[256]
Info about the active FBC session.
-
unsigned int version
-
struct dcgmDeviceVgpuEncSessions_v1
- #include <dcgm_structs.h>
Represents information about active encoder sessions on the given vGPU instance.
Public Members
-
unsigned int version
Version Number (dcgmDeviceVgpuEncSessions_version)
-
unsigned int vgpuId
vGPU instance ID
-
unsigned int sessionId
Unique session ID.
-
unsigned int pid
Process ID.
-
dcgmEncoderType_t codecType
Video encoder type.
-
unsigned int hResolution
Current encode horizontal resolution.
-
unsigned int vResolution
Current encode vertical resolution.
-
unsigned int averageFps
Moving average encode frames per second.
-
unsigned int averageLatency
Moving average encode latency in milliseconds.
-
unsigned int version
-
struct dcgmDeviceVgpuProcessUtilInfo_v1
- #include <dcgm_structs.h>
Represents utilization values for processes running in vGPU VMs using the device.
Public Members
-
unsigned int version
Version Number (dcgmDeviceVgpuProcessUtilInfo_version)
-
unsigned int vgpuId
vGPU instance ID
-
unsigned int vgpuProcessSamplesCount
Count of processes running in the vGPU VM, for which utilization rates are being reported in this cycle.
-
unsigned int pid
Process ID of the process running in the vGPU VM.
-
char processName[64]
Process Name of process running in the vGPU VM.
-
unsigned int smUtil
GPU utilization of process running in the vGPU VM.
-
unsigned int memUtil
Memory utilization of process running in the vGPU VM.
-
unsigned int encUtil
Encoder utilization of process running in the vGPU VM.
-
unsigned int decUtil
Decoder utilization of process running in the vGPU VM.
-
unsigned int version
-
struct dcgmDeviceVgpuTypeInfo_v1
- #include <dcgm_structs.h>
Represents static info related to vGPUs supported on the device.
Public Members
-
unsigned int version
Version number (dcgmDeviceVgpuTypeInfo_version)
-
union dcgmDeviceVgpuTypeInfo_v1::[anonymous] vgpuTypeInfo
vGPU type ID and Supported vGPU type count
-
char vgpuTypeName[64]
vGPU type Name
-
char vgpuTypeClass[64]
Class of vGPU type.
-
char vgpuTypeLicense[128]
license of vGPU type
-
int deviceId
device ID of vGPU type
-
int subsystemId
Subsystem ID of vGPU type.
-
int numDisplayHeads
Count of vGPU’s supported display heads.
-
int maxInstances
maximum number of vGPU instances creatable on a device for given vGPU type
-
int frameRateLimit
Frame rate limit value of the vGPU type.
-
int maxResolutionX
vGPU display head’s maximum supported resolution in X dimension
-
int maxResolutionY
vGPU display head’s maximum supported resolution in Y dimension
-
int fbTotal
vGPU Total framebuffer size in megabytes
-
unsigned int version
-
struct dcgmDeviceVgpuTypeInfo_v2
Public Members
-
unsigned int version
Version number (dcgmDeviceVgpuTypeInfo_version2)
-
union dcgmDeviceVgpuTypeInfo_v2::[anonymous] vgpuTypeInfo
vGPU type ID and Supported vGPU type count
-
char vgpuTypeName[64]
vGPU type Name
-
char vgpuTypeClass[64]
Class of vGPU type.
-
char vgpuTypeLicense[128]
license of vGPU type
-
int deviceId
device ID of vGPU type
-
int subsystemId
Subsystem ID of vGPU type.
-
int numDisplayHeads
Count of vGPU’s supported display heads.
-
int maxInstances
maximum number of vGPU instances creatable on a device for given vGPU type
-
int frameRateLimit
Frame rate limit value of the vGPU type.
-
int maxResolutionX
vGPU display head’s maximum supported resolution in X dimension
-
int maxResolutionY
vGPU display head’s maximum supported resolution in Y dimension
-
int fbTotal
vGPU Total framebuffer size in megabytes
-
int gpuInstanceProfileId
GPU Instance Profile ID for the given vGPU type.
-
unsigned int version
-
struct dcgmDeviceSupportedVgpuTypeInfo_v1
- #include <dcgm_structs.h>
Represents the info related to vGPUs supported on the device.
Public Members
-
unsigned int version
Version number (dcgmDeviceSupportedVgpuTypeInfo_version)
-
unsigned long long deviceId
device ID of vGPU type
-
unsigned long long subsystemId
Subsystem ID of vGPU type.
-
unsigned int numDisplayHeads
Count of vGPU’s supported display heads.
-
unsigned int maxInstances
maximum number of vGPU instances creatable on a device for given vGPU type
-
unsigned int frameRateLimit
Frame rate limit value of the vGPU type.
-
unsigned int maxResolutionX
vGPU display head’s maximum supported resolution in X dimension
-
unsigned int maxResolutionY
vGPU display head’s maximum supported resolution in Y dimension
-
unsigned long long fbTotal
vGPU Total framebuffer size in megabytes
-
unsigned int gpuInstanceProfileId
GPU Instance Profile ID for the given vGPU type.
-
unsigned int version
-
struct dcgmDeviceSettings_v2
-
struct dcgmDeviceAttributes_v3
Public Members
-
unsigned int version
Version number (dcgmDeviceAttributes_version)
-
dcgmDeviceSupportedClockSets_t clockSets
Supported clocks for the device.
-
dcgmDeviceThermals_t thermalSettings
Thermal settings for the device.
-
dcgmDevicePowerLimits_t powerLimits
Various power limits for the device.
-
dcgmDeviceIdentifiers_t identifiers
Identifiers for the device.
-
dcgmDeviceMemoryUsage_t memoryUsage
Memory usage info for the device.
-
dcgmDeviceSettings_v2 settings
Basic device settings.
-
unsigned int version
-
struct dcgmDeviceMigAttributesInfo_v1
- #include <dcgm_structs.h>
Structure to represent attributes info for a MIG device.
Public Members
-
unsigned int version
Version Number (dcgmDeviceMigAttributesInfo_version)
-
unsigned int gpuInstanceId
GPU instance ID.
-
unsigned int computeInstanceId
Compute instance ID.
-
unsigned int multiprocessorCount
Streaming Multiprocessor count.
Shared Copy Engine count.
Shared Decoder Engine count.
Shared Encoder Engine count.
Shared JPEG Engine count.
Shared OFA Engine count.
-
unsigned int gpuInstanceSliceCount
GPU instance slice count.
-
unsigned int computeInstanceSliceCount
Compute instance slice count.
-
unsigned long long memorySizeMB
Device memory size (in MiB)
-
unsigned int version
-
struct dcgmDeviceMigAttributes_v1
- #include <dcgm_structs.h>
Structure to represent attributes for a MIG device.
Public Members
-
unsigned int version
Version Number (dcgmDeviceMigAttributes_version)
-
unsigned int migDevicesCount
Count of MIG devices.
-
dcgmDeviceMigAttributesInfo_v1 migAttributesInfo
MIG attributes information.
-
unsigned int version
-
struct dcgmGpuInstanceProfileInfo_v1
- #include <dcgm_structs.h>
Structure to represent GPU instance profile information.
Public Members
-
unsigned int version
Version Number (dcgmGpuInstanceProfileInfo_version)
-
unsigned int id
Unique profile ID within the device.
-
unsigned int isP2pSupported
Peer-to-Peer support.
-
unsigned int sliceCount
GPU Slice count.
-
unsigned int instanceCount
GPU instance count.
-
unsigned int multiprocessorCount
Streaming Multiprocessor count.
-
unsigned int copyEngineCount
Copy Engine count.
-
unsigned int decoderCount
Decoder Engine count.
-
unsigned int encoderCount
Encoder Engine count.
-
unsigned int jpegCount
JPEG Engine count.
-
unsigned int ofaCount
OFA Engine count.
-
unsigned long long memorySizeMB
Memory size in MBytes.
-
unsigned int version
-
struct dcgmGpuInstanceProfiles_v1
- #include <dcgm_structs.h>
Structure to represent GPU instance profiles.
Public Members
-
unsigned int version
Version Number (dcgmGpuInstanceProfiles_version)
-
unsigned int profileCount
Profile count.
-
dcgmGpuInstanceProfileInfo_v1 profileInfo
GPU instance profile information.
-
unsigned int version
-
struct dcgmComputeInstanceProfileInfo_v1
- #include <dcgm_structs.h>
Structure to represent Compute instance profile information.
Public Members
-
unsigned int version
Version Number (dcgmComputeInstanceProfileInfo_version)
-
unsigned int gpuInstanceId
GPU instance ID.
-
unsigned int id
Unique profile ID within the GPU instance.
-
unsigned int sliceCount
GPU Slice count.
-
unsigned int instanceCount
Compute instance count.
-
unsigned int multiprocessorCount
Streaming Multiprocessor count.
Shared Copy Engine count.
Shared Decoder Engine count.
Shared Encoder Engine count.
Shared JPEG Engine count.
Shared OFA Engine count.
-
unsigned int version
-
struct dcgmComputeInstanceProfiles_v1
- #include <dcgm_structs.h>
Structure to represent Compute instance profiles.
Public Members
-
unsigned int version
Version Number (dcgmComputeInstanceProfiles_version)
-
unsigned int profileCount
Profile count.
-
dcgmComputeInstanceProfileInfo_v1 profileInfo
Compute instance profile information.
-
unsigned int version
-
struct dcgmConfigPerfStateSettings_t
- #include <dcgm_structs.h>
Used to represent Performance state settings.
Public Members
-
unsigned int syncBoost
Sync Boost Mode (0: Disabled, 1 : Enabled, DCGM_INT32_BLANK : Ignored).
Note that using this setting may result in lower clocks than targetClocks
-
dcgmClockSet_t targetClocks
Target clocks.
Set smClock and memClock to DCGM_INT32_BLANK to ignore/use compatible values. For GPUs > Maxwell, setting this implies autoBoost=0
-
unsigned int syncBoost
-
struct dcgmConfigPowerLimit_t
- #include <dcgm_structs.h>
Used to represent the power capping limit for each GPU in the group or to represent the power budget for the entire group.
Public Members
-
dcgmConfigPowerLimitType_t type
Flag to represent power cap for each GPU or power budget for the group of GPUs.
-
unsigned int val
Power Limit in Watts (Set a value OR DCGM_INT32_BLANK to Ignore)
-
dcgmConfigPowerLimitType_t type
-
struct dcgmConfig_v1
- #include <dcgm_structs.h>
Structure to represent default and target configuration for a device.
Public Members
-
unsigned int version
Version number (dcgmConfig_version)
-
unsigned int gpuId
GPU ID.
-
unsigned int eccMode
ECC Mode (0: Disabled, 1 : Enabled, DCGM_INT32_BLANK : Ignored)
-
unsigned int computeMode
Compute Mode (One of DCGM_CONFIG_COMPUTEMODE_? OR DCGM_INT32_BLANK to Ignore)
-
dcgmConfigPerfStateSettings_t perfState
Performance State Settings (clocks / boost mode)
-
dcgmConfigPowerLimit_t powerLimit
Power Limits.
-
unsigned int version
-
struct dcgmPolicyViolation_v1
Public Members
-
unsigned int version
Version number (dcgmPolicyViolation_version)
-
unsigned int notifyOnEccDbe
true/false notification on ECC Double Bit Errors
- unsig
-
unsigned int version
-
DCGM_HOME_DIR_VAR_NAME "DCGM_HOME_DIR"