NVML API Reference Guide (PDF) - vR470 (older) - Last updated July 29, 2021 - Send Feedback

2.27. Multi Instance GPU Management

This chapter describes NVML operations that are associated with Multi Instance GPU management.

Defines

#define NVML_COMPUTE_INSTANCE_ENGINE_PROFILE_SHARED 0x0
All the engines except multiprocessors would be shared.
#define NVML_COMPUTE_INSTANCE_PROFILE_1_SLICE 0x0
#define NVML_DEVICE_MIG_DISABLE 0x0
#define NVML_DEVICE_MIG_ENABLE 0x1
#define NVML_GPU_INSTANCE_PROFILE_1_SLICE 0x0

Functions

nvmlReturn_t nvmlComputeInstanceDestroy ( nvmlComputeInstance_t computeInstance )
nvmlReturn_t nvmlComputeInstanceGetInfo_v2 ( nvmlComputeInstance_t computeInstance, nvmlComputeInstanceInfo_t* info )
nvmlReturn_t nvmlDeviceCreateGpuInstance ( nvmlDevice_t device, unsigned int  profileId, nvmlGpuInstance_t* gpuInstance )
nvmlReturn_t nvmlDeviceCreateGpuInstanceWithPlacement ( nvmlDevice_t device, unsigned int  profileId, const nvmlGpuInstancePlacement_t* placement, nvmlGpuInstance_t* gpuInstance )
nvmlReturn_t nvmlDeviceGetComputeInstanceId ( nvmlDevice_t device, unsigned int* id )
nvmlReturn_t nvmlDeviceGetDeviceHandleFromMigDeviceHandle ( nvmlDevice_t migDevice, nvmlDevice_t* device )
nvmlReturn_t nvmlDeviceGetGpuInstanceById ( nvmlDevice_t device, unsigned int  id, nvmlGpuInstance_t* gpuInstance )
nvmlReturn_t nvmlDeviceGetGpuInstanceId ( nvmlDevice_t device, unsigned int* id )
nvmlReturn_t nvmlDeviceGetGpuInstancePossiblePlacements_v2 ( nvmlDevice_t device, unsigned int  profileId, nvmlGpuInstancePlacement_t* placements, unsigned int* count )
nvmlReturn_t nvmlDeviceGetGpuInstanceProfileInfo ( nvmlDevice_t device, unsigned int  profile, nvmlGpuInstanceProfileInfo_t* info )
nvmlReturn_t nvmlDeviceGetGpuInstanceRemainingCapacity ( nvmlDevice_t device, unsigned int  profileId, unsigned int* count )
nvmlReturn_t nvmlDeviceGetGpuInstances ( nvmlDevice_t device, unsigned int  profileId, nvmlGpuInstance_t* gpuInstances, unsigned int* count )
nvmlReturn_t nvmlDeviceGetMaxMigDeviceCount ( nvmlDevice_t device, unsigned int* count )
nvmlReturn_t nvmlDeviceGetMigDeviceHandleByIndex ( nvmlDevice_t device, unsigned int  index, nvmlDevice_t* migDevice )
nvmlReturn_t nvmlDeviceGetMigMode ( nvmlDevice_t device, unsigned int* currentMode, unsigned int* pendingMode )
nvmlReturn_t nvmlDeviceIsMigDeviceHandle ( nvmlDevice_t device, unsigned int* isMigDevice )
nvmlReturn_t nvmlDeviceSetMigMode ( nvmlDevice_t device, unsigned int  mode, nvmlReturn_t* activationStatus )
nvmlReturn_t nvmlGpuInstanceCreateComputeInstance ( nvmlGpuInstance_t gpuInstance, unsigned int  profileId, nvmlComputeInstance_t* computeInstance )
nvmlReturn_t nvmlGpuInstanceDestroy ( nvmlGpuInstance_t gpuInstance )
nvmlReturn_t nvmlGpuInstanceGetComputeInstanceById ( nvmlGpuInstance_t gpuInstance, unsigned int  id, nvmlComputeInstance_t* computeInstance )
nvmlReturn_t nvmlGpuInstanceGetComputeInstanceProfileInfo ( nvmlGpuInstance_t gpuInstance, unsigned int  profile, unsigned int  engProfile, nvmlComputeInstanceProfileInfo_t* info )
nvmlReturn_t nvmlGpuInstanceGetComputeInstanceRemainingCapacity ( nvmlGpuInstance_t gpuInstance, unsigned int  profileId, unsigned int* count )
nvmlReturn_t nvmlGpuInstanceGetComputeInstances ( nvmlGpuInstance_t gpuInstance, unsigned int  profileId, nvmlComputeInstance_t* computeInstances, unsigned int* count )
nvmlReturn_t nvmlGpuInstanceGetInfo ( nvmlGpuInstance_t gpuInstance, nvmlGpuInstanceInfo_t* info )

Defines

#define NVML_COMPUTE_INSTANCE_ENGINE_PROFILE_SHARED 0x0

#define NVML_COMPUTE_INSTANCE_PROFILE_1_SLICE 0x0

Compute instance profiles.

These macros should be passed to nvmlGpuInstanceGetComputeInstanceProfileInfo to retrieve the detailed information about a compute instance such as profile ID, engine counts.

#define NVML_DEVICE_MIG_DISABLE 0x0

Disable Multi Instance GPU mode.

#define NVML_DEVICE_MIG_ENABLE 0x1

Enable Multi Instance GPU mode.

#define NVML_GPU_INSTANCE_PROFILE_1_SLICE 0x0

GPU instance profiles.

These macros should be passed to nvmlDeviceGetGpuInstanceProfileInfo to retrieve the detailed information about a GPU instance such as profile ID, engine counts.

Functions

nvmlReturn_t nvmlComputeInstanceDestroy ( nvmlComputeInstance_t computeInstance )
Parameters
computeInstance
The compute instance handle
Returns

Description

Destroy compute instance.

For Ampere or newer fully supported devices. Supported on Linux only. Requires privileged user.

nvmlReturn_t nvmlComputeInstanceGetInfo_v2 ( nvmlComputeInstance_t computeInstance, nvmlComputeInstanceInfo_t* info )
Parameters
computeInstance
The compute instance handle
info
Return compute instance information
Returns

Description

Get compute instance information.

For Ampere or newer fully supported devices. Supported on Linux only.

nvmlReturn_t nvmlDeviceCreateGpuInstance ( nvmlDevice_t device, unsigned int  profileId, nvmlGpuInstance_t* gpuInstance )
Parameters
device
The identifier of the target device
profileId
The GPU instance profile ID. See nvmlDeviceGetGpuInstanceProfileInfo
gpuInstance
Returns the GPU instance handle
Returns

Description

Create GPU instance.

For Ampere or newer fully supported devices. Supported on Linux only. Requires privileged user.

If the parent device is unbound, reset or the GPU instance is destroyed explicitly, the GPU instance handle would become invalid. The GPU instance must be recreated to acquire a valid handle.

nvmlReturn_t nvmlDeviceCreateGpuInstanceWithPlacement ( nvmlDevice_t device, unsigned int  profileId, const nvmlGpuInstancePlacement_t* placement, nvmlGpuInstance_t* gpuInstance )
Parameters
device
The identifier of the target device
profileId
The GPU instance profile ID. See nvmlDeviceGetGpuInstanceProfileInfo
placement
The requested placement. See nvmlDeviceGetGpuInstancePossiblePlacements_v2
gpuInstance
Returns the GPU instance handle
Returns

Description

Create GPU instance with the specified placement.

For Ampere or newer fully supported devices. Supported on Linux only. Requires privileged user.

If the parent device is unbound, reset or the GPU instance is destroyed explicitly, the GPU instance handle would become invalid. The GPU instance must be recreated to acquire a valid handle.

nvmlReturn_t nvmlDeviceGetComputeInstanceId ( nvmlDevice_t device, unsigned int* id )
Parameters
device
Target MIG device handle
id
Compute instance ID
Returns

Description

Get compute instance ID for the given MIG device handle.

Compute instance IDs are unique per GPU instance and remain valid until the compute instance is destroyed.

For Ampere or newer fully supported devices. Supported on Linux only.

nvmlReturn_t nvmlDeviceGetDeviceHandleFromMigDeviceHandle ( nvmlDevice_t migDevice, nvmlDevice_t* device )
Parameters
migDevice
MIG device handle
device
Device handle
Returns

Description

Get parent device handle from a MIG device handle.

For Ampere or newer fully supported devices. Supported on Linux only.

nvmlReturn_t nvmlDeviceGetGpuInstanceById ( nvmlDevice_t device, unsigned int  id, nvmlGpuInstance_t* gpuInstance )
Parameters
device
The identifier of the target device
id
The GPU instance ID
gpuInstance
Returns GPU instance
Returns

Description

Get GPU instance for given instance ID.

For Ampere or newer fully supported devices. Supported on Linux only. Requires privileged user.

nvmlReturn_t nvmlDeviceGetGpuInstanceId ( nvmlDevice_t device, unsigned int* id )
Parameters
device
Target MIG device handle
id
GPU instance ID
Returns

Description

Get GPU instance ID for the given MIG device handle.

GPU instance IDs are unique per device and remain valid until the GPU instance is destroyed.

For Ampere or newer fully supported devices. Supported on Linux only.

nvmlReturn_t nvmlDeviceGetGpuInstancePossiblePlacements_v2 ( nvmlDevice_t device, unsigned int  profileId, nvmlGpuInstancePlacement_t* placements, unsigned int* count )
Parameters
device
The identifier of the target device
profileId
The GPU instance profile ID. See nvmlDeviceGetGpuInstanceProfileInfo
placements
Returns placements allowed for the profile. Can be NULL to discover number of allowed placements for this profile. If non-NULL must be large enough to accommodate the placements supported by the profile.
count
Returns number of allowed placements for the profile.
Returns

Description

Get GPU instance placements.

A placement represents the location of a GPU instance within a device. This API only returns all the possible placements for the given profile.

For Ampere or newer fully supported devices. Supported on Linux only. Requires privileged user.

nvmlReturn_t nvmlDeviceGetGpuInstanceProfileInfo ( nvmlDevice_t device, unsigned int  profile, nvmlGpuInstanceProfileInfo_t* info )
Parameters
device
The identifier of the target device
profile
One of the NVML_GPU_INSTANCE_PROFILE_*
info
Returns detailed profile information
Returns

Description

Get GPU instance profile information.

Information provided by this API is immutable throughout the lifetime of a MIG mode.

For Ampere or newer fully supported devices. Supported on Linux only. Requires privileged user.

nvmlReturn_t nvmlDeviceGetGpuInstanceRemainingCapacity ( nvmlDevice_t device, unsigned int  profileId, unsigned int* count )
Parameters
device
The identifier of the target device
profileId
The GPU instance profile ID. See nvmlDeviceGetGpuInstanceProfileInfo
count
Returns remaining instance count for the profile ID
Returns

Description

Get GPU instance profile capacity.

For Ampere or newer fully supported devices. Supported on Linux only. Requires privileged user.

nvmlReturn_t nvmlDeviceGetGpuInstances ( nvmlDevice_t device, unsigned int  profileId, nvmlGpuInstance_t* gpuInstances, unsigned int* count )
Parameters
device
The identifier of the target device
profileId
The GPU instance profile ID. See nvmlDeviceGetGpuInstanceProfileInfo
gpuInstances
Returns pre-existing GPU instances. The buffer must be large enough to accommodate the instances supported by the profile. See nvmlDeviceGetGpuInstanceProfileInfo
count
The count of returned GPU instances
Returns

Description

Get GPU instances for given profile ID.

For Ampere or newer fully supported devices. Supported on Linux only. Requires privileged user.

nvmlReturn_t nvmlDeviceGetMaxMigDeviceCount ( nvmlDevice_t device, unsigned int* count )
Parameters
device
Target device handle
count
Count of MIG devices
Returns

Description

Get the maximum number of MIG devices that can exist under a given parent NVML device.

Returns zero if MIG is not supported or not enabled.

For Ampere or newer fully supported devices. Supported on Linux only.

nvmlReturn_t nvmlDeviceGetMigDeviceHandleByIndex ( nvmlDevice_t device, unsigned int  index, nvmlDevice_t* migDevice )
Parameters
device
Reference to the parent GPU device handle
index
Index of the MIG device
migDevice
Reference to the MIG device handle
Returns

Description

Get MIG device handle for the given index under its parent NVML device.

If the compute instance is destroyed either explicitly or by destroying, resetting or unbinding the parent GPU instance or the GPU device itself, the MIG device handle would remain invalid and must be requested again using this API. Handles may be reused and their properties can change in the process.

For Ampere or newer fully supported devices. Supported on Linux only.

nvmlReturn_t nvmlDeviceGetMigMode ( nvmlDevice_t device, unsigned int* currentMode, unsigned int* pendingMode )
Parameters
device
The identifier of the target device
currentMode
Returns the current mode, NVML_DEVICE_MIG_DISABLE or NVML_DEVICE_MIG_ENABLE
pendingMode
Returns the pending mode, NVML_DEVICE_MIG_DISABLE or NVML_DEVICE_MIG_ENABLE
Returns

Description

Get MIG mode for the device.

For Ampere or newer fully supported devices.

Changing MIG modes may require device unbind or reset. The "pending" MIG mode refers to the target mode following the next activation trigger.

nvmlReturn_t nvmlDeviceIsMigDeviceHandle ( nvmlDevice_t device, unsigned int* isMigDevice )
Parameters
device
NVML handle to test
isMigDevice
True when handle refers to a MIG device
Returns

Description

Test if the given handle refers to a MIG device.

A MIG device handle is an NVML abstraction which maps to a MIG compute instance. These overloaded references can be used (with some restrictions) interchangeably with a GPU device handle to execute queries at a per-compute instance granularity.

For Ampere or newer fully supported devices. Supported on Linux only.

nvmlReturn_t nvmlDeviceSetMigMode ( nvmlDevice_t device, unsigned int  mode, nvmlReturn_t* activationStatus )
Parameters
device
The identifier of the target device
mode
The mode to be set, NVML_DEVICE_MIG_DISABLE or NVML_DEVICE_MIG_ENABLE
activationStatus
The activation status
Returns

Description

Set MIG mode for the device.

For Ampere or newer fully supported devices. Requires root user.

This mode determines whether a GPU instance can be created.

This API may unbind or reset the device to activate the requested mode. Thus, the attributes associated with the device, such as minor number, might change. The caller of this API is expected to query such attributes again.

On certain platforms like pass-through virtualization, where reset functionality may not be exposed directly, VM reboot is required. activationStatus would return NVML_ERROR_RESET_REQUIRED for such cases.

activationStatus would return the appropriate error code upon unsuccessful activation. For example, if device unbind fails because the device isn't idle, NVML_ERROR_IN_USE would be returned. The caller of this API is expected to idle the device and retry setting the mode.

Note:

On Windows, only disabling MIG mode is supported. activationStatus would return NVML_ERROR_NOT_SUPPORTED as GPU reset is not supported on Windows through this API.

nvmlReturn_t nvmlGpuInstanceCreateComputeInstance ( nvmlGpuInstance_t gpuInstance, unsigned int  profileId, nvmlComputeInstance_t* computeInstance )
Parameters
gpuInstance
The identifier of the target GPU instance
profileId
The compute instance profile ID. See nvmlGpuInstanceGetComputeInstanceProfileInfo
computeInstance
Returns the compute instance handle
Returns

Description

Create compute instance.

For Ampere or newer fully supported devices. Supported on Linux only. Requires privileged user.

If the parent device is unbound, reset or the parent GPU instance is destroyed or the compute instance is destroyed explicitly, the compute instance handle would become invalid. The compute instance must be recreated to acquire a valid handle.

nvmlReturn_t nvmlGpuInstanceDestroy ( nvmlGpuInstance_t gpuInstance )
Parameters
gpuInstance
The GPU instance handle
Returns

Description

Destroy GPU instance.

For Ampere or newer fully supported devices. Supported on Linux only. Requires privileged user.

nvmlReturn_t nvmlGpuInstanceGetComputeInstanceById ( nvmlGpuInstance_t gpuInstance, unsigned int  id, nvmlComputeInstance_t* computeInstance )
Parameters
gpuInstance
The identifier of the target GPU instance
id
The compute instance ID
computeInstance
Returns compute instance
Returns

Description

Get compute instance for given instance ID.

For Ampere or newer fully supported devices. Supported on Linux only. Requires privileged user.

nvmlReturn_t nvmlGpuInstanceGetComputeInstanceProfileInfo ( nvmlGpuInstance_t gpuInstance, unsigned int  profile, unsigned int  engProfile, nvmlComputeInstanceProfileInfo_t* info )
Parameters
gpuInstance
The identifier of the target GPU instance
profile
One of the NVML_COMPUTE_INSTANCE_PROFILE_*
engProfile
One of the NVML_COMPUTE_INSTANCE_ENGINE_PROFILE_*
info
Returns detailed profile information
Returns

Description

Get compute instance profile information.

Information provided by this API is immutable throughout the lifetime of a MIG mode.

For Ampere or newer fully supported devices. Supported on Linux only. Requires privileged user.

nvmlReturn_t nvmlGpuInstanceGetComputeInstanceRemainingCapacity ( nvmlGpuInstance_t gpuInstance, unsigned int  profileId, unsigned int* count )
Parameters
gpuInstance
The identifier of the target GPU instance
profileId
The compute instance profile ID. See nvmlGpuInstanceGetComputeInstanceProfileInfo
count
Returns remaining instance count for the profile ID
Returns

Description

Get compute instance profile capacity.

For Ampere or newer fully supported devices. Supported on Linux only. Requires privileged user.

nvmlReturn_t nvmlGpuInstanceGetComputeInstances ( nvmlGpuInstance_t gpuInstance, unsigned int  profileId, nvmlComputeInstance_t* computeInstances, unsigned int* count )
Parameters
gpuInstance
The identifier of the target GPU instance
profileId
The compute instance profile ID. See nvmlGpuInstanceGetComputeInstanceProfileInfo
computeInstances
Returns pre-existing compute instances. The buffer must be large enough to accommodate the instances supported by the profile. See nvmlGpuInstanceGetComputeInstanceProfileInfo
count
The count of returned compute instances
Returns

Description

Get compute instances for given profile ID.

For Ampere or newer fully supported devices. Supported on Linux only. Requires privileged user.

nvmlReturn_t nvmlGpuInstanceGetInfo ( nvmlGpuInstance_t gpuInstance, nvmlGpuInstanceInfo_t* info )
Parameters
gpuInstance
The GPU instance handle
info
Return GPU instance information
Returns

Description

Get GPU instance information.

For Ampere or newer fully supported devices. Supported on Linux only.


NVML API Reference Guide (PDF) - vR470 (older) - Last updated July 29, 2021 - Send Feedback