nemo_microservices.types.v2.inference.nim_deployment_param#

Module Contents#

Classes#

NIMDeploymentParam

API#

class nemo_microservices.types.v2.inference.nim_deployment_param.NIMDeploymentParam#

Bases: typing_extensions.TypedDict
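
Since NIMDeploymentParam is a TypedDict, a deployment is described as a plain dictionary; gpu is the only required key and every other field may be omitted. A minimal sketch, assuming illustrative model and environment values:

```python
from nemo_microservices.types.v2.inference.nim_deployment_param import NIMDeploymentParam

# gpu is the only Required key; all other fields are optional.
# The model identifiers and environment variable below are illustrative placeholders.
deployment: NIMDeploymentParam = {
    "gpu": 1,
    "model_provider": "hf",
    "model_namespace": "meta-llama",
    "model_name": "Llama-3.1-8B-Instruct",
    "lora_enabled": False,
    "additional_envs": {"NIM_LOG_LEVEL": "INFO"},
}
```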

additional_envs: Dict[str, object]#

None

Additional environment variables for the deployment

disk_size: str#

None

Disk size for the deployment

gpu: typing_extensions.Required[int]#

None

Number of GPUs required for the deployment

image_name: str#

None

Container image name from NGC. If not specified, defaults to multi-llm

image_tag: str#

None

Container image tag from NGC
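
If the default multi-llm image is not appropriate, image_name and image_tag can pin a specific NIM container. A sketch with illustrative values:

```python
from nemo_microservices.types.v2.inference.nim_deployment_param import NIMDeploymentParam

# Pin a specific NIM container instead of the default multi-llm image.
# The NGC image name and tag below are illustrative placeholders.
deployment: NIMDeploymentParam = {
    "gpu": 1,
    "image_name": "nim/meta/llama-3.1-8b-instruct",
    "image_tag": "1.3.0",
}
```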

k8s_nim_operator_config: nemo_microservices.types.v2.inference.k8s_nim_operator_config_param.K8sNIMOperatorConfigParam#

None

Kubernetes configuration for NIM deployment via k8s-nim-operator.

These fields provide typed access to commonly used NIMService spec fields and are applied before override_config in the compilation precedence.

lora_enabled: bool#

None

Whether to enable LoRA support

model_name: str#

None

Model name: an HF-style name for HuggingFace models, or the NMP name for NMP models

model_namespace: str#

None

Model namespace: an HF-style namespace for HuggingFace models, or the NMP namespace for NMP models

model_provider: str#

None

Model provider: 'hf' for HuggingFace or 'nmp' for NMP

model_revision: str#

None

Model revision (branch, tag, or commit).

If not specified, it is parsed from the @revision suffix of model_name, or defaults to 'main'
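
Both ways of pinning a revision shown below are equivalent under that rule; the model identifier and branch name are illustrative:

```python
from nemo_microservices.types.v2.inference.nim_deployment_param import NIMDeploymentParam

# Revision embedded in model_name via the @revision suffix ...
via_suffix: NIMDeploymentParam = {
    "gpu": 1,
    "model_provider": "hf",
    "model_name": "Llama-3.1-8B-Instruct@dev-branch",  # illustrative name and branch
}

# ... or given explicitly via model_revision. Omitting both falls back to "main".
explicit: NIMDeploymentParam = {
    "gpu": 1,
    "model_provider": "hf",
    "model_name": "Llama-3.1-8B-Instruct",  # illustrative name
    "model_revision": "dev-branch",
}
```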

model_type: nemo_microservices.types.v2.inference.model_type.ModelType#

None

Model type enum for NIM deployments.

override_config: Dict[str, object]#

None

Raw NIMService spec configuration that takes precedence over generated config.

Allows end users to provide advanced configuration options directly.
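
A sketch of the precedence chain: the generated config is compiled first, the typed k8s_nim_operator_config fields are applied next, and override_config is merged last, so its entries win. The spec key below is an illustrative assumption, not a documented schema:

```python
from nemo_microservices.types.v2.inference.nim_deployment_param import NIMDeploymentParam

# override_config is a raw Dict[str, object] applied last in the compilation
# precedence, so anything placed here overrides both the generated spec and
# the typed k8s_nim_operator_config fields. The key below is an assumed
# NIMService spec field, shown only to illustrate the precedence.
deployment: NIMDeploymentParam = {
    "gpu": 4,
    "model_provider": "nmp",
    "model_namespace": "default",      # illustrative NMP namespace
    "model_name": "my-finetuned-llm",  # illustrative NMP model name
    "override_config": {
        "replicas": 2,  # assumed spec field name (illustration only)
    },
}
```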