nemo_microservices.types.v2.inference.nim_deployment#

Module Contents#

Classes#

API#

class nemo_microservices.types.v2.inference.nim_deployment.NIMDeployment(/, **data: typing.Any)#

Bases: nemo_microservices._models.BaseModel

additional_envs: Optional[Dict[str, object]]#

None

Additional environment variables for the deployment

disk_size: Optional[str]#

None

Disk size for the deployment

gpu: int#

None

Number of GPUs required for the deployment

image_name: Optional[str]#

None

Container image name from NGC. If not specified, defaults to the multi-llm image.

image_tag: Optional[str]#

None

Container image tag from NGC

k8s_nim_operator_config: Optional[nemo_microservices.types.v2.inference.k8s_nim_operator_config.K8sNIMOperatorConfig]#

None

Kubernetes configuration for NIM deployment via k8s-nim-operator.

These fields provide typed access to commonly-used NIMService Spec fields and are applied before override_config in the compilation precedence.

lora_enabled: Optional[bool]#

None

Whether to enable LoRA support

model_name: Optional[str]#

None

Model name - HF style for HuggingFace, NMP name for NMP models

model_namespace: Optional[str]#

None

Model namespace - HF style for HuggingFace, NMP namespace for NMP models

model_provider: Optional[str]#

None

Model provider: 'hf' for HuggingFace or 'nmp' for NMP

model_revision: Optional[str]#

None

Model revision (branch, tag, or commit).

If not specified, it is parsed from the model_name's @revision suffix, or defaults to 'main'.

model_type: Optional[nemo_microservices.types.v2.inference.model_type.ModelType]#

None

Model type enum for NIM deployments.

override_config: Optional[Dict[str, object]]#

None

Raw NIMService spec configuration that takes precedence over generated config.

Allows end users to provide advanced configuration options directly.