nemo_curator.core.serve.dynamo.config

View as Markdown

Module Contents

Classes

| Name | Description |
| --- | --- |
| DynamoRoleConfig | Per-role config for disaggregated Dynamo serving. |
| DynamoRouterConfig | Frontend router config for Dynamo. |
| DynamoServerConfig | Server-level Dynamo config. |
| DynamoVLLMModelConfig | Dynamo vLLM model config. |

API

class nemo_curator.core.serve.dynamo.config.DynamoRoleConfig(
num_replicas: int = 1,
engine_kwargs: dict[str, typing.Any] = dict()
)
Dataclass

Per-role config for disaggregated Dynamo serving.

engine_kwargs
dict[str, Any] = field(default_factory=dict)
num_replicas
int = 1
nemo_curator.core.serve.dynamo.config.DynamoRoleConfig.__post_init__() -> None
class nemo_curator.core.serve.dynamo.config.DynamoRouterConfig(
mode: typing.Literal['round_robin', 'random', 'kv', 'direct'] | None = None,
kv_events: bool = False,
router_kwargs: dict[str, typing.Any] = dict()
)
Dataclass

Frontend router config for Dynamo.

`mode=None` means "auto": Curator picks `"kv"` if any model uses `mode="disagg"`; otherwise it leaves `--router-mode` unset so that the Dynamo frontend falls back to its own `round_robin` default. `kv_events` only applies when `mode == "kv"`: pass `kv_events=True` to opt into exact ZMQ KV-cache event publishing; the default (`kv_events=False`) uses the router's approximate tree-based tracking. Anything else is forwarded to the Dynamo frontend as CLI args via `router_kwargs`.

_RESERVED_ROUTER_KWARGS
frozenset[str] = frozenset({'router_mode', 'router_kv_events'})
kv_events
bool = False
mode
Literal['round_robin', 'random', 'kv', 'direct'] | None = None
router_kwargs
dict[str, Any] = field(default_factory=dict)
nemo_curator.core.serve.dynamo.config.DynamoRouterConfig.__post_init__() -> None
class nemo_curator.core.serve.dynamo.config.DynamoServerConfig(
etcd_endpoint: str | None = None,
nats_url: str | None = None,
namespace: str = DEFAULT_DYNAMO_NAMESPACE,
request_plane: str = DEFAULT_DYNAMO_REQUEST_PLANE,
event_plane: str = DEFAULT_DYNAMO_EVENT_PLANE,
router: nemo_curator.core.serve.dynamo.config.DynamoRouterConfig = DynamoRouterConfig()
)
Dataclass

Bases: BaseServerConfig

Server-level Dynamo config.

etcd_endpoint
str | None = None
event_plane
str = DEFAULT_DYNAMO_EVENT_PLANE
model_configs
tuple[type[BaseModelConfig], ...] = (DynamoVLLMModelConfig,)
namespace
str = DEFAULT_DYNAMO_NAMESPACE
nats_url
str | None = None
request_plane
str = DEFAULT_DYNAMO_REQUEST_PLANE
router
DynamoRouterConfig = field(default_factory=DynamoRouterConfig)
class nemo_curator.core.serve.dynamo.config.DynamoVLLMModelConfig(
model_identifier: str,
model_name: str | None = None,
runtime_env: dict[str, typing.Any] = dict(),
engine_kwargs: dict[str, typing.Any] = dict(),
num_replicas: int = 1,
mode: typing.Literal['aggregated', 'disagg'] = 'aggregated',
prefill: nemo_curator.core.serve.dynamo.config.DynamoRoleConfig | None = None,
decode: nemo_curator.core.serve.dynamo.config.DynamoRoleConfig | None = None,
dynamo_kwargs: dict[str, typing.Any] = dict()
)
Dataclass

Bases: BaseModelConfig

Dynamo vLLM model config.

Typed fields cover the deployment/placement knobs that Curator branches on; anything else is forwarded to `python -m dynamo.vllm` via `dynamo_kwargs`. `kv_events_config` and `kv_transfer_config` are Curator-managed (`init=False`): `kv_events_config` is derived from router state and port allocation, and `kv_transfer_config` defaults to NixlConnector for `disagg` mode.

decode
DynamoRoleConfig | None = None
dynamo_kwargs
dict[str, Any] = field(default_factory=dict)
engine_kwargs
dict[str, Any] = field(default_factory=dict)
kv_events_config
dict[str, Any]
kv_transfer_config
dict[str, Any]
mode
Literal['aggregated', 'disagg'] = 'aggregated'
num_replicas
int = 1
prefill
DynamoRoleConfig | None = None
nemo_curator.core.serve.dynamo.config.DynamoVLLMModelConfig.__post_init__() -> None