nemo_rl.models.policy#


API#

class nemo_rl.models.policy.LoRAConfigDisabled#

Bases: typing.TypedDict

enabled: Literal[False]#

class nemo_rl.models.policy.LoRAConfig#

Bases: typing.TypedDict

enabled: Literal[True]#

target_modules: list[str]#

exclude_modules: list[str]#

match_all_linear: NotRequired[bool]#

dim: int#

alpha: int#

dropout: float#

dropout_position: Literal['pre', 'post']#

lora_A_init: str#

use_triton: NotRequired[bool]#
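The two variants above form a tagged union keyed on `enabled`. A minimal sketch of an enabled config, assuming `nemo_rl` is importable; the module names, rank, and initializer string are illustrative placeholders, not library defaults:

```python
from nemo_rl.models.policy import LoRAConfig, LoRAConfigDisabled

lora_cfg: LoRAConfig = {
    "enabled": True,
    "target_modules": ["q_proj", "v_proj"],  # assumed HF-style module names
    "exclude_modules": [],
    "match_all_linear": False,   # NotRequired; may be omitted
    "dim": 16,                   # LoRA rank
    "alpha": 32,                 # LoRA scaling factor
    "dropout": 0.05,
    "dropout_position": "post",  # 'pre' or 'post'
    "lora_A_init": "xavier",     # assumed initializer name
    "use_triton": False,         # NotRequired; may be omitted
}

no_lora: LoRAConfigDisabled = {"enabled": False}
```

TypedDicts carry no runtime validation; a static checker such as mypy or pyright is what verifies these literals against the declared fields.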

class nemo_rl.models.policy.DTensorConfigDisabled#

Bases: typing.TypedDict

enabled: Literal[False]#

class nemo_rl.models.policy.DTensorConfig#

Bases: typing.TypedDict

enabled: Literal[True]#

env_vars: NotRequired[dict[str, str] | None]#

_v2: NotRequired[bool]#

cpu_offload: bool#

sequence_parallel: bool#

activation_checkpointing: bool#

tensor_parallel_size: int#

context_parallel_size: int#

custom_parallel_plan: str | None#

clear_cache_every_n_steps: NotRequired[int | None]#

lora_cfg: NotRequired[nemo_rl.models.policy.LoRAConfig | nemo_rl.models.policy.LoRAConfigDisabled]#
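As with LoRA, `enabled` discriminates the two variants. An illustrative enabled config covering the required fields; the parallelism sizes are placeholders for a small multi-GPU run, and the NotRequired keys are omitted except `lora_cfg`:

```python
from nemo_rl.models.policy import DTensorConfig

dtensor_cfg: DTensorConfig = {
    "enabled": True,
    "cpu_offload": False,
    "sequence_parallel": False,
    "activation_checkpointing": True,
    "tensor_parallel_size": 2,
    "context_parallel_size": 1,
    "custom_parallel_plan": None,    # or a string naming a custom plan (assumed)
    "lora_cfg": {"enabled": False},  # NotRequired; shown here for completeness
}
```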

class nemo_rl.models.policy.SequencePackingConfigDisabled#

Bases: typing.TypedDict

enabled: Literal[False]#

class nemo_rl.models.policy.SequencePackingConfig#

Bases: typing.TypedDict

enabled: Literal[True]#

train_mb_tokens: int#

logprob_mb_tokens: NotRequired[int]#

algorithm: str#
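A sketch of an enabled packing config; the token budgets and the algorithm string are illustrative, since this page does not enumerate the accepted algorithm names:

```python
from nemo_rl.models.policy import SequencePackingConfig

seq_packing: SequencePackingConfig = {
    "enabled": True,
    "train_mb_tokens": 8192,              # token budget per training micro-batch
    "logprob_mb_tokens": 16384,           # NotRequired; budget for log-prob passes
    "algorithm": "first_fit_decreasing",  # assumed bin-packing algorithm name
}
```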

class nemo_rl.models.policy.RewardModelConfig#

Bases: typing.TypedDict

enabled: bool#

reward_model_type: str#
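A minimal construction; the `reward_model_type` string below is hypothetical, as the accepted values are not listed on this page:

```python
from nemo_rl.models.policy import RewardModelConfig

rm_cfg: RewardModelConfig = {
    "enabled": True,
    "reward_model_type": "bradley_terry",  # hypothetical value; check the library for accepted types
}
```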

class nemo_rl.models.policy.MegatronOptimizerConfig#

Bases: typing.TypedDict

optimizer: str#

lr: float#

min_lr: float#

weight_decay: float#

bf16: bool#

fp16: bool#

params_dtype: str#

adam_beta1: float#

adam_beta2: float#

adam_eps: float#

sgd_momentum: float#

use_distributed_optimizer: bool#

use_precision_aware_optimizer: bool#

clip_grad: float#

optimizer_cpu_offload: bool#

optimizer_offload_fraction: float#
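None of the fields here are marked NotRequired, so a complete literal is shown below; all numeric values are illustrative fine-tuning magnitudes, not defaults:

```python
from nemo_rl.models.policy import MegatronOptimizerConfig

mcore_opt: MegatronOptimizerConfig = {
    "optimizer": "adam",
    "lr": 3e-5,
    "min_lr": 3e-6,
    "weight_decay": 0.01,
    "bf16": True,
    "fp16": False,
    "params_dtype": "bfloat16",
    "adam_beta1": 0.9,
    "adam_beta2": 0.95,
    "adam_eps": 1e-8,
    "sgd_momentum": 0.9,  # only meaningful for SGD, but the key is still present
    "use_distributed_optimizer": True,
    "use_precision_aware_optimizer": False,
    "clip_grad": 1.0,
    "optimizer_cpu_offload": False,
    "optimizer_offload_fraction": 0.0,
}
```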

class nemo_rl.models.policy.MegatronSchedulerConfig#

Bases: typing.TypedDict

start_weight_decay: float#

end_weight_decay: float#

weight_decay_incr_style: str#

lr_decay_style: str#

lr_decay_iters: NotRequired[int | None]#

lr_warmup_iters: int#

lr_warmup_init: float#
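An illustrative instance; the style strings presumably follow Megatron-LM conventions (e.g. "constant", "linear", "cosine"), which this page does not spell out:

```python
from nemo_rl.models.policy import MegatronSchedulerConfig

mcore_sched: MegatronSchedulerConfig = {
    "start_weight_decay": 0.01,
    "end_weight_decay": 0.01,
    "weight_decay_incr_style": "constant",
    "lr_decay_style": "cosine",
    "lr_decay_iters": None,  # NotRequired; may also be omitted entirely
    "lr_warmup_iters": 100,
    "lr_warmup_init": 0.0,
}
```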

class nemo_rl.models.policy.MegatronDDPConfig#

Bases: typing.TypedDict

grad_reduce_in_fp32: bool#

overlap_grad_reduce: bool#

overlap_param_gather: bool#

use_custom_fsdp: bool#

data_parallel_sharding_strategy: str#
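An illustrative instance; the sharding-strategy string is an assumption, since the valid values are not documented on this page:

```python
from nemo_rl.models.policy import MegatronDDPConfig

mcore_ddp: MegatronDDPConfig = {
    "grad_reduce_in_fp32": True,
    "overlap_grad_reduce": True,
    "overlap_param_gather": True,
    "use_custom_fsdp": False,
    "data_parallel_sharding_strategy": "optim_grads_params",  # assumed value
}
```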

class nemo_rl.models.policy.MegatronConfigDisabled#

Bases: typing.TypedDict

enabled: Literal[False]#

class nemo_rl.models.policy.MegatronConfig#

Bases: typing.TypedDict

enabled: Literal[True]#

env_vars: NotRequired[dict[str, str] | None]#

empty_unused_memory_level: int#

activation_checkpointing: bool#

tensor_model_parallel_size: int#

pipeline_model_parallel_size: int#

num_layers_in_first_pipeline_stage: int | None#

num_layers_in_last_pipeline_stage: int | None#

context_parallel_size: int#

pipeline_dtype: str#

sequence_parallel: bool#

freeze_moe_router: bool#

expert_tensor_parallel_size: int#

expert_model_parallel_size: int#

defer_fp32_logits: NotRequired[bool]#

apply_rope_fusion: bool#

bias_activation_fusion: bool#

force_overwrite_initial_ckpt: NotRequired[bool]#

moe_per_layer_logging: bool#

optimizer: nemo_rl.models.policy.MegatronOptimizerConfig#

scheduler: nemo_rl.models.policy.MegatronSchedulerConfig#

distributed_data_parallel_config: nemo_rl.models.policy.MegatronDDPConfig#
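Assembling the pieces: an enabled Megatron config that reuses the `mcore_opt`, `mcore_sched`, and `mcore_ddp` sketches from above. Parallelism sizes and fusion flags are illustrative, and the NotRequired keys (`env_vars`, `defer_fp32_logits`, `force_overwrite_initial_ckpt`) are omitted:

```python
from nemo_rl.models.policy import MegatronConfig

megatron_cfg: MegatronConfig = {
    "enabled": True,
    "empty_unused_memory_level": 1,
    "activation_checkpointing": True,
    "tensor_model_parallel_size": 4,
    "pipeline_model_parallel_size": 2,
    "num_layers_in_first_pipeline_stage": None,  # None: stage layout left to the library (assumed)
    "num_layers_in_last_pipeline_stage": None,
    "context_parallel_size": 1,
    "pipeline_dtype": "bfloat16",
    "sequence_parallel": True,
    "freeze_moe_router": False,
    "expert_tensor_parallel_size": 1,
    "expert_model_parallel_size": 1,
    "apply_rope_fusion": True,
    "bias_activation_fusion": True,
    "moe_per_layer_logging": False,
    "optimizer": mcore_opt,                      # MegatronOptimizerConfig sketch above
    "scheduler": mcore_sched,                    # MegatronSchedulerConfig sketch above
    "distributed_data_parallel_config": mcore_ddp,
}
```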

class nemo_rl.models.policy.TokenizerConfig#

Bases: typing.TypedDict

name: str#

chat_template: NotRequired[str]#

chat_template_kwargs: NotRequired[dict[str, Any] | None]#
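A minimal instance; only `name` is required, and it is presumably a Hugging Face tokenizer/model identifier (the one below is a placeholder):

```python
from nemo_rl.models.policy import TokenizerConfig

tokenizer_cfg: TokenizerConfig = {
    "name": "meta-llama/Llama-3.1-8B-Instruct",
    # "chat_template" and "chat_template_kwargs" are NotRequired and omitted.
}
```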

class nemo_rl.models.policy.PytorchOptimizerConfig#

Bases: typing.TypedDict

name: str#

kwargs: dict[str, Any]#
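A sketch under the assumption that `name` identifies a PyTorch optimizer class and `kwargs` is forwarded to its constructor; the naming scheme is not specified on this page:

```python
from nemo_rl.models.policy import PytorchOptimizerConfig

optimizer_cfg: PytorchOptimizerConfig = {
    "name": "torch.optim.AdamW",  # assumed fully-qualified naming scheme
    "kwargs": {"lr": 5.0e-6, "weight_decay": 0.1},
}
```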

class nemo_rl.models.policy.SinglePytorchSchedulerConfig#

Bases: typing.TypedDict

name: str#

kwargs: dict[str, Any]#

class nemo_rl.models.policy.SinglePytorchMilestonesConfig#

Bases: typing.TypedDict

milestones: list[int]#

nemo_rl.models.policy.SchedulerMilestones#
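The `SchedulerMilestones` alias is not expanded on this page, but `PolicyConfig.scheduler` (below) accepts a list mixing scheduler entries with a milestones entry, which suggests a chained-scheduler pattern. A hedged sketch of such a list; the scheduler names and kwargs are assumptions:

```python
from nemo_rl.models.policy import (
    SinglePytorchMilestonesConfig,
    SinglePytorchSchedulerConfig,
)

warmup: SinglePytorchSchedulerConfig = {
    "name": "torch.optim.lr_scheduler.LinearLR",  # assumed naming scheme
    "kwargs": {"start_factor": 0.1, "total_iters": 50},
}
constant: SinglePytorchSchedulerConfig = {
    "name": "torch.optim.lr_scheduler.ConstantLR",
    "kwargs": {"factor": 1.0},
}
milestones: SinglePytorchMilestonesConfig = {"milestones": [50]}  # switch point(s)

scheduler_cfg = [warmup, constant, milestones]
```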

class nemo_rl.models.policy.DynamicBatchingConfigDisabled#

Bases: typing.TypedDict

enabled: Literal[False]#

class nemo_rl.models.policy.DynamicBatchingConfig#

Bases: typing.TypedDict

enabled: Literal[True]#

train_mb_tokens: int#

logprob_mb_tokens: NotRequired[int]#

sequence_length_round: int#
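An enabled instance with placeholder budgets; reading `sequence_length_round` as a padding multiple is inferred from its name, not confirmed here:

```python
from nemo_rl.models.policy import DynamicBatchingConfig

dyn_batching: DynamicBatchingConfig = {
    "enabled": True,
    "train_mb_tokens": 8192,
    "logprob_mb_tokens": 16384,   # NotRequired
    "sequence_length_round": 64,  # round sequence lengths to a multiple (assumed)
}
```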

class nemo_rl.models.policy.PolicyConfig#

Bases: typing.TypedDict

model_name: str#

tokenizer: nemo_rl.models.policy.TokenizerConfig#

train_global_batch_size: int#

train_micro_batch_size: int#

logprob_batch_size: NotRequired[int]#

logprob_chunk_size: NotRequired[int | None]#

generation: NotRequired[nemo_rl.models.generation.interfaces.GenerationConfig]#

generation_batch_size: NotRequired[int]#

precision: str#

reward_model_cfg: NotRequired[nemo_rl.models.policy.RewardModelConfig]#

dtensor_cfg: nemo_rl.models.policy.DTensorConfig | nemo_rl.models.policy.DTensorConfigDisabled#

megatron_cfg: NotRequired[nemo_rl.models.policy.MegatronConfig | nemo_rl.models.policy.MegatronConfigDisabled]#

hf_config_overrides: NotRequired[dict[str, Any]]#

dynamic_batching: nemo_rl.models.policy.DynamicBatchingConfig | nemo_rl.models.policy.DynamicBatchingConfigDisabled#

sequence_packing: NotRequired[nemo_rl.models.policy.SequencePackingConfig | nemo_rl.models.policy.SequencePackingConfigDisabled]#

make_sequence_length_divisible_by: int#

max_total_sequence_length: int#

max_grad_norm: NotRequired[float | int | None]#

refit_buffer_size_gb: NotRequired[float]#

optimizer: NotRequired[nemo_rl.models.policy.PytorchOptimizerConfig | None]#

scheduler: NotRequired[list[nemo_rl.models.policy.SinglePytorchSchedulerConfig | nemo_rl.models.policy.SinglePytorchMilestonesConfig] | nemo_rl.models.policy.SchedulerMilestones | None]#
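Putting it together: an end-to-end sketch that takes the DTensor path and leaves the Megatron, generation, and reward-model sections out (they are NotRequired). Every value is illustrative:

```python
from nemo_rl.models.policy import PolicyConfig

policy_cfg: PolicyConfig = {
    "model_name": "meta-llama/Llama-3.1-8B-Instruct",  # placeholder model
    "tokenizer": {"name": "meta-llama/Llama-3.1-8B-Instruct"},
    "train_global_batch_size": 64,
    "train_micro_batch_size": 2,
    "precision": "bfloat16",
    "dtensor_cfg": {
        "enabled": True,
        "cpu_offload": False,
        "sequence_parallel": False,
        "activation_checkpointing": True,
        "tensor_parallel_size": 2,
        "context_parallel_size": 1,
        "custom_parallel_plan": None,
    },
    "dynamic_batching": {"enabled": False},
    "sequence_packing": {"enabled": False},  # NotRequired; shown disabled
    "make_sequence_length_divisible_by": 8,
    "max_total_sequence_length": 4096,
    "max_grad_norm": 1.0,                    # NotRequired
    "optimizer": {                           # NotRequired
        "name": "torch.optim.AdamW",         # assumed naming scheme
        "kwargs": {"lr": 5.0e-6},
    },
}
```

Because unions like `dtensor_cfg` are discriminated by the `enabled` literal, a type checker can narrow which variant applies from that single key.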