nemo_rl.models.policy#
Submodules#
Package Contents#
Classes#
Data#
API#
- class nemo_rl.models.policy.LoRAConfigDisabled#
Bases:
typing.TypedDict
- enabled: Literal[False]#
None
- class nemo_rl.models.policy.LoRAConfig#
Bases:
typing.TypedDict
- enabled: Literal[True]#
None
- target_modules: list[str]#
None
- exclude_modules: list[str]#
None
- match_all_linear: NotRequired[bool]#
None
- dim: int#
None
- alpha: int#
None
- dropout: float#
None
- dropout_position: Literal["pre", "post"]#
None
- lora_A_init: str#
None
- use_triton: NotRequired[bool]#
None
- class nemo_rl.models.policy.DTensorConfigDisabled#
Bases:
typing.TypedDict
- enabled: Literal[False]#
None
- class nemo_rl.models.policy.DTensorConfig#
Bases:
typing.TypedDict
- enabled: Literal[True]#
None
- env_vars: NotRequired[dict[str, str] | None]#
None
- _v2: NotRequired[bool]#
None
- cpu_offload: bool#
None
- sequence_parallel: bool#
None
- activation_checkpointing: bool#
None
- tensor_parallel_size: int#
None
- context_parallel_size: int#
None
- custom_parallel_plan: str | None#
None
- clear_cache_every_n_steps: NotRequired[int | None]#
None
- lora_cfg: NotRequired[nemo_rl.models.policy.LoRAConfig | nemo_rl.models.policy.LoRAConfigDisabled]#
None
- class nemo_rl.models.policy.SequencePackingConfigDisabled#
Bases:
typing.TypedDict
- enabled: Literal[False]#
None
- class nemo_rl.models.policy.SequencePackingConfig#
Bases:
typing.TypedDict
- enabled: Literal[True]#
None
- train_mb_tokens: int#
None
- logprob_mb_tokens: NotRequired[int]#
None
- algorithm: str#
None
- class nemo_rl.models.policy.RewardModelConfig#
Bases:
typing.TypedDict
- enabled: bool#
None
- reward_model_type: str#
None
- class nemo_rl.models.policy.MegatronOptimizerConfig#
Bases:
typing.TypedDict
- optimizer: str#
None
- lr: float#
None
- min_lr: float#
None
- weight_decay: float#
None
- bf16: bool#
None
- fp16: bool#
None
- params_dtype: str#
None
- adam_beta1: float#
None
- adam_beta2: float#
None
- adam_eps: float#
None
- sgd_momentum: float#
None
- use_distributed_optimizer: bool#
None
- use_precision_aware_optimizer: bool#
None
- clip_grad: float#
None
- optimizer_cpu_offload: bool#
None
- optimizer_offload_fraction: float#
None
- class nemo_rl.models.policy.MegatronSchedulerConfig#
Bases:
typing.TypedDict
- start_weight_decay: float#
None
- end_weight_decay: float#
None
- weight_decay_incr_style: str#
None
- lr_decay_style: str#
None
- lr_decay_iters: NotRequired[int | None]#
None
- lr_warmup_iters: int#
None
- lr_warmup_init: float#
None
- class nemo_rl.models.policy.MegatronDDPConfig#
Bases:
typing.TypedDict
- grad_reduce_in_fp32: bool#
None
- overlap_grad_reduce: bool#
None
- overlap_param_gather: bool#
None
- use_custom_fsdp: bool#
None
- data_parallel_sharding_strategy: str#
None
- class nemo_rl.models.policy.MegatronConfigDisabled#
Bases:
typing.TypedDict
- enabled: Literal[False]#
None
- class nemo_rl.models.policy.MegatronConfig#
Bases:
typing.TypedDict
- enabled: Literal[True]#
None
- env_vars: NotRequired[dict[str, str] | None]#
None
- empty_unused_memory_level: int#
None
- activation_checkpointing: bool#
None
- tensor_model_parallel_size: int#
None
- pipeline_model_parallel_size: int#
None
- num_layers_in_first_pipeline_stage: int | None#
None
- num_layers_in_last_pipeline_stage: int | None#
None
- context_parallel_size: int#
None
- pipeline_dtype: str#
None
- sequence_parallel: bool#
None
- freeze_moe_router: bool#
None
- expert_tensor_parallel_size: int#
None
- expert_model_parallel_size: int#
None
- defer_fp32_logits: NotRequired[bool]#
None
- apply_rope_fusion: bool#
None
- bias_activation_fusion: bool#
None
- force_overwrite_initial_ckpt: NotRequired[bool]#
None
- moe_per_layer_logging: bool#
None
- optimizer: nemo_rl.models.policy.MegatronOptimizerConfig#
None
- scheduler: nemo_rl.models.policy.MegatronSchedulerConfig#
None
- distributed_data_parallel_config: nemo_rl.models.policy.MegatronDDPConfig#
None
- class nemo_rl.models.policy.TokenizerConfig#
Bases:
typing.TypedDict
- name: str#
None
- chat_template: NotRequired[str]#
None
- chat_template_kwargs: NotRequired[dict[str, Any] | None]#
None
- class nemo_rl.models.policy.PytorchOptimizerConfig#
Bases:
typing.TypedDict
- name: str#
None
- kwargs: dict[str, Any]#
None
- class nemo_rl.models.policy.SinglePytorchSchedulerConfig#
Bases:
typing.TypedDict
- name: str#
None
- kwargs: dict[str, Any]#
None
- class nemo_rl.models.policy.SinglePytorchMilestonesConfig#
Bases:
typing.TypedDict
- milestones: list[int]#
None
- nemo_rl.models.policy.SchedulerMilestones#
None
- class nemo_rl.models.policy.DynamicBatchingConfigDisabled#
Bases:
typing.TypedDict
- enabled: Literal[False]#
None
- class nemo_rl.models.policy.DynamicBatchingConfig#
Bases:
typing.TypedDict
- enabled: Literal[True]#
None
- train_mb_tokens: int#
None
- logprob_mb_tokens: NotRequired[int]#
None
- sequence_length_round: int#
None
- class nemo_rl.models.policy.PolicyConfig#
Bases:
typing.TypedDict
- model_name: str#
None
- tokenizer: nemo_rl.models.policy.TokenizerConfig#
None
- train_global_batch_size: int#
None
- train_micro_batch_size: int#
None
- logprob_batch_size: NotRequired[int]#
None
- logprob_chunk_size: NotRequired[int | None]#
None
- generation: NotRequired[nemo_rl.models.generation.interfaces.GenerationConfig]#
None
- generation_batch_size: NotRequired[int]#
None
- precision: str#
None
- reward_model_cfg: NotRequired[nemo_rl.models.policy.RewardModelConfig]#
None
- dtensor_cfg: nemo_rl.models.policy.DTensorConfig | nemo_rl.models.policy.DTensorConfigDisabled#
None
- megatron_cfg: NotRequired[nemo_rl.models.policy.MegatronConfig | nemo_rl.models.policy.MegatronConfigDisabled]#
None
- hf_config_overrides: NotRequired[dict[str, Any]]#
None
- dynamic_batching: nemo_rl.models.policy.DynamicBatchingConfig | nemo_rl.models.policy.DynamicBatchingConfigDisabled#
None
- sequence_packing: NotRequired[nemo_rl.models.policy.SequencePackingConfig | nemo_rl.models.policy.SequencePackingConfigDisabled]#
None
- make_sequence_length_divisible_by: int#
None
- max_total_sequence_length: int#
None
- max_grad_norm: NotRequired[float | int | None]#
None
- refit_buffer_size_gb: NotRequired[float]#
None
- optimizer: NotRequired[nemo_rl.models.policy.PytorchOptimizerConfig | None]#
None
- scheduler: NotRequired[list[nemo_rl.models.policy.SinglePytorchSchedulerConfig | nemo_rl.models.policy.SinglePytorchMilestonesConfig] | nemo_rl.models.policy.SchedulerMilestones | None]#
None