nemo_rl.models.policy#
Submodules#
Package Contents#
Classes#
Data#
API#
- class nemo_rl.models.policy.DTensorConfig[source]#
  Bases: typing.TypedDict
  - enabled: bool#
  - cpu_offload: NotRequired[bool]#
  - sequence_parallel: NotRequired[bool]#
  - activation_checkpointing: NotRequired[bool]#
  - tensor_parallel_size: NotRequired[int]#
  - context_parallel_size: NotRequired[int]#
  - custom_parallel_plan: NotRequired[str]#
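A minimal construction sketch: since `DTensorConfig` is a TypedDict, a plain dict with these keys satisfies it. Only `enabled` is required; the values below are illustrative assumptions, not documented defaults.

```python
from nemo_rl.models.policy import DTensorConfig

# Illustrative values only; every key except `enabled` is NotRequired.
dtensor_cfg: DTensorConfig = {
    "enabled": True,
    "tensor_parallel_size": 2,       # assumed: shard weights across 2 GPUs
    "context_parallel_size": 1,
    "sequence_parallel": False,
    "activation_checkpointing": True,
    "cpu_offload": False,
}
```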
- class nemo_rl.models.policy.SequencePackingConfig[source]#
  Bases: typing.TypedDict
  - enabled: bool#
  - train_mb_tokens: int#
  - logprob_mb_tokens: int#
  - algorithm: str#
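A construction sketch with assumed token budgets and an assumed packing-algorithm name; the valid algorithm strings are not listed on this page.

```python
from nemo_rl.models.policy import SequencePackingConfig

sequence_packing: SequencePackingConfig = {
    "enabled": True,
    "train_mb_tokens": 8192,       # assumed token budget per training microbatch
    "logprob_mb_tokens": 16384,    # assumed token budget per logprob microbatch
    "algorithm": "first_fit_decreasing",  # assumed algorithm name
}
```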
- class nemo_rl.models.policy.MegatronOptimizerConfig[source]#
  Bases: typing.TypedDict
  - optimizer: str#
  - lr: float#
  - min_lr: float#
  - weight_decay: float#
  - bf16: bool#
  - fp16: bool#
  - params_dtype: str#
  - adam_beta1: float#
  - adam_beta2: float#
  - adam_eps: float#
  - sgd_momentum: float#
  - use_distributed_optimizer: bool#
  - use_precision_aware_optimizer: bool#
  - clip_grad: float#
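A sketch of an Adam-style setup; every value is an illustrative assumption rather than a documented default.

```python
from nemo_rl.models.policy import MegatronOptimizerConfig

megatron_optimizer: MegatronOptimizerConfig = {
    "optimizer": "adam",
    "lr": 5.0e-6,
    "min_lr": 5.0e-7,
    "weight_decay": 0.01,
    "bf16": True,
    "fp16": False,
    "params_dtype": "float32",
    "adam_beta1": 0.9,
    "adam_beta2": 0.98,
    "adam_eps": 1.0e-8,
    "sgd_momentum": 0.9,            # only relevant when optimizer is "sgd"
    "use_distributed_optimizer": True,
    "use_precision_aware_optimizer": True,
    "clip_grad": 1.0,
}
```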
- class nemo_rl.models.policy.MegatronSchedulerConfig[source]#
  Bases: typing.TypedDict
  - start_weight_decay: float#
  - end_weight_decay: float#
  - weight_decay_incr_style: str#
  - lr_decay_style: str#
  - lr_decay_iters: int#
  - lr_warmup_iters: int#
  - lr_warmup_init: float#
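A sketch of a warmup-then-constant schedule; the style strings and iteration counts are assumptions.

```python
from nemo_rl.models.policy import MegatronSchedulerConfig

megatron_scheduler: MegatronSchedulerConfig = {
    "start_weight_decay": 0.01,
    "end_weight_decay": 0.01,
    "weight_decay_incr_style": "constant",  # assumed style name
    "lr_decay_style": "constant",           # assumed style name
    "lr_decay_iters": 1000,
    "lr_warmup_iters": 50,
    "lr_warmup_init": 5.0e-7,
}
```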
- class nemo_rl.models.policy.MegatronDDPConfig[source]#
  Bases: typing.TypedDict
  - grad_reduce_in_fp32: bool#
  - overlap_grad_reduce: bool#
  - overlap_param_gather: bool#
  - average_in_collective: bool#
  - use_custom_fsdp: bool#
  - data_parallel_sharding_strategy: str#
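A sketch with assumed values; the valid sharding-strategy strings are not documented on this page.

```python
from nemo_rl.models.policy import MegatronDDPConfig

ddp_cfg: MegatronDDPConfig = {
    "grad_reduce_in_fp32": True,
    "overlap_grad_reduce": True,
    "overlap_param_gather": True,
    "average_in_collective": True,
    "use_custom_fsdp": False,
    "data_parallel_sharding_strategy": "optim_grads_params",  # assumed strategy name
}
```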
- class nemo_rl.models.policy.MegatronConfig[source]#
  Bases: typing.TypedDict
  - enabled: bool#
  - empty_unused_memory_level: int#
  - activation_checkpointing: bool#
  - converter_type: str#
  - tensor_model_parallel_size: int#
  - pipeline_model_parallel_size: int#
  - num_layers_in_first_pipeline_stage: int#
  - num_layers_in_last_pipeline_stage: int#
  - context_parallel_size: int#
  - pipeline_dtype: str#
  - sequence_parallel: bool#
  - freeze_moe_router: bool#
  - expert_tensor_parallel_size: int#
  - expert_model_parallel_size: int#
  - optimizer: NotRequired[nemo_rl.models.policy.MegatronOptimizerConfig]#
  - scheduler: NotRequired[nemo_rl.models.policy.MegatronSchedulerConfig]#
  - distributed_data_parallel_config: nemo_rl.models.policy.MegatronDDPConfig#
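A sketch that fills the required keys plus the required nested DDP config; `optimizer` and `scheduler` are NotRequired and omitted here, and all values (including the converter name and layer counts) are illustrative assumptions.

```python
from nemo_rl.models.policy import MegatronConfig, MegatronDDPConfig

ddp: MegatronDDPConfig = {
    "grad_reduce_in_fp32": True,
    "overlap_grad_reduce": True,
    "overlap_param_gather": True,
    "average_in_collective": True,
    "use_custom_fsdp": False,
    "data_parallel_sharding_strategy": "optim_grads_params",  # assumed strategy name
}

megatron_cfg: MegatronConfig = {
    "enabled": True,
    "empty_unused_memory_level": 1,
    "activation_checkpointing": True,
    "converter_type": "LlamaForCausalLM",   # assumed converter identifier
    "tensor_model_parallel_size": 2,
    "pipeline_model_parallel_size": 1,
    "num_layers_in_first_pipeline_stage": 0,
    "num_layers_in_last_pipeline_stage": 0,
    "context_parallel_size": 1,
    "pipeline_dtype": "bfloat16",
    "sequence_parallel": True,
    "freeze_moe_router": False,
    "expert_tensor_parallel_size": 1,
    "expert_model_parallel_size": 1,
    "distributed_data_parallel_config": ddp,
}
```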
- class nemo_rl.models.policy.TokenizerConfig[source]#
  Bases: typing.TypedDict
  - name: str#
  - chat_template: NotRequired[str]#
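A sketch assuming `name` is a Hugging Face tokenizer identifier; `chat_template` is NotRequired and can be omitted to use the tokenizer's own template.

```python
from nemo_rl.models.policy import TokenizerConfig

tokenizer_cfg: TokenizerConfig = {
    "name": "meta-llama/Llama-3.1-8B-Instruct",  # assumed tokenizer identifier
    # "chat_template": "...",  # optional: supply a custom template string here
}
```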
- class nemo_rl.models.policy.PytorchOptimizerConfig[source]#
  Bases: typing.TypedDict
  - name: str#
  - kwargs: dict[str, Any]#
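A sketch assuming `name` refers to a PyTorch optimizer class and `kwargs` is forwarded to its constructor; both assumptions are illustrative, not documented behavior.

```python
from nemo_rl.models.policy import PytorchOptimizerConfig

optimizer_cfg: PytorchOptimizerConfig = {
    "name": "torch.optim.AdamW",   # assumed optimizer path
    "kwargs": {"lr": 5.0e-6, "weight_decay": 0.01, "eps": 1.0e-8},
}
```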
- class nemo_rl.models.policy.SinglePytorchSchedulerConfig[source]#
  Bases: typing.TypedDict
  - name: str#
  - kwargs: dict[str, Any]#
  - milestones: NotRequired[list[int]]#
- nemo_rl.models.policy.SchedulerMilestones#
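A sketch of chaining two scheduler entries in a list; the scheduler names, kwargs, and the meaning of `milestones` (assumed to mark switch-over steps) are illustrative, and the concrete shape of `SchedulerMilestones` is not shown on this page.

```python
from nemo_rl.models.policy import SinglePytorchSchedulerConfig

warmup: SinglePytorchSchedulerConfig = {
    "name": "torch.optim.lr_scheduler.LinearLR",    # assumed scheduler path
    "kwargs": {"start_factor": 0.1, "total_iters": 50},
    "milestones": [50],  # assumed: step at which the next scheduler takes over
}
constant: SinglePytorchSchedulerConfig = {
    "name": "torch.optim.lr_scheduler.ConstantLR",  # assumed scheduler path
    "kwargs": {"factor": 1.0},
}
scheduler_cfg: list[SinglePytorchSchedulerConfig] = [warmup, constant]
```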
- class nemo_rl.models.policy.DynamicBatchingConfig[source]#
  Bases: typing.TypedDict
  - enabled: bool#
  - train_mb_tokens: NotRequired[int]#
  - logprob_mb_tokens: NotRequired[int]#
  - sequence_length_round: NotRequired[int]#
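A sketch with assumed token budgets; only `enabled` is required.

```python
from nemo_rl.models.policy import DynamicBatchingConfig

dynamic_batching: DynamicBatchingConfig = {
    "enabled": True,
    "train_mb_tokens": 8192,        # assumed per-microbatch token budget for training
    "logprob_mb_tokens": 16384,     # assumed per-microbatch token budget for logprobs
    "sequence_length_round": 64,    # assumed rounding granularity for sequence lengths
}
```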
- class nemo_rl.models.policy.PolicyConfig[source]#
  Bases: typing.TypedDict
  - model_name: str#
  - tokenizer: nemo_rl.models.policy.TokenizerConfig#
  - train_global_batch_size: int#
  - train_micro_batch_size: int#
  - logprob_batch_size: NotRequired[int]#
  - generation: NotRequired[nemo_rl.models.generation.interfaces.GenerationConfig]#
  - generation_batch_size: NotRequired[int]#
  - precision: str#
  - dtensor_cfg: nemo_rl.models.policy.DTensorConfig#
  - megatron_cfg: NotRequired[nemo_rl.models.policy.MegatronConfig]#
  - dynamic_batching: nemo_rl.models.policy.DynamicBatchingConfig#
  - sequence_packing: NotRequired[nemo_rl.models.policy.SequencePackingConfig]#
  - make_sequence_length_divisible_by: int#
  - max_total_sequence_length: int#
  - max_grad_norm: NotRequired[Union[float, int]]#
  - refit_buffer_size_gb: NotRequired[float]#
  - optimizer: NotRequired[nemo_rl.models.policy.PytorchOptimizerConfig]#
  - scheduler: NotRequired[list[nemo_rl.models.policy.SinglePytorchSchedulerConfig] | nemo_rl.models.policy.SchedulerMilestones]#
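A sketch that fills only the required keys; optional keys such as `generation`, `megatron_cfg`, `sequence_packing`, `optimizer`, and `scheduler` are omitted, and every value (model name, batch sizes, sequence length) is an illustrative assumption.

```python
from nemo_rl.models.policy import PolicyConfig

policy_cfg: PolicyConfig = {
    "model_name": "meta-llama/Llama-3.1-8B-Instruct",           # assumed model id
    "tokenizer": {"name": "meta-llama/Llama-3.1-8B-Instruct"},  # assumed tokenizer id
    "train_global_batch_size": 64,
    "train_micro_batch_size": 2,
    "precision": "bfloat16",
    "dtensor_cfg": {"enabled": True, "tensor_parallel_size": 2},
    "dynamic_batching": {"enabled": False},
    "make_sequence_length_divisible_by": 8,
    "max_total_sequence_length": 4096,
}
```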