nemo_rl.models.policy
Submodules#
Package Contents#
Classes#
API#
- class nemo_rl.models.policy.DTensorConfig[source]#
Bases: typing.TypedDict
- enabled: bool#
None
- cpu_offload: bool#
None
- sequence_parallel: bool#
None
- activation_checkpointing: bool#
None
- tensor_parallel_size: int#
None
- class nemo_rl.models.policy.TokenizerConfig[source]#
Bases: typing.TypedDict
- name: str#
None
- chat_template: str#
None
- class nemo_rl.models.policy.DynamicBatchingConfig[source]#
Bases: typing.TypedDict
- enabled: bool#
None
- train_mb_tokens: int#
None
- logprob_mb_tokens: int#
None
- sequence_length_round: int#
None
- class nemo_rl.models.policy.PolicyConfig[source]#
Bases: typing.TypedDict
- model_name: str#
None
- tokenizer: nemo_rl.models.policy.TokenizerConfig#
None
- train_global_batch_size: int#
None
- train_micro_batch_size: int#
None
- learning_rate: float#
None
- logprob_batch_size: int#
None
- generation: Optional[nemo_rl.models.generation.interfaces.GenerationConfig]#
None
- precision: str#
None
- dtensor_cfg: nemo_rl.models.policy.DTensorConfig#
None
- dynamic_batching: nemo_rl.models.policy.DynamicBatchingConfig#
None
- make_sequence_length_divisible_by: int#
None
- max_grad_norm: Optional[Union[float, int]]#
None
- fsdp_offload_enabled: bool#
None
- activation_checkpointing_enabled: bool#
None
- refit_buffer_size_gb: int#
None