nemo_rl.models.policy

Submodules

Package Contents

Classes

API

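class nemo_rl.models.policy.DTensorConfig

Bases: typing.TypedDict

enabled: bool

(No description provided for this key.)

cpu_offload: bool

(No description provided for this key.)

sequence_parallel: bool

(No description provided for this key.)

activation_checkpointing: bool

(No description provided for this key.)

tensor_parallel_size: int

(No description provided for this key.)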

class nemo_rl.models.policy.TokenizerConfig

Bases: typing.TypedDict

name: str

(No description provided for this key.)

chat_template: str

(No description provided for this key.)

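class nemo_rl.models.policy.DynamicBatchingConfig

Bases: typing.TypedDict

enabled: bool

(No description provided for this key.)

train_mb_tokens: int

(No description provided for this key.)

logprob_mb_tokens: int

(No description provided for this key.)

sequence_length_round: int

(No description provided for this key.)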

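class nemo_rl.models.policy.PolicyConfig

Bases: typing.TypedDict

model_name: str

(No description provided for this key.)

tokenizer: nemo_rl.models.policy.TokenizerConfig

(No description provided for this key.)

train_global_batch_size: int

(No description provided for this key.)

train_micro_batch_size: int

(No description provided for this key.)

learning_rate: float

(No description provided for this key.)

logprob_batch_size: int

(No description provided for this key.)

generation: Optional[nemo_rl.models.generation.interfaces.GenerationConfig]

(No description provided for this key.)

precision: str

(No description provided for this key.)

dtensor_cfg: nemo_rl.models.policy.DTensorConfig

(No description provided for this key.)

dynamic_batching: nemo_rl.models.policy.DynamicBatchingConfig

(No description provided for this key.)

make_sequence_length_divisible_by: int

(No description provided for this key.)

max_grad_norm: Optional[Union[float, int]]

(No description provided for this key.)

fsdp_offload_enabled: bool

(No description provided for this key.)

activation_checkpointing_enabled: bool

(No description provided for this key.)

refit_buffer_size_gb: int

(No description provided for this key.)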