nemo_rl.models.policy#

API#

class nemo_rl.models.policy.DTensorConfig[source]#

Bases: typing.TypedDict

enabled: bool#

cpu_offload: NotRequired[bool]#

sequence_parallel: NotRequired[bool]#

activation_checkpointing: NotRequired[bool]#

tensor_parallel_size: NotRequired[int]#

context_parallel_size: NotRequired[int]#

custom_parallel_plan: NotRequired[str]#
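
A minimal sketch of a DTensorConfig dict built from the fields above; the values, and the custom parallel-plan import path, are illustrative assumptions rather than defaults.

from nemo_rl.models.policy import DTensorConfig

# Example values only; custom_parallel_plan points at a hypothetical module path.
dtensor_cfg: DTensorConfig = {
    "enabled": True,
    "cpu_offload": False,               # NotRequired
    "sequence_parallel": False,         # NotRequired
    "activation_checkpointing": True,   # NotRequired
    "tensor_parallel_size": 2,          # NotRequired
    "context_parallel_size": 1,         # NotRequired
    "custom_parallel_plan": "my_project.parallel_plans.custom_plan",  # NotRequired, hypothetical
}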

class nemo_rl.models.policy.SequencePackingConfig[source]#

Bases: typing.TypedDict

enabled: bool#

train_mb_tokens: int#

logprob_mb_tokens: int#

algorithm: str#
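
A sketch of a SequencePackingConfig dict; the token budgets and the packing-algorithm name are assumptions for illustration.

from nemo_rl.models.policy import SequencePackingConfig

sequence_packing_cfg: SequencePackingConfig = {
    "enabled": True,
    "train_mb_tokens": 8192,              # assumed token budget per packed training microbatch
    "logprob_mb_tokens": 16384,           # assumed token budget per packed logprob microbatch
    "algorithm": "first_fit_decreasing",  # example bin-packing algorithm name (assumption)
}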

class nemo_rl.models.policy.MegatronOptimizerConfig[source]#

Bases: typing.TypedDict

optimizer: str#

lr: float#

min_lr: float#

weight_decay: float#

bf16: bool#

fp16: bool#

params_dtype: str#

adam_beta1: float#

adam_beta2: float#

adam_eps: float#

sgd_momentum: float#

use_distributed_optimizer: bool#

use_precision_aware_optimizer: bool#

clip_grad: float#
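
A sketch of a MegatronOptimizerConfig dict covering every field above; the values are placeholders, not recommended settings.

from nemo_rl.models.policy import MegatronOptimizerConfig

megatron_optimizer_cfg: MegatronOptimizerConfig = {
    "optimizer": "adam",                   # example optimizer name
    "lr": 5.0e-6,
    "min_lr": 5.0e-7,
    "weight_decay": 0.01,
    "bf16": True,
    "fp16": False,
    "params_dtype": "bfloat16",            # example dtype string
    "adam_beta1": 0.9,
    "adam_beta2": 0.98,
    "adam_eps": 1.0e-8,
    "sgd_momentum": 0.9,                   # only meaningful if optimizer == "sgd"
    "use_distributed_optimizer": True,
    "use_precision_aware_optimizer": False,
    "clip_grad": 1.0,
}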

class nemo_rl.models.policy.MegatronSchedulerConfig[source]#

Bases: typing.TypedDict

start_weight_decay: float#

end_weight_decay: float#

weight_decay_incr_style: str#

lr_decay_style: str#

lr_decay_iters: int#

lr_warmup_iters: int#

lr_warmup_init: float#
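
A sketch of a MegatronSchedulerConfig dict; the style strings follow common Megatron-LM conventions and, like the iteration counts, are assumptions here.

from nemo_rl.models.policy import MegatronSchedulerConfig

megatron_scheduler_cfg: MegatronSchedulerConfig = {
    "start_weight_decay": 0.01,
    "end_weight_decay": 0.01,
    "weight_decay_incr_style": "constant",  # assumed style name
    "lr_decay_style": "cosine",             # assumed style name
    "lr_decay_iters": 1000,
    "lr_warmup_iters": 50,
    "lr_warmup_init": 0.0,
}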

class nemo_rl.models.policy.MegatronDDPConfig[source]#

Bases: typing.TypedDict

grad_reduce_in_fp32: bool#

overlap_grad_reduce: bool#

overlap_param_gather: bool#

average_in_collective: bool#

use_custom_fsdp: bool#

data_parallel_sharding_strategy: str#
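
A sketch of a MegatronDDPConfig dict; the sharding-strategy string is an assumed example value.

from nemo_rl.models.policy import MegatronDDPConfig

megatron_ddp_cfg: MegatronDDPConfig = {
    "grad_reduce_in_fp32": True,
    "overlap_grad_reduce": True,
    "overlap_param_gather": True,
    "average_in_collective": True,
    "use_custom_fsdp": False,
    "data_parallel_sharding_strategy": "optim_grads_params",  # assumed strategy name
}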

class nemo_rl.models.policy.MegatronConfig[source]#

Bases: typing.TypedDict

enabled: bool#

empty_unused_memory_level: int#

activation_checkpointing: bool#

converter_type: str#

tensor_model_parallel_size: int#

pipeline_model_parallel_size: int#

num_layers_in_first_pipeline_stage: int#

num_layers_in_last_pipeline_stage: int#

context_parallel_size: int#

pipeline_dtype: str#

sequence_parallel: bool#

freeze_moe_router: bool#

expert_tensor_parallel_size: int#

expert_model_parallel_size: int#

optimizer: NotRequired[nemo_rl.models.policy.MegatronOptimizerConfig]#

scheduler: NotRequired[nemo_rl.models.policy.MegatronSchedulerConfig]#

distributed_data_parallel_config: nemo_rl.models.policy.MegatronDDPConfig#
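
A sketch of a MegatronConfig dict that reuses the megatron_optimizer_cfg, megatron_scheduler_cfg, and megatron_ddp_cfg sketches above; the parallel sizes, converter type, and pipeline-stage layer split are illustrative assumptions.

from nemo_rl.models.policy import MegatronConfig

megatron_cfg: MegatronConfig = {
    "enabled": True,
    "empty_unused_memory_level": 1,
    "activation_checkpointing": False,
    "converter_type": "LlamaForCausalLM",      # example; depends on the model family
    "tensor_model_parallel_size": 2,
    "pipeline_model_parallel_size": 2,
    "num_layers_in_first_pipeline_stage": 16,  # example split for a 32-layer model
    "num_layers_in_last_pipeline_stage": 16,
    "context_parallel_size": 1,
    "pipeline_dtype": "bfloat16",
    "sequence_parallel": True,
    "freeze_moe_router": False,
    "expert_tensor_parallel_size": 1,
    "expert_model_parallel_size": 1,
    "optimizer": megatron_optimizer_cfg,       # NotRequired; sketch above
    "scheduler": megatron_scheduler_cfg,       # NotRequired; sketch above
    "distributed_data_parallel_config": megatron_ddp_cfg,  # sketch above
}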

class nemo_rl.models.policy.TokenizerConfig[source]#

Bases: typing.TypedDict

name: str#

chat_template: NotRequired[str]#
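
A sketch of a TokenizerConfig dict; the tokenizer name and chat-template value are example assumptions.

from nemo_rl.models.policy import TokenizerConfig

tokenizer_cfg: TokenizerConfig = {
    "name": "meta-llama/Llama-3.1-8B-Instruct",  # example Hugging Face tokenizer name
    "chat_template": "default",                  # NotRequired; example value
}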

class nemo_rl.models.policy.PytorchOptimizerConfig[source]#

Bases: typing.TypedDict

name: str#

kwargs: dict[str, Any]#
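
A sketch of a PytorchOptimizerConfig dict; treating name as a fully qualified torch optimizer class and routing its constructor arguments through kwargs is an assumption.

from nemo_rl.models.policy import PytorchOptimizerConfig

pytorch_optimizer_cfg: PytorchOptimizerConfig = {
    "name": "torch.optim.AdamW",  # assumed naming convention
    "kwargs": {"lr": 5.0e-6, "weight_decay": 0.01, "betas": [0.9, 0.98]},
}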

class nemo_rl.models.policy.SinglePytorchSchedulerConfig[source]#

Bases: typing.TypedDict

name: str#

kwargs: dict[str, Any]#

milestones: NotRequired[list[int]]#
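
A sketch of a SinglePytorchSchedulerConfig dict; the scheduler name and kwargs are illustrative, and the milestones entry is assumed to mark where one scheduler hands off to the next when several are chained.

from nemo_rl.models.policy import SinglePytorchSchedulerConfig

lr_scheduler_cfg: SinglePytorchSchedulerConfig = {
    "name": "torch.optim.lr_scheduler.LinearLR",  # assumed naming convention
    "kwargs": {"start_factor": 0.1, "total_iters": 50},
    "milestones": [50],                           # NotRequired; chaining assumption
}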

nemo_rl.models.policy.SchedulerMilestones#

class nemo_rl.models.policy.DynamicBatchingConfig[source]#

Bases: typing.TypedDict

enabled: bool#

train_mb_tokens: NotRequired[int]#

logprob_mb_tokens: NotRequired[int]#

sequence_length_round: NotRequired[int]#
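
A sketch of a DynamicBatchingConfig dict; the token budgets and rounding multiple are example values, and the comments state the assumed meaning of each field.

from nemo_rl.models.policy import DynamicBatchingConfig

dynamic_batching_cfg: DynamicBatchingConfig = {
    "enabled": True,
    "train_mb_tokens": 8192,      # NotRequired; assumed token budget per training microbatch
    "logprob_mb_tokens": 16384,   # NotRequired; assumed token budget per logprob microbatch
    "sequence_length_round": 64,  # NotRequired; assumed multiple for padded sequence lengths
}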

class nemo_rl.models.policy.PolicyConfig[source]#

Bases: typing.TypedDict

model_name: str#

tokenizer: nemo_rl.models.policy.TokenizerConfig#

train_global_batch_size: int#

train_micro_batch_size: int#

logprob_batch_size: NotRequired[int]#

generation: NotRequired[nemo_rl.models.generation.interfaces.GenerationConfig]#

generation_batch_size: NotRequired[int]#

precision: str#

dtensor_cfg: nemo_rl.models.policy.DTensorConfig#

megatron_cfg: NotRequired[nemo_rl.models.policy.MegatronConfig]#

dynamic_batching: nemo_rl.models.policy.DynamicBatchingConfig#

sequence_packing: NotRequired[nemo_rl.models.policy.SequencePackingConfig]#

make_sequence_length_divisible_by: int#

max_total_sequence_length: int#

max_grad_norm: NotRequired[Union[float, int]]#

refit_buffer_size_gb: NotRequired[float]#

optimizer: NotRequired[nemo_rl.models.policy.PytorchOptimizerConfig]#

scheduler: NotRequired[list[nemo_rl.models.policy.SinglePytorchSchedulerConfig] | nemo_rl.models.policy.SchedulerMilestones]#
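
A sketch of a PolicyConfig dict that ties the earlier sketches together (tokenizer_cfg, dtensor_cfg, dynamic_batching_cfg, sequence_packing_cfg, pytorch_optimizer_cfg, lr_scheduler_cfg); the model name, batch sizes, and sequence lengths are illustrative assumptions, and the NotRequired generation and megatron_cfg entries are omitted.

from nemo_rl.models.policy import PolicyConfig

policy_cfg: PolicyConfig = {
    "model_name": "meta-llama/Llama-3.1-8B-Instruct",  # example model
    "tokenizer": tokenizer_cfg,
    "train_global_batch_size": 512,
    "train_micro_batch_size": 2,
    "logprob_batch_size": 4,              # NotRequired
    "generation_batch_size": 32,          # NotRequired
    "precision": "bfloat16",
    "dtensor_cfg": dtensor_cfg,
    "dynamic_batching": dynamic_batching_cfg,
    "sequence_packing": sequence_packing_cfg,  # NotRequired
    "make_sequence_length_divisible_by": 8,
    "max_total_sequence_length": 4096,
    "max_grad_norm": 1.0,                 # NotRequired
    "refit_buffer_size_gb": 4.0,          # NotRequired
    "optimizer": pytorch_optimizer_cfg,   # NotRequired
    "scheduler": [lr_scheduler_cfg],      # NotRequired; a list of SinglePytorchSchedulerConfig
    # "generation" and "megatron_cfg" are also NotRequired and omitted in this sketch.
}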