nemo_rl.models.value.config#

Module Contents#

Classes#

ValueConfig

Configuration for Value models in PPO.

API#

class nemo_rl.models.value.config.ValueConfig#

Bases: typing.TypedDict

Configuration for value models in PPO (Proximal Policy Optimization).

Value models use a subset of the PolicyConfig fields; generation-specific settings and reference-policy settings are excluded.

Initialization

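Initialize self. See help(type(self)) for accurate signature. (This is inherited boilerplate: ValueConfig is a typing.TypedDict, so it is built like an ordinary dict from the keys listed below.)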

model_name: str#

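No description provided. (`None` here is a documentation-generator placeholder, not a default value; TypedDict keys have no defaults.)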

tokenizer: nemo_rl.models.policy.TokenizerConfig#

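No description provided. (`None` here is a documentation-generator placeholder, not a default value; TypedDict keys have no defaults.)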

train_global_batch_size: int#

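No description provided. (`None` here is a documentation-generator placeholder, not a default value; TypedDict keys have no defaults.)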

train_micro_batch_size: int#

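No description provided. (`None` here is a documentation-generator placeholder, not a default value; TypedDict keys have no defaults.)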

logprob_batch_size: NotRequired[int]#

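Optional key (`NotRequired`). No description provided. (`None` here is a documentation-generator placeholder, not a default value; TypedDict keys have no defaults.)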

precision: str#

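No description provided. (`None` here is a documentation-generator placeholder, not a default value; TypedDict keys have no defaults.)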

reward_model_cfg: nemo_rl.models.policy.RewardModelConfig#

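No description provided. (`None` here is a documentation-generator placeholder, not a default value; TypedDict keys have no defaults.)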

dtensor_cfg: nemo_rl.models.policy.DTensorConfig | nemo_rl.models.policy.DTensorConfigDisabled#

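No description provided. (`None` here is a documentation-generator placeholder, not a default value; TypedDict keys have no defaults.)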

megatron_cfg: NotRequired[nemo_rl.models.policy.MegatronConfig | nemo_rl.models.policy.MegatronConfigDisabled]#

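Optional key (`NotRequired`). No description provided. (`None` here is a documentation-generator placeholder, not a default value; TypedDict keys have no defaults.)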

hf_config_overrides: NotRequired[dict[str, Any]]#

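Optional key (`NotRequired`). No description provided. (`None` here is a documentation-generator placeholder, not a default value; TypedDict keys have no defaults.)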

dynamic_batching: nemo_rl.models.policy.DynamicBatchingConfig | nemo_rl.models.policy.DynamicBatchingConfigDisabled#

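No description provided. (`None` here is a documentation-generator placeholder, not a default value; TypedDict keys have no defaults.)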

sequence_packing: NotRequired[nemo_rl.models.policy.SequencePackingConfig | nemo_rl.models.policy.SequencePackingConfigDisabled]#

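Optional key (`NotRequired`). No description provided. (`None` here is a documentation-generator placeholder, not a default value; TypedDict keys have no defaults.)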

make_sequence_length_divisible_by: int#

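No description provided. (`None` here is a documentation-generator placeholder, not a default value; TypedDict keys have no defaults.)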

max_total_sequence_length: int#

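No description provided. (`None` here is a documentation-generator placeholder, not a default value; TypedDict keys have no defaults.)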

max_grad_norm: NotRequired[float | int | None]#

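Optional key (`NotRequired`); when present, the value may itself be `None` per the annotation. No description provided. (The bare `None` originally shown here is a documentation-generator placeholder, not a default value; TypedDict keys have no defaults.)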

dequantize_base_checkpoint: NotRequired[bool]#

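Optional key (`NotRequired`). No description provided. (`None` here is a documentation-generator placeholder, not a default value; TypedDict keys have no defaults.)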

load_value_head_from_model: NotRequired[bool]#

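Optional key (`NotRequired`). No description provided. (`None` here is a documentation-generator placeholder, not a default value; TypedDict keys have no defaults.)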

optimizer: NotRequired[nemo_rl.models.policy.PytorchOptimizerConfig | None]#

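Optional key (`NotRequired`); when present, the value may itself be `None` per the annotation. No description provided. (The bare `None` originally shown here is a documentation-generator placeholder, not a default value; TypedDict keys have no defaults.)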

scheduler: NotRequired[list[nemo_rl.models.policy.SinglePytorchSchedulerConfig | nemo_rl.models.policy.SinglePytorchMilestonesConfig] | nemo_rl.models.policy.SchedulerMilestones | None]#

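Optional key (`NotRequired`); when present, the value may itself be `None` per the annotation. No description provided. (The bare `None` originally shown here is a documentation-generator placeholder, not a default value; TypedDict keys have no defaults.)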