nemo_automodel.components.models.hy_v3.config

View as Markdown

Module Contents

Classes

| Name | Description |
| --- | --- |
| HYV3Config | Configuration class for Tencent Hy3-preview (295B MoE). |

API

class nemo_automodel.components.models.hy_v3.config.HYV3Config(
vocab_size: int = 129280,
hidden_size: int = 4096,
intermediate_size: int = 1536,
moe_intermediate_size: int = 1536,
num_hidden_layers: int = 80,
num_attention_heads: int = 64,
num_key_value_heads: int = 8,
head_dim: int = 128,
num_experts: int = 192,
num_shared_experts: int = 1,
num_experts_per_tok: int = 8,
router_scaling_factor: float = 1.0,
route_norm: bool = False,
moe_router_enable_expert_bias: bool = True,
first_k_dense_replace: int = 1,
max_position_embeddings: int = 262144,
rope_theta: float = 11158840.0,
rope_scaling: dict | None = None,
rms_norm_eps: float = 1e-06,
attention_bias: bool = False,
hidden_act: str = 'silu',
use_cache: bool = True,
pad_token_id: int | None = None,
bos_token_id: int = 1,
eos_token_id: int = 2,
tie_word_embeddings: bool = False,
torch_dtype: str = 'bfloat16',
kwargs = {}
)

Bases: PretrainedConfig

Configuration class for Tencent Hy3-preview (295B MoE).

keys_to_ignore_at_inference = ['past_key_values']
model_type = 'hy_v3'