bridge.models.glm.glm45_provider#

Module Contents#

Classes#

| Class | Description |
|---|---|
| GLMMoEModelProvider | Base provider for GLM MoE Models. |
| GLM45ModelProvider355B | Provider for GLM 4.5 355B-A32B: https://huggingface.co/zai-org/GLM-4.5 |
| GLM45AirModelProvider106B | Provider for GLM 4.5 Air 106B-A12B: https://huggingface.co/zai-org/GLM-4.5-Air |

Data#

API#
- bridge.models.glm.glm45_provider.logger#
‘getLogger(…)’
- class bridge.models.glm.glm45_provider.GLMMoEModelProvider#
Bases:
megatron.bridge.models.gpt_provider.GPTModelProvider
Base provider for GLM MoE Models.
- transformer_layer_spec: Union[megatron.core.transformer.ModuleSpec, Callable[[megatron.bridge.models.gpt_provider.GPTModelProvider], megatron.core.transformer.ModuleSpec]]#
‘partial(…)’
- normalization: str#
‘RMSNorm’
- activation_func: Callable#
None
- gated_linear_unit: bool#
True
- add_bias_linear: bool#
False
- add_qkv_bias: bool#
True
- seq_length: int#
131072
- init_method_std: float#
0.02
- hidden_dropout: float#
0.0
- vocab_size: int#
151552
- share_embeddings_and_output_weights: bool#
False
- layernorm_epsilon: float#
1e-05
- autocast_dtype: torch.dtype#
None
- params_dtype: torch.dtype#
None
- bf16: bool#
True
- num_query_groups: int#
8
- num_attention_heads: int#
96
- attention_dropout: float#
0.0
- kv_channels: int#
128
- position_embedding_type: str#
‘rope’
- rotary_base: float#
1000000.0
- rotary_percent: float#
0.5
- moe_router_topk: int#
8
- moe_shared_expert_overlap: bool#
True
- moe_token_dispatcher_type: str#
‘alltoall’
- moe_router_load_balancing_type: str#
‘seq_aux_loss’
- moe_aux_loss_coeff: float#
0.001
- moe_router_pre_softmax: bool#
False
- moe_grouped_gemm: bool#
True
- moe_router_score_function: str#
‘sigmoid’
- moe_permute_fusion: bool#
True
- moe_router_dtype: str#
‘fp32’
- moe_router_enable_expert_bias: bool#
True
- moe_router_bias_update_rate: float#
0
- persist_layer_norm: bool#
True
- bias_activation_fusion: bool#
True
- bias_dropout_fusion: bool#
True
- mtp_num_layers: Optional[int]#
1
- mtp_loss_scaling_factor: Optional[float]#
0.3
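
All of the fields above are dataclass fields, so a custom GLM variant can be declared by subclassing the base provider and overriding only the values that differ, exactly as the two concrete providers below do. The following is a minimal sketch: the class name and every size in it are hypothetical (not a released checkpoint), and the import path assumes the module is importable under the `megatron.bridge` package as suggested by the base-class reference above.

```python
from dataclasses import dataclass

from megatron.bridge.models.glm.glm45_provider import GLMMoEModelProvider


@dataclass
class TinyGLMMoEProvider(GLMMoEModelProvider):
    """Hypothetical small GLM MoE variant, for illustration only."""

    num_layers: int = 4          # made-up depth
    hidden_size: int = 1024      # made-up width
    ffn_hidden_size: int = 2048  # made-up dense FFN size
    num_moe_experts: int = 8     # made-up expert count
    seq_length: int = 4096       # shorter context than the base default (131072)
```

Unset fields fall through to the base-class defaults documented above (RMSNorm, gated linear units, top-8 sigmoid routing, and so on).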
- class bridge.models.glm.glm45_provider.GLM45ModelProvider355B#
Bases:
bridge.models.glm.glm45_provider.GLMMoEModelProvider
Provider for GLM 4.5 355B-A32B: https://huggingface.co/zai-org/GLM-4.5
- num_layers: int#
92
- num_moe_experts: int#
160
- hidden_size: int#
5120
- ffn_hidden_size: int#
12288
- moe_layer_freq: Union[int, List[int]]#
‘field(…)’
- moe_ffn_hidden_size: int#
1536
- moe_shared_expert_intermediate_size: int#
1536
- qk_layernorm: bool#
True
- moe_router_topk_scaling_factor: float#
2.5
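
Because the provider is a dataclass, the defaults documented above can be inspected or selectively overridden at construction time. A minimal sketch, under the same `megatron.bridge` import-path assumption as above:

```python
from megatron.bridge.models.glm.glm45_provider import GLM45ModelProvider355B

# Defaults come from the field list above; keyword arguments override them.
provider = GLM45ModelProvider355B(seq_length=8192)

assert provider.num_layers == 92
assert provider.num_moe_experts == 160
assert provider.moe_router_topk_scaling_factor == 2.5
assert provider.seq_length == 8192  # overrides the inherited default of 131072
```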
- class bridge.models.glm.glm45_provider.GLM45AirModelProvider106B#
Bases:
bridge.models.glm.glm45_provider.GLMMoEModelProvider
Provider for GLM 4.5 Air 106B-A12B: https://huggingface.co/zai-org/GLM-4.5-Air
- num_layers: int#
46
- num_moe_experts: int#
128
- hidden_size: int#
4096
- ffn_hidden_size: int#
10944
- moe_layer_freq: Union[int, List[int]]#
‘field(…)’
- moe_ffn_hidden_size: int#
1408
- moe_shared_expert_intermediate_size: int#
1408
- qk_layernorm: bool#
False
- moe_router_topk_scaling_factor: float#
1.0
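
The two concrete providers differ only in the size and routing fields documented above, which a quick side-by-side read of the dataclass fields makes visible. A minimal sketch, again assuming the `megatron.bridge` import path:

```python
from megatron.bridge.models.glm.glm45_provider import (
    GLM45AirModelProvider106B,
    GLM45ModelProvider355B,
)

air, full = GLM45AirModelProvider106B(), GLM45ModelProvider355B()

print(air.num_layers, full.num_layers)            # 46 vs. 92
print(air.num_moe_experts, full.num_moe_experts)  # 128 vs. 160
print(air.qk_layernorm, full.qk_layernorm)        # False vs. True
```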