bridge.models.glm.glm45_provider#

Module Contents#

Classes#

GLMMoEModelProvider

Base provider for GLM MoE Models.

GLM45ModelProvider355B

Provider for GLM 4.5 355B-A32B: https://huggingface.co/zai-org/GLM-4.5

GLM45AirModelProvider106B

Provider for GLM 4.5 Air 106B-A12B: https://huggingface.co/zai-org/GLM-4.5-Air

Data#

logger

API#

bridge.models.glm.glm45_provider.logger#

'getLogger(…)'
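
The elided value above is the standard library idiom for obtaining a module-level logger. A hedged reconstruction (the exact argument is elided in the rendered docs; __name__ is the conventional choice, not confirmed by the source):

    import logging

    # Presumed definition of the module-level logger; the argument is the
    # stdlib convention, not confirmed by the rendered value above.
    logger = logging.getLogger(__name__)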

class bridge.models.glm.glm45_provider.GLMMoEModelProvider#

Bases: megatron.bridge.models.gpt_provider.GPTModelProvider

Base provider for GLM MoE Models.

transformer_layer_spec: Union[megatron.core.transformer.ModuleSpec, Callable[[megatron.bridge.models.gpt_provider.GPTModelProvider], megatron.core.transformer.ModuleSpec]]#

'partial(…)'

normalization: str#

'RMSNorm'

activation_func: Callable#

None

gated_linear_unit: bool#

True

add_bias_linear: bool#

False

add_qkv_bias: bool#

True

seq_length: int#

131072

init_method_std: float#

0.02

hidden_dropout: float#

0.0

vocab_size: int#

151552

share_embeddings_and_output_weights: Optional[bool]#

False

layernorm_epsilon: float#

1e-05

autocast_dtype: Optional[torch.dtype]#

None

params_dtype: Optional[torch.dtype]#

None

bf16: bool#

True

num_query_groups: int#

8

num_attention_heads: int#

96

attention_dropout: float#

0.0

kv_channels: int#

128

position_embedding_type: str#

'rope'

rotary_base: float#

1000000.0

rotary_percent: float#

0.5

moe_router_topk: int#

8

moe_shared_expert_overlap: bool#

True

moe_token_dispatcher_type: str#

'alltoall'

moe_router_load_balancing_type: str#

'seq_aux_loss'

moe_aux_loss_coeff: float#

0.001

moe_router_pre_softmax: bool#

False

moe_grouped_gemm: bool#

True

moe_router_score_function: str#

'sigmoid'

moe_permute_fusion: bool#

True

moe_router_dtype: str#

'fp32'

moe_router_enable_expert_bias: bool#

True

moe_router_bias_update_rate: float#

0.0

persist_layer_norm: bool#

True

bias_activation_fusion: bool#

True

bias_dropout_fusion: bool#

True

mtp_num_layers: Optional[int]#

1

mtp_loss_scaling_factor: Optional[float]#

0.3
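
GLMMoEModelProvider fixes only the family-wide defaults (RMSNorm, gated MLP, top-8 sigmoid routing with expert bias, one MTP layer); the size-dependent fields such as num_layers and hidden_size come from the concrete subclasses documented below. A minimal construction sketch, assuming the providers are dataclasses (as megatron.bridge GPT providers are) so any field can be overridden by keyword; the seq_length override is illustrative only:

    # Sketch only: assumes dataclass-style construction and that zero-arg
    # construction works because the subclass supplies the required sizes.
    from bridge.models.glm.glm45_provider import GLM45AirModelProvider106B

    provider = GLM45AirModelProvider106B()

    # Defaults inherited from GLMMoEModelProvider, per the listing above.
    assert provider.normalization == "RMSNorm"
    assert provider.moe_router_topk == 8
    assert provider.mtp_num_layers == 1

    # Fields can be overridden at construction time (illustrative value):
    short_ctx = GLM45AirModelProvider106B(seq_length=8192)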

class bridge.models.glm.glm45_provider.GLM45ModelProvider355B#

Bases: bridge.models.glm.glm45_provider.GLMMoEModelProvider

Provider for GLM 4.5 355B-A32B: https://huggingface.co/zai-org/GLM-4.5

num_layers: int#

92

num_moe_experts: int#

160

hidden_size: int#

5120

ffn_hidden_size: int#

12288

moe_layer_freq: Union[int, List[int]]#

'field(…)'

moe_ffn_hidden_size: int#

1536

moe_shared_expert_intermediate_size: int#

1536

qk_layernorm: bool#

True

moe_router_topk_scaling_factor: float#

2.5
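
Two arithmetic cross-checks fall out of the documented values: the eight routed experts together span 8 × 1536 = 12288 hidden units, exactly the dense ffn_hidden_size, and the attention projection width is 96 heads × 128 kv_channels = 12288 as well (wider than the 5120 hidden size). A sketch, assuming dataclass field access:

    # Sketch: cross-checks the documented field values only.
    from bridge.models.glm.glm45_provider import GLM45ModelProvider355B

    p = GLM45ModelProvider355B()
    # Top-8 of 160 routed experts per token; their combined FFN width
    # equals the dense ffn_hidden_size: 8 * 1536 == 12288.
    assert p.moe_router_topk * p.moe_ffn_hidden_size == p.ffn_hidden_size
    # Total attention projection width: 96 heads * 128 channels == 12288.
    assert p.num_attention_heads * p.kv_channels == 12288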

class bridge.models.glm.glm45_provider.GLM45AirModelProvider106B#

Bases: bridge.models.glm.glm45_provider.GLMMoEModelProvider

Provider for GLM 4.5 Air 106B-A12B: https://huggingface.co/zai-org/GLM-4.5-Air

num_layers: int#

46

num_moe_experts: int#

128

hidden_size: int#

4096

ffn_hidden_size: int#

10944

moe_layer_freq: Union[int, List[int]]#

'field(…)'

moe_ffn_hidden_size: int#

1408

moe_shared_expert_intermediate_size: int#

1408

qk_layernorm: bool#

False

moe_router_topk_scaling_factor: float#

1.0
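
Relative to the 355B provider, the Air variant keeps the same routing policy (top-8, sigmoid scoring, expert bias) but shrinks every size field, disables qk_layernorm, and uses a neutral router scaling factor of 1.0. A hedged sketch that diffs the two configurations, again assuming they are dataclasses:

    # Sketch: assumes dataclass introspection works on the providers.
    from dataclasses import fields

    from bridge.models.glm.glm45_provider import (
        GLM45AirModelProvider106B,
        GLM45ModelProvider355B,
    )

    big, air = GLM45ModelProvider355B(), GLM45AirModelProvider106B()
    for f in fields(air):
        b, a = getattr(big, f.name), getattr(air, f.name)
        if b != a:
            print(f"{f.name}: 355B={b!r}  Air={a!r}")
    # Per the listings above, the diffs are num_layers, num_moe_experts,
    # hidden_size, ffn_hidden_size, moe_ffn_hidden_size,
    # moe_shared_expert_intermediate_size, qk_layernorm, and
    # moe_router_topk_scaling_factor.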