Development > API Reference > Full Library Reference > Nemo Automodel > Nemo Automodel > Components > Models > Glm4 Moe Lite
nemo_automodel.components.models.glm4_moe_lite.model
nemo_automodel.components.models.glm4_moe_lite.model
Module Contents
Classes
| Name | Description |
|---|---|
| Block | - |
| Glm4MoeLiteForCausalLM | - |
| Glm4MoeLiteModel | - |
Data
API
class nemo_automodel.components.models.glm4_moe_lite.model.Block( layer_idx: int, config: typing.Any, moe_config: nemo_automodel.components.moe.layers.MoEConfig, backend: nemo_automodel.components.models.common.utils.BackendConfig )
Bases: Module
input_layernorm
mlp
= MoE(moe_config, backend)
post_attention_layernorm
self_attn
= MLA(config, backend)
nemo_automodel.components.models.glm4_moe_lite.model.Block._mlp( x: torch.Tensor, padding_mask: torch.Tensor | None ) -> torch.Tensor
nemo_automodel.components.models.glm4_moe_lite.model.Block.forward( x: torch.Tensor, freqs_cis: torch.Tensor, attention_mask: torch.Tensor | None = None, padding_mask: torch.Tensor | None = None, attn_kwargs: typing.Any = {} ) -> torch.Tensor
nemo_automodel.components.models.glm4_moe_lite.model.Block.init_weights( buffer_device: torch.device )
class nemo_automodel.components.models.glm4_moe_lite.model.Glm4MoeLiteForCausalLM( config: typing.Any, moe_config: nemo_automodel.components.moe.layers.MoEConfig | None = None, backend: nemo_automodel.components.models.common.utils.BackendConfig | None = None, kwargs = {} )
Bases: HFCheckpointingMixin, Module, MoEFSDPSyncMixin
_keep_in_fp32_modules_strict
= ['e_score_correction_bias']
backend
= backend or BackendConfig()
lm_head
model
state_dict_adapter
nemo_automodel.components.models.glm4_moe_lite.model.Glm4MoeLiteForCausalLM.forward( input_ids: torch.Tensor, position_ids: torch.Tensor | None = None, attention_mask: torch.Tensor | None = None, padding_mask: torch.Tensor | None = None, logits_to_keep: typing.Union[int, torch.Tensor] = 0, output_hidden_states: typing.Optional[bool] = None, attn_kwargs: typing.Any = {} ) -> transformers.modeling_outputs.CausalLMOutputWithPast
nemo_automodel.components.models.glm4_moe_lite.model.Glm4MoeLiteForCausalLM.from_config( config: typing.Any, moe_config: nemo_automodel.components.moe.layers.MoEConfig | None = None, backend: nemo_automodel.components.models.common.utils.BackendConfig | None = None, kwargs = {} )
classmethod
nemo_automodel.components.models.glm4_moe_lite.model.Glm4MoeLiteForCausalLM.from_pretrained( pretrained_model_name_or_path: str, model_args = (), kwargs = {} )
classmethod
nemo_automodel.components.models.glm4_moe_lite.model.Glm4MoeLiteForCausalLM.get_input_embeddings()
nemo_automodel.components.models.glm4_moe_lite.model.Glm4MoeLiteForCausalLM.get_output_embeddings()
nemo_automodel.components.models.glm4_moe_lite.model.Glm4MoeLiteForCausalLM.initialize_weights( buffer_device: torch.device | None = None, dtype: torch.dtype = torch.bfloat16 ) -> None
nemo_automodel.components.models.glm4_moe_lite.model.Glm4MoeLiteForCausalLM.set_input_embeddings( value )
nemo_automodel.components.models.glm4_moe_lite.model.Glm4MoeLiteForCausalLM.set_output_embeddings( new_embeddings )
class nemo_automodel.components.models.glm4_moe_lite.model.Glm4MoeLiteModel( config: typing.Any, backend: nemo_automodel.components.models.common.utils.BackendConfig, moe_config: nemo_automodel.components.moe.layers.MoEConfig | None = None, moe_overrides: dict | None = None )
Bases: Module
embed_tokens
freqs
layers
= torch.nn.ModuleDict()
max_seq_len
= config.max_position_embeddings
moe_config
= moe_config or MoEConfig(**moe_defaults)
norm
qk_rope_head_dim
= config.qk_rope_head_dim
nemo_automodel.components.models.glm4_moe_lite.model.Glm4MoeLiteModel.forward( input_ids: torch.Tensor, position_ids: torch.Tensor | None = None, attention_mask: torch.Tensor | None = None, padding_mask: torch.Tensor | None = None, attn_kwargs: typing.Any = {} ) -> torch.Tensor
nemo_automodel.components.models.glm4_moe_lite.model.Glm4MoeLiteModel.init_weights( buffer_device: torch.device | None = None ) -> None
nemo_automodel.components.models.glm4_moe_lite.model.ModelClass = Glm4MoeLiteForCausalLM