nemo_automodel.components.models.qwen3_next.model
nemo_automodel.components.models.qwen3_next.model
Module Contents
Classes
| Name | Description |
|---|---|
| Block | - |
| Qwen3NextForCausalLM | - |
| Qwen3NextModel | - |
Data
API
class nemo_automodel.components.models.qwen3_next.model.Block( layer_idx: int, config: transformers.models.qwen3_next.configuration_qwen3_next.Qwen3NextConfig, moe_config: nemo_automodel.components.moe.config.MoEConfig, backend: nemo_automodel.components.models.common.BackendConfig )
Bases: Module
input_layernorm
layer_type
= config.layer_types[layer_idx]
linear_attn
= Qwen3NextFp32GatedDeltaNet(config, layer_idx)
mlp
= MoE(moe_config, backend)
post_attention_layernorm
self_attn
= Qwen3NextAttention(config, layer_idx, backend)
nemo_automodel.components.models.qwen3_next.model.Block._mlp( x: torch.Tensor, padding_mask: torch.Tensor | None ) -> torch.Tensor
nemo_automodel.components.models.qwen3_next.model.Block.forward( x: torch.Tensor, freqs_cis: torch.Tensor, attention_mask: torch.Tensor | None = None, padding_mask: torch.Tensor | None = None, position_ids: torch.Tensor | None = None, attn_kwargs: typing.Any = {} ) -> torch.Tensor
nemo_automodel.components.models.qwen3_next.model.Block.init_weights( buffer_device: torch.device )
class nemo_automodel.components.models.qwen3_next.model.Qwen3NextForCausalLM( config: transformers.models.qwen3_next.configuration_qwen3_next.Qwen3NextConfig, moe_config: nemo_automodel.components.moe.config.MoEConfig | None = None, backend: nemo_automodel.components.models.common.BackendConfig | None = None, kwargs = {} )
Bases: HFCheckpointingMixin, Module, MoEFSDPSyncMixin
backend
= backend or BackendConfig()
lm_head
model
state_dict_adapter
nemo_automodel.components.models.qwen3_next.model.Qwen3NextForCausalLM.forward( input_ids: torch.Tensor, position_ids: torch.Tensor | None = None, attention_mask: torch.Tensor | None = None, padding_mask: torch.Tensor | None = None, logits_to_keep: typing.Union[int, torch.Tensor] = 0, output_hidden_states: typing.Optional[bool] = None, attn_kwargs: typing.Any = {} ) -> transformers.modeling_outputs.CausalLMOutputWithPast
nemo_automodel.components.models.qwen3_next.model.Qwen3NextForCausalLM.from_config( config: transformers.models.qwen3_next.configuration_qwen3_next.Qwen3NextConfig, moe_config: nemo_automodel.components.moe.config.MoEConfig | None = None, backend: nemo_automodel.components.models.common.BackendConfig | None = None, kwargs = {} )
classmethod
nemo_automodel.components.models.qwen3_next.model.Qwen3NextForCausalLM.from_pretrained( pretrained_model_name_or_path: str, model_args = (), kwargs = {} )
classmethod
nemo_automodel.components.models.qwen3_next.model.Qwen3NextForCausalLM.get_input_embeddings()
nemo_automodel.components.models.qwen3_next.model.Qwen3NextForCausalLM.get_output_embeddings()
nemo_automodel.components.models.qwen3_next.model.Qwen3NextForCausalLM.initialize_weights( buffer_device: torch.device | None = None, dtype: torch.dtype = torch.bfloat16 ) -> None
nemo_automodel.components.models.qwen3_next.model.Qwen3NextForCausalLM.set_input_embeddings( value )
nemo_automodel.components.models.qwen3_next.model.Qwen3NextForCausalLM.set_output_embeddings( new_embeddings )
class nemo_automodel.components.models.qwen3_next.model.Qwen3NextModel( config: transformers.models.qwen3_next.configuration_qwen3_next.Qwen3NextConfig, backend: nemo_automodel.components.models.common.BackendConfig, moe_config: nemo_automodel.components.moe.config.MoEConfig | None = None, moe_overrides: dict | None = None )
Bases: Module
embed_tokens
head_dim
layers
= torch.nn.ModuleDict()
max_seq_len
= config.max_position_embeddings
moe_config
= moe_config or MoEConfig(**moe_defaults)
norm
rotary_emb
nemo_automodel.components.models.qwen3_next.model.Qwen3NextModel.forward( input_ids: torch.Tensor, position_ids: torch.Tensor | None = None, attention_mask: torch.Tensor | None = None, padding_mask: torch.Tensor | None = None, attn_kwargs: typing.Any = {} ) -> torch.Tensor
nemo_automodel.components.models.qwen3_next.model.Qwen3NextModel.init_weights( buffer_device: torch.device | None = None ) -> None
nemo_automodel.components.models.qwen3_next.model.ModelClass = Qwen3NextForCausalLM