nemo_automodel.components.speculative.dspark.draft_qwen3
nemo_automodel.components.speculative.dspark.draft_qwen3
Module Contents
Classes
| Name | Description |
|---|---|
| Qwen3DSparkAttention | - |
| Qwen3DSparkDecoderLayer | - |
| Qwen3DSparkModel | - |
Functions
| Name | Description |
|---|---|
| apply_rotary_pos_emb | - |
Data
API
class nemo_automodel.components.speculative.dspark.draft_qwen3.Qwen3DSparkAttention( config, layer_idx: int )
Bases: Module
attention_dropout = config.attention_dropout
head_dim
k_norm
k_proj
num_attention_heads = config.num_attention_heads
num_key_value_groups
num_key_value_heads = config.num_key_value_heads
o_proj
q_norm
q_proj
scaling = self.head_dim ** -0.5
sliding_window
v_proj
nemo_automodel.components.speculative.dspark.draft_qwen3.Qwen3DSparkAttention.forward( hidden_states: torch.Tensor, target_hidden_states: torch.Tensor, position_embeddings: tuple[torch.Tensor, torch.Tensor], attention_mask: typing.Optional[torch.Tensor], past_key_values: typing.Optional[transformers.cache_utils.Cache] = None, cache_position: typing.Optional[torch.LongTensor] = None, **kwargs: typing_extensions.Unpack[transformers.models.qwen3.modeling_qwen3.FlashAttentionKwargs] ) -> tuple[torch.Tensor, typing.Optional[torch.Tensor]]
class nemo_automodel.components.speculative.dspark.draft_qwen3.Qwen3DSparkDecoderLayer( config, layer_idx: int )
Bases: GradientCheckpointingLayer
hidden_size = config.hidden_size
input_layernorm
mlp = Qwen3MLP(config)
post_attention_layernorm
self_attn
nemo_automodel.components.speculative.dspark.draft_qwen3.Qwen3DSparkDecoderLayer.forward( target_hidden_states: typing.Optional[torch.Tensor] = None, hidden_states: typing.Optional[torch.Tensor] = None, attention_mask: typing.Optional[torch.Tensor] = None, position_ids: typing.Optional[torch.LongTensor] = None, past_key_value: typing.Optional[transformers.cache_utils.Cache] = None, output_attentions: typing.Optional[bool] = False, use_cache: typing.Optional[bool] = False, cache_position: typing.Optional[torch.LongTensor] = None, position_embeddings: typing.Optional[typing_extensions.Tuple[torch.Tensor, torch.Tensor]] = None, **kwargs: typing_extensions.Unpack[transformers.models.qwen3.modeling_qwen3.FlashAttentionKwargs] ) -> typing_extensions.Tuple[torch.FloatTensor, typing.Optional[typing_extensions.Tuple[torch.FloatTensor, torch.FloatTensor]]]
class nemo_automodel.components.speculative.dspark.draft_qwen3.Qwen3DSparkModel( config )
Bases: Qwen3PreTrainedModel
_no_split_modules = ['Qwen3DSparkDecoderLayer']
block_size = int(config.block_size)
embed_tokens
enable_confidence_head = bool(config.enable_confidence_head)
fc
hidden_norm
layers
lm_head
markov_head = build_markov_head(config)
mask_token_id = config.mask_token_id
norm
num_anchors = int(config.num_anchors)
rotary_emb = Qwen3RotaryEmbedding(config)
target_layer_ids = config.target_layer_ids
nemo_automodel.components.speculative.dspark.draft_qwen3.Qwen3DSparkModel._forward_backbone( position_ids: torch.LongTensor, attention_mask: typing.Optional[torch.Tensor] = None, noise_embedding: typing.Optional[torch.Tensor] = None, target_hidden_states: typing.Optional[torch.Tensor] = None, past_key_values: typing.Optional[transformers.cache_utils.Cache] = None, use_cache: bool = False, kwargs = {} ) -> torch.Tensor
nemo_automodel.components.speculative.dspark.draft_qwen3.Qwen3DSparkModel.compute_logits( hidden_states: torch.Tensor ) -> torch.Tensor
nemo_automodel.components.speculative.dspark.draft_qwen3.Qwen3DSparkModel.forward( input_ids: torch.Tensor, target_hidden_states: torch.Tensor, loss_mask: torch.Tensor, target_last_hidden_states: typing.Optional[torch.Tensor] = None ) -> nemo_automodel.components.speculative.dspark.common.DSparkForwardOutput
nemo_automodel.components.speculative.dspark.draft_qwen3.Qwen3DSparkModel.initialize_embeddings_and_head( embed_tokens: torch.nn.Module, lm_head: torch.nn.Module, freeze: bool = True )
nemo_automodel.components.speculative.dspark.draft_qwen3.Qwen3DSparkModel.predict_confidence_step( hidden_states: torch.Tensor, prev_token_ids: typing.Optional[torch.Tensor] = None ) -> typing.Optional[torch.Tensor]
nemo_automodel.components.speculative.dspark.draft_qwen3.Qwen3DSparkModel.sample_draft_token_step( base_logits: torch.Tensor, prev_token_ids: torch.Tensor, temperature: float = 0.0, hidden_states: typing.Optional[torch.Tensor] = None ) -> tuple[torch.Tensor, torch.Tensor]
nemo_automodel.components.speculative.dspark.draft_qwen3.Qwen3DSparkModel.sample_draft_tokens( base_logits: torch.Tensor, first_prev_token_ids: torch.Tensor, temperature: float = 0.0, hidden_states: typing.Optional[torch.Tensor] = None ) -> tuple[torch.Tensor, torch.Tensor]
nemo_automodel.components.speculative.dspark.draft_qwen3.Qwen3DSparkModel.set_embedding_head_trainable( trainable: bool )
nemo_automodel.components.speculative.dspark.draft_qwen3.apply_rotary_pos_emb( q, k, cos, sin, unsqueeze_dim = 1 )
nemo_automodel.components.speculative.dspark.draft_qwen3.__all__ = ['Qwen3DSparkModel', 'Qwen3DSparkAttention', 'Qwen3DSparkDecoderLayer']