bridge.models.t5_provider#
Module Contents#
Classes#

- T5ModelProvider: Model config for T5 model. Adapted from megatron.core.models.t5.t5_model.T5Model

Functions#

- transformer_engine_layer_spec: Spec for T5 when using transformer_engine mcore implementation
- local_layer_spec: Spec for T5 when using local mcore implementation
Data#
API#
- bridge.models.t5_provider.logger#
'getLogger(…)'
- bridge.models.t5_provider.transformer_engine_layer_spec(encoder_config: T5ModelProvider, decoder_config: T5ModelProvider)#

  Spec for T5 when using transformer_engine mcore implementation
- bridge.models.t5_provider.local_layer_spec(encoder_config: T5ModelProvider, decoder_config: T5ModelProvider)#

  Spec for T5 when using local mcore implementation
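The two spec helpers are interchangeable entry points for building the encoder/decoder block specs. Below is a minimal sketch of choosing between them; it assumes the import path matches this page's module name, that a single provider instance can serve as both `encoder_config` and `decoder_config`, and that the extra constructor fields (`num_layers`, `hidden_size`, `num_attention_heads`) come from megatron.core's `TransformerConfig` rather than from this page.

```python
from bridge.models.t5_provider import (
    T5ModelProvider,
    local_layer_spec,
    transformer_engine_layer_spec,
)

# Assumed: the usual TransformerConfig fields are enough to build a provider.
cfg = T5ModelProvider(num_layers=12, hidden_size=768, num_attention_heads=12)

# Prefer the Transformer Engine spec when TE is installed, otherwise fall back
# to the local mcore implementation. The same provider is used for both roles.
try:
    import transformer_engine  # noqa: F401
    layer_spec = transformer_engine_layer_spec(encoder_config=cfg, decoder_config=cfg)
except ImportError:
    layer_spec = local_layer_spec(encoder_config=cfg, decoder_config=cfg)
```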
- class bridge.models.t5_provider.T5ModelProvider#

  Bases: megatron.core.transformer.transformer_config.TransformerConfig, megatron.bridge.models.model_provider.ModelProviderMixin[megatron.core.models.T5.t5_model.T5Model]

  Model config for T5 model. Adapted from megatron.core.models.t5.t5_model.T5Model
- encoder_num_layers: int | None#
None
- fp16_lm_cross_entropy: bool#
False
- parallel_output: bool#
True
- make_vocab_size_divisible_by: int#
128
- position_embedding_type: Literal['learned_absolute', 'rope']#
'learned_absolute'
- apply_rope_fusion: bool#
True
- max_position_embeddings: int#
512
- relative_attention_num_buckets: int#
32
- relative_attention_max_distance: int#
128
- rotary_percent: float#
1.0
- seq_len_interpolation_factor: Optional[float]#
None
- seq_length: int#
512
- seq_length_dec: int#
128
- encoder_pipeline_model_parallel_size: int#
0
- attention_softmax_in_fp32: bool#
False
- bias_activation_fusion: bool#
True
- masked_softmax_fusion: bool#
True
- persist_layer_norm: bool#
True
- bias_dropout_fusion: bool#
True
- deallocate_pipeline_outputs: bool#
True
- num_moe_experts: Optional[int]#
None
- recompute_num_layers: int#
1
- distribute_saved_activations: bool#
False
- enable_autocast: bool#
False
- transformer_layer_spec: Union[megatron.core.transformer.spec_utils.ModuleSpec, Callable[[bridge.models.t5_provider.T5ModelProvider], megatron.core.transformer.spec_utils.ModuleSpec]]#
None
- vocab_size: Optional[int]#
None
- tp_comm_overlap_cfg: Optional[Union[str, dict[str, Any]]]#
None
- provide(pre_process=None, post_process=None, vp_stage=None, tokenizer=None)#

  Set up the T5 model based on the config definition.
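A hedged usage sketch of the provider follows. The constructor fields `num_layers`, `hidden_size`, and `num_attention_heads` are inherited from megatron.core's `TransformerConfig` and are not listed on this page, the `vocab_size` value is purely illustrative, and `provide()` is assumed to accept the keyword arguments shown in its signature above.

```python
from bridge.models.t5_provider import T5ModelProvider

# Sketch only: base transformer dimensions come from TransformerConfig.
provider = T5ModelProvider(
    num_layers=12,
    hidden_size=768,
    num_attention_heads=12,
    seq_length=512,       # encoder sequence length (default shown above)
    seq_length_dec=128,   # decoder sequence length (default shown above)
    vocab_size=32128,     # illustrative; set this when no tokenizer is passed
)

# Build the megatron.core T5Model for a single (first and last) pipeline stage.
model = provider.provide(pre_process=True, post_process=True)
```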