bridge.models.mamba.mamba_provider#

Module Contents#

Classes#

MambaModelProvider

Configuration and provider for Megatron Core Mamba models.

MambaModelProvider130M

Configuration for a 130M parameter Mamba model.

MambaModelProvider370M

Configuration for a 370M parameter Mamba model.

MambaModelProvider780M

Configuration for a 780M parameter Mamba model.

MambaModelProvider1P3B

Configuration for a 1.3B parameter Mamba model.

MambaModelProvider2P7B

Configuration for a 2.7B parameter Mamba model.

NVIDIAMambaModelProvider8B

Configuration for an 8B parameter Mamba model used in NVIDIA research.

NVIDIAMambaHybridModelProvider8B

Configuration for an 8B parameter hybrid Mamba model used in NVIDIA research.

MambaProvider

Deprecated alias for MambaModelProvider.

MambaProvider130M

Deprecated alias for MambaModelProvider130M.

MambaProvider370M

Deprecated alias for MambaModelProvider370M.

MambaProvider780M

Deprecated alias for MambaModelProvider780M.

MambaProvider1_3B

Deprecated alias for MambaModelProvider1P3B.

MambaProvider2_7B

Deprecated alias for MambaModelProvider2P7B.

NVIDIAMambaProvider8B

Deprecated alias for NVIDIAMambaModelProvider8B.

NVIDIAMambaHybridProvider8B

Deprecated alias for NVIDIAMambaHybridModelProvider8B.

Functions#

_warn_deprecated

Emit a deprecation warning directing callers from a deprecated alias to its replacement.

Data#

logger

API#

bridge.models.mamba.mamba_provider.logger#

‘getLogger(…)’

class bridge.models.mamba.mamba_provider.MambaModelProvider#

Bases: megatron.bridge.models.transformer_config.TransformerConfig, megatron.bridge.models.model_provider.ModelProviderMixin[megatron.core.models.mamba.MambaModel]

Configuration and provider for Megatron Core Mamba models.

This class extends TransformerConfig with Mamba-specific parameters and provides a method to instantiate configured Mamba models.

fp16_lm_cross_entropy: bool#

False

parallel_output: bool#

True

share_embeddings_and_output_weights: bool#

False

params_dtype: torch.dtype#

None

fp16: bool#

False

bf16: bool#

True

num_layers: int#

2

mamba_num_groups: int#

8

num_attention_heads: int#

1

hybrid_attention_ratio: float#

0.0

hybrid_mlp_ratio: float#

0.0

hybrid_override_pattern: Optional[str]#

None

seq_length: int#

8192

position_embedding_type: Literal['learned_absolute', 'rope', 'none']#

‘none’

rotary_percent: float#

1.0

rotary_base: int#

10000

seq_len_interpolation_factor: Optional[float]#

None

apply_rope_fusion: bool#

True

make_vocab_size_divisible_by: int#

128

gated_linear_unit: bool#

False

normalization: str#

‘RMSNorm’

add_bias_linear: bool#

False

hidden_dropout: float#

0.0

attention_dropout: float#

0.0

layernorm_epsilon: float#

1e-05

attention_backend: megatron.core.transformer.enums.AttnBackend#

None

deallocate_pipeline_outputs: bool#

True

bias_dropout_fusion: bool#

True

cross_entropy_loss_fusion: bool#

True

mamba_stack_spec: Union[megatron.core.transformer.ModuleSpec, Callable[[], megatron.core.transformer.ModuleSpec]]#

None

vocab_size: Optional[int]#

None

should_pad_vocab: bool#

False

provide(
pre_process=None,
post_process=None,
vp_stage=None,
) → megatron.core.models.mamba.MambaModel#

Configure and instantiate a Megatron Core Mamba model based on this configuration.

Parameters:
  • pre_process – Whether to include pre-processing (the embedding stage) in the model; defaults to True on the first pipeline stage

  • post_process – Whether to include post-processing (the output layer) in the model; defaults to True on the last pipeline stage

  • vp_stage – Virtual pipeline stage

Returns:

Configured Megatron Core Mamba model instance

Return type:

MCoreMambaModel
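A minimal usage sketch, assuming Megatron's parallel state has already been initialized; the field values shown (vocabulary size, layer count) are illustrative, not defaults from this module:

```python
from bridge.models.mamba.mamba_provider import MambaModelProvider

# The provider is a dataclass-style config: any field documented above
# can be overridden at construction time.
provider = MambaModelProvider(
    num_layers=48,
    hidden_size=1024,
    vocab_size=50304,  # illustrative; set before provide() is called
)

# With pre_process/post_process left as None, provide() infers them
# from the current pipeline stage.
model = provider.provide()
```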

class bridge.models.mamba.mamba_provider.MambaModelProvider130M#

Bases: bridge.models.mamba.mamba_provider.MambaModelProvider

Configuration for a 130M parameter Mamba model.

hybrid_override_pattern: str#

None

num_layers: int#

24

seq_length: int#

2048

hidden_size: int#

768

mamba_num_groups: int#

1

ffn_hidden_size: int#

768

make_vocab_size_divisible_by: int#

16
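These presets are thin specializations of MambaModelProvider, so any field can still be overridden per instance (the same applies to the other presets below). A brief sketch with an illustrative vocabulary size:

```python
from bridge.models.mamba.mamba_provider import MambaModelProvider130M

# Start from the 130M preset, overriding only the sequence length.
provider = MambaModelProvider130M(
    seq_length=4096,   # preset default is 2048
    vocab_size=50304,  # illustrative; match your tokenizer
)

# The remaining fields keep the preset values listed above.
assert provider.num_layers == 24
assert provider.hidden_size == 768
```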

class bridge.models.mamba.mamba_provider.MambaModelProvider370M#

Bases: bridge.models.mamba.mamba_provider.MambaModelProvider

Configuration for a 370M parameter Mamba model.

hybrid_override_pattern: str#

None

num_layers: int#

48

seq_length: int#

2048

hidden_size: int#

1024

mamba_num_groups: int#

1

ffn_hidden_size: int#

1024

make_vocab_size_divisible_by: int#

16

class bridge.models.mamba.mamba_provider.MambaModelProvider780M#

Bases: bridge.models.mamba.mamba_provider.MambaModelProvider

Configuration for a 780M parameter Mamba model.

hybrid_override_pattern: str#

None

num_layers: int#

48

seq_length: int#

2048

hidden_size: int#

1536

mamba_num_groups: int#

1

ffn_hidden_size: int#

1536

make_vocab_size_divisible_by: int#

16

class bridge.models.mamba.mamba_provider.MambaModelProvider1P3B#

Bases: bridge.models.mamba.mamba_provider.MambaModelProvider

Configuration for a 1.3B parameter Mamba model.

hybrid_override_pattern: str#

None

num_layers: int#

48

seq_length: int#

2048

hidden_size: int#

2048

mamba_num_groups: int#

1

ffn_hidden_size: int#

2048

make_vocab_size_divisible_by: int#

16

class bridge.models.mamba.mamba_provider.MambaModelProvider2P7B#

Bases: bridge.models.mamba.mamba_provider.MambaModelProvider

Configuration for a 2.7B parameter Mamba model.

hybrid_override_pattern: str#

None

num_layers: int#

64

seq_length: int#

2048

hidden_size: int#

2560

mamba_num_groups: int#

1

ffn_hidden_size: int#

2560

make_vocab_size_divisible_by: int#

16

class bridge.models.mamba.mamba_provider.NVIDIAMambaModelProvider8B#

Bases: bridge.models.mamba.mamba_provider.MambaModelProvider

Configuration for an 8B parameter Mamba model used in NVIDIA research.

hybrid_override_pattern: str#

None

num_attention_heads: int#

32

num_layers: int#

56

seq_length: int#

4096

hidden_size: int#

4096

mamba_num_groups: int#

8

ffn_hidden_size: int#

4096

make_vocab_size_divisible_by: int#

128

class bridge.models.mamba.mamba_provider.NVIDIAMambaHybridModelProvider8B#

Bases: bridge.models.mamba.mamba_provider.MambaModelProvider

Configuration for an 8B parameter hybrid Mamba model used in NVIDIA research.

hybrid_override_pattern: str#

‘M-M-M--M-M*-M-M-M-M--M*-M-M-M-M-M*--M-M-M-M-M*-M--M-M-M-’

num_layers: int#

56

seq_length: int#

4096

hidden_size: int#

4096

mamba_num_groups: int#

8

ffn_hidden_size: int#

16384

num_attention_heads: int#

32

num_query_groups: int#

8

make_vocab_size_divisible_by: int#

128
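In Megatron Core's hybrid layer allocation, 'M' marks a Mamba (SSM) layer, '*' an attention layer, and '-' an MLP layer, so the override pattern must supply exactly num_layers symbols. A quick sanity check of this preset's pattern:

```python
from collections import Counter

pattern = "M-M-M--M-M*-M-M-M-M--M*-M-M-M-M-M*--M-M-M-M-M*-M--M-M-M-"

counts = Counter(pattern)
assert len(pattern) == 56  # one symbol per layer (num_layers above)
assert counts["M"] == 24   # Mamba (SSM) layers
assert counts["*"] == 4    # attention layers
assert counts["-"] == 28   # MLP layers
```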

bridge.models.mamba.mamba_provider._warn_deprecated(old_cls: str, new_cls: str) → None#

Emit a deprecation warning directing callers from old_cls to new_cls.

class bridge.models.mamba.mamba_provider.MambaProvider#

Bases: bridge.models.mamba.mamba_provider.MambaModelProvider

Deprecated alias for MambaModelProvider.

Deprecated: This alias remains for backward compatibility and will be removed in a future release. Import and use MambaModelProvider instead.
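Migration is a rename only; the alias adds nothing beyond the warning raised from __post_init__. A sketch of the change (the exact warning mechanism is assumed, since the docs only say the alias is deprecated):

```python
# Before: constructing the alias triggers _warn_deprecated() in
# __post_init__ (presumably via warnings.warn).
from bridge.models.mamba.mamba_provider import MambaProvider

# After: drop-in replacement with identical configuration fields.
from bridge.models.mamba.mamba_provider import MambaModelProvider
```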

__post_init__() → None#

class bridge.models.mamba.mamba_provider.MambaProvider130M#

Bases: bridge.models.mamba.mamba_provider.MambaModelProvider130M

Deprecated alias for MambaModelProvider130M.

Deprecated: This alias remains for backward compatibility and will be removed in a future release. Import and use MambaModelProvider130M instead.

__post_init__() → None#

class bridge.models.mamba.mamba_provider.MambaProvider370M#

Bases: bridge.models.mamba.mamba_provider.MambaModelProvider370M

Deprecated alias for MambaModelProvider370M.

Deprecated: This alias remains for backward compatibility and will be removed in a future release. Import and use MambaModelProvider370M instead.

__post_init__() → None#

class bridge.models.mamba.mamba_provider.MambaProvider780M#

Bases: bridge.models.mamba.mamba_provider.MambaModelProvider780M

Deprecated alias for MambaModelProvider780M.

Deprecated: This alias remains for backward compatibility and will be removed in a future release. Import and use MambaModelProvider780M instead.

__post_init__() → None#

class bridge.models.mamba.mamba_provider.MambaProvider1_3B#

Bases: bridge.models.mamba.mamba_provider.MambaModelProvider1P3B

Deprecated alias for MambaModelProvider1P3B.

Deprecated: This alias remains for backward compatibility and will be removed in a future release. Import and use MambaModelProvider1P3B instead.

__post_init__() → None#

class bridge.models.mamba.mamba_provider.MambaProvider2_7B#

Bases: bridge.models.mamba.mamba_provider.MambaModelProvider2P7B

Deprecated alias for MambaModelProvider2P7B.

Deprecated: This alias remains for backward compatibility and will be removed in a future release. Import and use MambaModelProvider2P7B instead.

__post_init__() → None#

class bridge.models.mamba.mamba_provider.NVIDIAMambaProvider8B#

Bases: bridge.models.mamba.mamba_provider.NVIDIAMambaModelProvider8B

Deprecated alias for NVIDIAMambaModelProvider8B.

Deprecated: This alias remains for backward compatibility and will be removed in a future release. Import and use NVIDIAMambaModelProvider8B instead.

__post_init__() → None#

class bridge.models.mamba.mamba_provider.NVIDIAMambaHybridProvider8B#

Bases: bridge.models.mamba.mamba_provider.NVIDIAMambaHybridModelProvider8B

Deprecated alias for NVIDIAMambaHybridModelProvider8B.

Deprecated: This alias remains for backward compatibility and will be removed in a future release. Import and use NVIDIAMambaHybridModelProvider8B instead.

__post_init__() → None#