bridge.models.mistral.mistral_provider

Module Contents

Classes

MistralModelProvider
    Base model provider for the Mistral 7B model: https://huggingface.co/mistralai/Mistral-7B-Instruct-v0.3

MistralSmall3ModelProvider24B
    Config for Mistral Small 3 24B: https://huggingface.co/mistralai/Mistral-Small-24B-Instruct-2501

Data

logger

API

bridge.models.mistral.mistral_provider.logger

    'getLogger(…)'
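The rendered value elides the argument to getLogger. A minimal reconstruction, assuming the conventional module-level pattern; the `__name__` argument is an illustrative guess, not something this page confirms:

```python
import logging

# Assumed pattern: the argument is elided in the rendered value
# ('getLogger(…)'), so __name__ is an illustrative guess at the
# conventional module-level logger.
logger = logging.getLogger(__name__)
```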

class bridge.models.mistral.mistral_provider.MistralModelProvider

Bases: megatron.bridge.models.gpt_provider.GPTModelProvider

Base model provider for the Mistral 7B model: https://huggingface.co/mistralai/Mistral-7B-Instruct-v0.3

normalization: str = 'RMSNorm'
activation_func: Callable = None
position_embedding_type: str = 'rope'
add_bias_linear: bool = False
gated_linear_unit: bool = True
num_layers: int = 32
hidden_size: int = 4096
num_attention_heads: int = 32
num_query_groups: int = 8
ffn_hidden_size: int = 14336
seq_length: int = 32768
attention_dropout: float = 0.0
hidden_dropout: float = 0.0
share_embeddings_and_output_weights: bool = False
init_method_std: float = 0.02
layernorm_epsilon: float = 1e-05
window_size: List[int] = None
rotary_base: float = 1000000.0
params_dtype: torch.dtype = None
vocab_size: int = 32768
bf16: bool = True
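These are dataclass-style fields, so a provider is configured by overriding only what differs from the 7B defaults. A minimal sketch, not taken from this page: the full import path is inferred from the Bases entry above, and keyword construction assumes the provider behaves as a plain dataclass; the method that actually builds a model is not documented here.

```python
# Hedged sketch. The import path is inferred from the Bases line above
# (megatron.bridge.models...); it is not confirmed by this page.
from megatron.bridge.models.mistral.mistral_provider import MistralModelProvider

# Override a subset of fields; everything else keeps the documented defaults.
provider = MistralModelProvider(
    seq_length=8192,     # shorter context than the 32768 default
    hidden_dropout=0.1,  # default is 0.0
)

# Grouped-query attention layout implied by the defaults:
# 32 attention heads over 8 KV groups -> 4 query heads per KV head.
assert provider.num_attention_heads // provider.num_query_groups == 4
```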

class bridge.models.mistral.mistral_provider.MistralSmall3ModelProvider24B

Bases: bridge.models.mistral.mistral_provider.MistralModelProvider

Config for Mistral Small 3 24B: https://huggingface.co/mistralai/Mistral-Small-24B-Instruct-2501

num_layers: int = 40
hidden_size: int = 5120
ffn_hidden_size: int = 32768
num_attention_heads: int = 32
kv_channels: int = 128
seq_length: int = 32768
window_size: List[int] = None
cp_comm_type: str = None
rotary_percent: float = 1.0
rotary_base: float = 100000000.0
params_dtype: torch.dtype = None
vocab_size: int = 131072
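Relative to the 7B base, this config overrides only sizing and RoPE fields: kv_channels = 128 is set explicitly (hidden_size / num_attention_heads = 5120 / 32 would give 160), and rotary_base grows from 1e6 to 1e8. A custom variant can follow the same subclass-and-override pattern; the sketch below is hypothetical and assumes the providers are plain dataclasses, which the typed field/default listings suggest but the page does not state.

```python
from dataclasses import dataclass

from megatron.bridge.models.mistral.mistral_provider import MistralModelProvider


# Hypothetical subclass, not part of the library: it mirrors how
# MistralSmall3ModelProvider24B overrides only the fields that differ
# from the MistralModelProvider defaults documented above.
@dataclass
class MyMistralVariantProvider(MistralModelProvider):
    num_layers: int = 48           # 7B base uses 32
    hidden_size: int = 6144        # 7B base uses 4096
    ffn_hidden_size: int = 16384   # 7B base uses 14336
    rotary_base: float = 1e8       # matches the Small 3 24B setting
```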