bridge.models.gemma_vl.gemma4_vl_provider#

Gemma 4 VL model provider.

Module Contents#

Classes#

Gemma4VLModelProvider

Model provider for Gemma 4 Vision-Language models.

API#

class bridge.models.gemma_vl.gemma4_vl_provider.Gemma4VLModelProvider#

Bases: megatron.bridge.models.gemma.gemma4_provider.Gemma4ModelProvider

Model provider for Gemma 4 Vision-Language models.

Extends Gemma4ModelProvider with vision tower config, multimodal projector config, and token IDs for vision-text fusion.

scatter_embedding_sequence_parallel: bool#

False

vision_config: Any#

None

text_config: Any#

None

vision_soft_tokens_per_image: int#

280

bos_token_id: int#

2

eos_token_id: int#

1

image_token_id: int#

258880

video_token_id: int#

258884

freeze_language_model: bool#

False

freeze_vision_model: bool#

False

freeze_vision_projection: bool#

False

provide(
pre_process=None,
post_process=None,
vp_stage=None,
) -> megatron.bridge.models.gemma_vl.modeling_gemma4_vl.Gemma4VLModel

provide_language_model(
pre_process=None,
post_process=None,
vp_stage=None,
) -> megatron.core.models.gpt.GPTModel