Index _ | A | B | C | D | E | F | G | H | I | J | K | L | M | N | O | P | Q | R | S | T | U | V | W | Y | Z _ __abs__() (bridge.utils.import_utils.UnavailableMeta method) __add__() (bridge.diffusion.data.common.diffusion_sample.DiffusionSample method) (bridge.utils.import_utils.UnavailableMeta method) __all__ (in module bridge) (in module bridge.data.hf_processors) (in module bridge.data.mimo) (in module bridge.data.vlm_datasets) (in module bridge.diffusion.common.flow_matching.adapters) (in module bridge.diffusion.models.common) (in module bridge.diffusion.models.flux) (in module bridge.diffusion.models.flux.flow_matching) (in module bridge.diffusion.models.wan.inference.utils) (in module bridge.diffusion.recipes.flux) (in module bridge.diffusion.recipes.wan) (in module bridge.models) (in module bridge.models.bailing) (in module bridge.models.common) (in module bridge.models.conversion) (in module bridge.models.decorators) (in module bridge.models.decorators.dispatch) (in module bridge.models.deepseek) (in module bridge.models.gemma) (in module bridge.models.gemma_vl) (in module bridge.models.glm) (in module bridge.models.glm_vl) (in module bridge.models.gpt_oss) (in module bridge.models.hf_pretrained) (in module bridge.models.kimi) (in module bridge.models.kimi_vl) (in module bridge.models.llama) (in module bridge.models.llama_nemotron) (in module bridge.models.mamba) (in module bridge.models.mimo) (in module bridge.models.minimax_m2) (in module bridge.models.ministral3) (in module bridge.models.mistral) (in module bridge.models.nemotron) (in module bridge.models.nemotron_vl) (in module bridge.models.nemotronh) (in module bridge.models.olmoe) (in module bridge.models.qwen3_asr) (in module bridge.models.qwen3_asr.hf_qwen3_asr.configuration_qwen3_asr) (in module bridge.models.qwen3_asr.hf_qwen3_asr.modeling_qwen3_asr) (in module bridge.models.qwen3_asr.hf_qwen3_asr.processing_qwen3_asr) (in module bridge.models.qwen_audio) (in module bridge.models.qwen_omni) (in module bridge.models.qwen_vl) (in module bridge.models.qwen_vl.modelling_qwen3_vl) (in module bridge.models.sarvam) (in module bridge.peft.recompute) (in module bridge.recipes.deepseek) (in module bridge.recipes.gemma) (in module bridge.recipes.gemma3_vl) (in module bridge.recipes.glm) (in module bridge.recipes.glm_vl) (in module bridge.recipes.gpt) (in module bridge.recipes.gpt_oss) (in module bridge.recipes.kimi_vl) (in module bridge.recipes.llama) (in module bridge.recipes.ministral3) (in module bridge.recipes.moonlight) (in module bridge.recipes.moonlight.moonlight_16b) (in module bridge.recipes.nemotron_vl) (in module bridge.recipes.nemotronh) (in module bridge.recipes.nemotronh.nemotron_3_nano) (in module bridge.recipes.nemotronh.nemotron_3_super) (in module bridge.recipes.nemotronh.nemotron_nano_v2) (in module bridge.recipes.nemotronh.nemotronh) (in module bridge.recipes.olmoe) (in module bridge.recipes.qwen) (in module bridge.recipes.qwen2_audio) (in module bridge.recipes.qwen_vl) (in module bridge.training.utils.padding_utils) __bool__() (bridge.peft.walk_utils.HasBool method) __call__() (bridge.data.builders.hf_dataset.ProcessExampleFn method) (bridge.data.energon.task_encoder_utils.videohandler method) (bridge.diffusion.models.flux.flow_matching.flux_inference_pipeline.FluxInferencePipeline method) (bridge.diffusion.models.flux.flux_layer_spec.FluxSingleTransformerBlock method) (bridge.diffusion.models.flux.flux_layer_spec.MMDiTLayer method) (bridge.diffusion.models.flux.flux_step.FluxForwardStep method) (bridge.diffusion.models.wan.wan_step.WanForwardStep method) (bridge.models.decorators.dispatch._Dispatch method) (bridge.models.gpt_full_te_layer_autocast_spec.TETransformerLayerAutocast method) (bridge.models.hf_pretrained.causal_lm.PreTrainedCausalLM method) (bridge.models.hf_pretrained.state.StateDict method) (bridge.models.hf_pretrained.vlm.PreTrainedVLM method) (bridge.models.qwen3_asr.hf_qwen3_asr.processing_qwen3_asr.Qwen3ASRProcessor method) (bridge.peft.base.PEFT method) (bridge.training.forward_step_func_types.ForwardStepFunctor method) (bridge.training.forward_step_func_types.FourArgForwardStep method) (bridge.training.forward_step_func_types.ThreeArgForwardStep method) (bridge.training.forward_step_func_types.ThreeArgStateForwardStep method) (bridge.training.forward_step_func_types.TwoArgForwardStep method) (bridge.utils.import_utils.UnavailableMeta method) __contact_emails__ (in module bridge.package_info) __contact_names__ (in module bridge.package_info) __contains__() (bridge.models.hf_pretrained.state.StateDict method) __deepcopy__() (bridge.models.transformer_config.TransformerConfig method) (bridge.training.utils.config_utils._ConfigContainerBase method) __del__() (bridge.data.datasets.packed_parquet.GPTSFTPackedParquetDataset method) (bridge.data.datasets.utils._TextMemMapDataset method) __delete__() (bridge.utils.import_utils.UnavailableMeta method) __delitem__() (bridge.utils.import_utils.UnavailableMeta method) __description__ (in module bridge.package_info) __divmod__() (bridge.utils.import_utils.UnavailableMeta method) __download_url__ (in module bridge.package_info) __enter__() (bridge.training.utils.sig_utils.DistributedSignalHandler method) (bridge.utils.import_utils.UnavailableMeta method) (bridge.utils.import_utils.UnavailableNullContext method) __eq__() (bridge.utils.import_utils.UnavailableMeta method) __exit__() (bridge.training.utils.sig_utils.DistributedSignalHandler method) (bridge.utils.import_utils.UnavailableNullContext method) __floordiv__() (bridge.utils.import_utils.UnavailableMeta method) __ge__() (bridge.utils.import_utils.UnavailableMeta method) __get__() (bridge.utils.import_utils.UnavailableMeta method) __getattr__() (bridge.models.conversion.model_bridge._HFNameSuffixMapping method) (bridge.models.gpt.gpt_builder.GPTModelConfig method) (bridge.models.mamba.mamba_builder.MambaModelConfig method) (bridge.utils.import_utils.UnavailableMeta method) __getitem__() (bridge.data.datasets.packed_parquet.GPTSFTPackedParquetDataset method) (bridge.data.datasets.sft.GPTSFTDataset method) (bridge.data.datasets.sft.GPTSFTPackedDataset method) (bridge.data.datasets.utils._OnlineSampleMapping method) (bridge.data.datasets.utils._TextMemMapDataset method) (bridge.data.mimo.dataset.MimoDataset method) (bridge.data.samplers.RandomSeedDataset method) (bridge.data.vlm_datasets.conversation_dataset.VLMConversationDataset method) (bridge.diffusion.data.flux.flux_mock_datamodule._MockT2IDataset method) (bridge.diffusion.data.wan.wan_mock_datamodule._MockDataset method) (bridge.models.hf_pretrained.state.StateDict method) (bridge.models.hf_pretrained.state.StateSource method) (bridge.recipes.qwen_vl.qwen25_vl_dataset.MockQwen25VLDataset method) __gt__() (bridge.utils.import_utils.UnavailableMeta method) __hash__() (bridge.utils.import_utils.UnavailableMeta method) __homepage__ (in module bridge.package_info) __iadd__() (bridge.utils.import_utils.UnavailableMeta method) __idx_suffix__ (in module bridge.data.datasets.sft) (in module bridge.data.datasets.utils) __idx_version__ (in module bridge.data.datasets.sft) (in module bridge.data.datasets.utils) __ifloordiv__() (bridge.utils.import_utils.UnavailableMeta method) __ilshift__() (bridge.utils.import_utils.UnavailableMeta method) __imul__() (bridge.utils.import_utils.UnavailableMeta method) __index__() (bridge.utils.import_utils.UnavailableMeta method) __invert__() (bridge.utils.import_utils.UnavailableMeta method) __ipow__() (bridge.utils.import_utils.UnavailableMeta method) __irshift__() (bridge.utils.import_utils.UnavailableMeta method) __isub__() (bridge.utils.import_utils.UnavailableMeta method) __iter__() (bridge.data.energon.base_energon_datamodule.EnergonDataloader method) (bridge.data.samplers.MegatronPretrainingBatchSampler method) (bridge.data.samplers.MegatronPretrainingRandomSampler method) (bridge.data.samplers.MegatronPretrainingSampler method) (bridge.models.conversion.mapping_registry.MegatronMappingRegistry method) (bridge.models.hf_pretrained.state.StateDict method) (bridge.models.hf_pretrained.state.StateSource method) (bridge.utils.import_utils.UnavailableMeta method) __itruediv__() (bridge.utils.import_utils.UnavailableMeta method) __key__ (bridge.data.energon.hf_encoder_task_encoder.HFEncoderTaskSample attribute) __keys__ (bridge.data.energon.hf_encoder_task_encoder.HFEncoderTaskBatch attribute) __keywords__ (in module bridge.package_info) __le__() (bridge.utils.import_utils.UnavailableMeta method) __len__() (bridge.data.datasets.packed_parquet.GPTSFTPackedParquetDataset method) (bridge.data.datasets.sft.GPTSFTDataset method) (bridge.data.datasets.utils._OnlineSampleMapping method) (bridge.data.datasets.utils._TextMemMapDataset method) (bridge.data.mimo.dataset.MimoDataset method) (bridge.data.samplers.MegatronPretrainingBatchSampler method) (bridge.data.samplers.MegatronPretrainingRandomSampler method) (bridge.data.samplers.MegatronPretrainingSampler method) (bridge.data.samplers.RandomSeedDataset method) (bridge.data.vlm_datasets.conversation_dataset.VLMConversationDataset method) (bridge.diffusion.data.flux.flux_mock_datamodule._MockT2IDataset method) (bridge.diffusion.data.wan.wan_mock_datamodule._MockDataset method) (bridge.diffusion.models.flux.flow_matching.flux_inference_pipeline.FlowMatchEulerDiscreteScheduler method) (bridge.models.conversion.mapping_registry.MegatronMappingRegistry method) (bridge.models.hf_pretrained.state.StateDict method) (bridge.models.hf_pretrained.state.StateSource method) (bridge.recipes.qwen_vl.qwen25_vl_dataset.MockQwen25VLDataset method) (bridge.utils.import_utils.UnavailableMeta method) __license__ (in module bridge.package_info) __lshift__() (bridge.utils.import_utils.UnavailableMeta method) __lt__() (bridge.diffusion.data.common.diffusion_sample.DiffusionSample method) (bridge.utils.import_utils.UnavailableMeta method) __mul__() (bridge.utils.import_utils.UnavailableMeta method) __ne__() (bridge.utils.import_utils.UnavailableMeta method) __neg__() (bridge.utils.import_utils.UnavailableMeta method) __new__() (bridge.models.minimax_m2.minimax_m2_provider.FullDimKNorm method) (bridge.models.minimax_m2.minimax_m2_provider.FullDimQNorm method) (bridge.utils.import_utils.UnavailableMeta method) __next__() (bridge.data.energon.base_energon_datamodule.EnergonDataloader method) __package_name__ (in module bridge.package_info) __post_init__() (bridge.data.datasets.packed_sequence.PackedSequenceSpecs method) (bridge.diffusion.data.flux.flux_energon_datamodule.FluxDataModuleConfig method) (bridge.diffusion.data.flux.flux_energon_datamodule.FluxDatasetConfig method) (bridge.diffusion.data.flux.flux_mock_datamodule.FluxMockDataModuleConfig method) (bridge.diffusion.data.wan.wan_energon_datamodule.WanDataModuleConfig method) (bridge.diffusion.data.wan.wan_energon_datamodule.WanDatasetConfig method) (bridge.diffusion.data.wan.wan_mock_datamodule.WanMockDataModuleConfig method) (bridge.models.distillation_provider.DistillationProvider method) (bridge.models.mimo.llava_provider.LlavaMimoProvider method) (bridge.models.nemotronh.nemotron_h_provider.NemotronHModel47BProvider method) (bridge.models.nemotronh.nemotron_h_provider.NemotronHModel4BProvider method) (bridge.models.nemotronh.nemotron_h_provider.NemotronHModel56BProvider method) (bridge.models.nemotronh.nemotron_h_provider.NemotronHModel8BProvider method) (bridge.models.nemotronh.nemotron_h_provider.NemotronNano12Bv2Provider method) (bridge.models.nemotronh.nemotron_h_provider.NemotronNano9Bv2Provider method) (bridge.models.qwen_vl.qwen35_vl_provider.Qwen35VLModelProvider method) (bridge.models.qwen_vl.qwen35_vl_provider.Qwen35VLMoEModelProvider method) (bridge.models.transformer_config.HeterogeneousTransformerConfig method) (bridge.models.transformer_config.MLATransformerConfig method) (bridge.models.transformer_config.TransformerConfig method) (bridge.peft.canonical_lora.CanonicalLoRA method) (bridge.peft.dora.DoRA method) (bridge.training.config.DistributedDataParallelConfig method) (bridge.training.config.GPTDatasetConfig method) (bridge.training.config.OptimizerConfig method) __pow__() (bridge.utils.import_utils.UnavailableMeta method) __radd__() (bridge.diffusion.data.common.diffusion_sample.DiffusionSample method) (bridge.utils.import_utils.UnavailableMeta method) __rdivmod__() (bridge.utils.import_utils.UnavailableMeta method) __reduce__() (bridge.data.datasets.utils._OnlineSampleMapping method) __reduce_ex__() (bridge.data.datasets.utils._OnlineSampleMapping method) __repository_url__ (in module bridge.package_info) __repr__() (bridge.models.conversion.auto_bridge.AutoBridge method) (bridge.models.conversion.mapping_registry.MegatronMappingRegistry method) (bridge.models.decorators.dispatch._Dispatch method) (bridge.models.hf_pretrained.causal_lm._ConfigOnlyPretrainedShim method) (bridge.models.hf_pretrained.causal_lm.PreTrainedCausalLM method) (bridge.models.hf_pretrained.state.StateDict method) (bridge.models.hf_pretrained.vlm.PreTrainedVLM method) __rfloordiv__() (bridge.utils.import_utils.UnavailableMeta method) __rlshift__() (bridge.utils.import_utils.UnavailableMeta method) __rmul__() (bridge.utils.import_utils.UnavailableMeta method) __rpow__() (bridge.utils.import_utils.UnavailableMeta method) __rrshift__() (bridge.utils.import_utils.UnavailableMeta method) __rshift__() (bridge.utils.import_utils.UnavailableMeta method) __rsub__() (bridge.utils.import_utils.UnavailableMeta method) __rtruediv__() (bridge.utils.import_utils.UnavailableMeta method) __sample_type__ (bridge.data.energon.task_encoder_utils.ChatMLWebdataset attribute) __setattr__() (bridge.models.distillation_provider.DistillationProvider method) (bridge.models.gpt.gpt_builder.GPTModelConfig method) (bridge.models.mamba.mamba_builder.MambaModelConfig method) (bridge.training.mixed_precision.MixedPrecisionConfig method) __setitem__() (bridge.utils.import_utils.UnavailableMeta method) __shortversion__ (in module bridge.package_info) __slots__ (bridge.models.decorators.dispatch._Dispatch attribute) __str__() (bridge.data.datasets.utils._OnlineSampleMapping method) __sub__() (bridge.utils.import_utils.UnavailableMeta method) __subflavors__ (bridge.data.energon.hf_encoder_task_encoder.HFEncoderTaskBatch attribute) (bridge.data.energon.hf_encoder_task_encoder.HFEncoderTaskSample attribute) __truediv__() (bridge.utils.import_utils.UnavailableMeta method) __version__ (bridge.training.utils.config_utils._ConfigContainerBase attribute) (in module bridge.package_info) _accumulate_grouped_export() (bridge.models.conversion.model_bridge.MegatronModelBridge method) _ACTIVATION_FUNC_TO_STR (in module bridge.utils.activation_map) _add_quantization_mappings() (bridge.models.conversion.mapping_registry.MegatronMappingRegistry method) _add_separate_layernorm_mappings() (bridge.models.conversion.mapping_registry.MegatronMappingRegistry method) _add_speaker_and_signal() (in module bridge.data.datasets.utils) _adjust_gemma2_vocab_size() (in module bridge.recipes.gemma.gemma2) _adjust_gemma3_vocab_size() (in module bridge.recipes.gemma.gemma3) _align_expert_weight_to_shape() (in module bridge.models.conversion.param_mapping) _All2AllHp2Sp (class in bridge.peft.utils) _apply_cfgs() (bridge.training.comm_overlap.CommOverlapConfig method) _apply_freezing() (bridge.models.mimo.mimo_provider.MimoModelProvider method) _apply_overrides() (in module bridge.training.utils.omegaconf_utils) _apply_peft_transformation() (in module bridge.training.setup) _BLEND_PER_SPLIT_TYPE (in module bridge.recipes.utils.dataset_utils) _BLEND_TYPE (in module bridge.recipes.utils.dataset_utils) _BridgeImplClass (in module bridge.models.conversion.model_bridge) _broadcast_shared_embeddings() (bridge.models.conversion.model_bridge.MegatronModelBridge method) _build_data_from_text() (bridge.data.datasets.utils._JSONLMemMapDataset method) (bridge.data.datasets.utils._TextMemMapDataset method) _build_datasets() (bridge.data.builders.finetuning_dataset.FinetuningDatasetBuilder method) _build_distributed_model() (in module bridge.training.setup) _build_generation_config() (bridge.models.hf_pretrained.causal_lm._ConfigOnlyPretrainedShim static method) _build_inputs() (bridge.recipes.qwen_vl.qwen25_vl_dataset.MockQwen25VLDataset method) _build_lora_hf_names() (bridge.models.conversion.peft_bridge.MegatronPeftBridge method) _build_loss_mask() (bridge.data.datasets.sft.GPTSFTDataset method) (bridge.data.datasets.sft.GPTSFTPackedDataset method) _build_memmap_index_files() (in module bridge.data.datasets.utils) _build_packed_expert_linear_out_by_base() (bridge.models.conversion.peft_bridge.MegatronPeftBridge method) _build_samples_mapping() (bridge.data.datasets.packed_parquet.GPTSFTPackedParquetDataset method) (bridge.data.datasets.sft.GPTSFTChatDataset method) (bridge.data.datasets.sft.GPTSFTDataset method) (bridge.data.datasets.sft.GPTSFTPackedDataset method) _build_sharded_state_dict_metadata() (in module bridge.training.checkpointing) _build_split_dataset() (bridge.data.mimo.hf_provider.HFMimoDatasetProvider method) (bridge.data.mimo.mock_provider.MockMimoProvider method) (bridge.data.vlm_datasets.hf_provider.HFDatasetConversationProvider method) (bridge.data.vlm_datasets.preloaded_provider.PreloadedVLMConversationProvider method) _build_vision_submodule_spec() (bridge.models.mimo.llava_provider.LlavaMimoProvider method) _build_worker_config() (bridge.data.energon.base_energon_datamodule.EnergonMultiModalDataModule method) _cached_get_key_to_filename_map() (bridge.models.hf_pretrained.state.SafeTensorsStateSource static method) _calculate_padded_vocab_size_cached() (in module bridge.utils.vocab_utils) _calculate_scheduler_steps() (bridge.training.config.ConfigContainer method) _calculate_shift() (bridge.diffusion.models.flux.flow_matching.flux_inference_pipeline.FluxInferencePipeline static method) _call_target() (in module bridge.utils.instantiate_utils) _can_compile_fullgraph (bridge.models.qwen3_asr.hf_qwen3_asr.modeling_qwen3_asr.Qwen3ASRPreTrainedModel attribute) (bridge.models.qwen3_asr.hf_qwen3_asr.modeling_qwen3_asr.Qwen3ASRThinkerTextPreTrainedModel attribute) _can_record_outputs (bridge.models.qwen3_asr.hf_qwen3_asr.modeling_qwen3_asr.Qwen3ASRPreTrainedModel attribute) (bridge.models.qwen3_asr.hf_qwen3_asr.modeling_qwen3_asr.Qwen3ASRThinkerForConditionalGeneration attribute) (bridge.models.qwen3_asr.hf_qwen3_asr.modeling_qwen3_asr.Qwen3ASRThinkerTextModel attribute) (bridge.models.qwen3_asr.hf_qwen3_asr.modeling_qwen3_asr.Qwen3ASRThinkerTextPreTrainedModel attribute) _causal_lm_architecture() (bridge.models.conversion.auto_bridge.AutoBridge method) _ceil_to_nearest() (bridge.data.datasets.sft.GPTSFTDataset method) _chat_preprocess() (in module bridge.data.datasets.utils) _check_qwen3_5_available() (in module bridge.models.qwen_vl.qwen35_vl_provider) _check_qwen3_5_moe_available() (in module bridge.models.qwen_vl.qwen35_vl_provider) _CHECKPOINT_VERSION (in module bridge.training.checkpointing) _checkpointed_forward() (bridge.models.qwen_vl.modelling_qwen3_vl.transformer_block.Qwen3VLTransformerBlock method) (bridge.models.qwen_vl.modelling_qwen3_vl.transformer_block.Qwen3VLVisionTransformerBlock method) _collate_fn() (in module bridge.diffusion.data.flux.flux_mock_datamodule) _collate_ignore_samples() (in module bridge.diffusion.data.wan.wan_mock_datamodule) _collate_item() (bridge.data.datasets.sft.GPTSFTDataset method) _collect_packed_expert_adapter_tensors() (bridge.models.conversion.peft_bridge.MegatronPeftBridge method) _CommOverlapConfig (class in bridge.training.comm_overlap) _compatibles (bridge.diffusion.models.flux.flow_matching.flux_inference_pipeline.FlowMatchEulerDiscreteScheduler attribute) _compute_attention_mask() (bridge.models.gemma_vl.modeling_gemma3_vl.Gemma3VLModel method) _compute_mamba_dim_info() (in module bridge.peft.adapter_wrapper) _CONFIG_FOR_DOC (in module bridge.models.bailing.modeling_bailing_moe_v2) _config_only_pretrained() (bridge.models.conversion.auto_bridge.AutoBridge method) _ConfigContainerBase (class in bridge.training.utils.config_utils) _ConfigOnlyPretrainedShim (class in bridge.models.hf_pretrained.causal_lm) _construct_adapters_names() (bridge.models.conversion.peft_bridge.MegatronPeftBridge method) _contains_code_references() (in module bridge.models.config) _convert_node() (in module bridge.utils.instantiate_utils) _convert_pattern_to_regex() (bridge.models.conversion.mapping_registry.MegatronMappingRegistry method) _convert_target_to_string() (in module bridge.utils.instantiate_utils) _convert_to_openai_messages() (in module bridge.data.datasets.utils) _convert_value_to_dict() (bridge.training.utils.config_utils._ConfigContainerBase class method) (in module bridge.models.config) _copy_custom_modeling_files() (bridge.models.hf_pretrained.base.PreTrainedBase method) _count_wildcard_groups() (bridge.models.conversion.param_mapping.MegatronParamMapping static method) _create_attention_mask() (bridge.data.datasets.sft.GPTSFTDataset method) _create_config_from_provider() (bridge.models.conversion.auto_bridge.AutoBridge method) _create_dataset() (bridge.data.builders.finetuning_dataset.FinetuningDatasetBuilder method) _create_dist_train_pgs() (in module bridge.training.initialize) _create_list_wrapper() (in module bridge.peft.walk_utils) _create_loss_function() (bridge.diffusion.models.flux.flux_step.FluxForwardStep method) (bridge.diffusion.models.wan.wan_step.WanForwardStep method) (in module bridge.training.gpt_step) (in module bridge.training.llava_step) _create_loss_function_modelopt() (in module bridge.training.gpt_step) _create_model() (in module bridge.models.model_provider) _create_peft_pre_wrap_hook() (in module bridge.training.setup) _create_pg_collection() (in module bridge.training.initialize) _dataclass_to_omegaconf_dict() (in module bridge.training.utils.omegaconf_utils) _ddp_wrap() (in module bridge.models.common.unimodal) _deallocate_indexed_dataset_memory() (in module bridge.data.datasets.utils) _deepstack_process() (bridge.models.qwen_vl.modelling_qwen3_vl.transformer_block.Qwen3VLTransformerBlock method) _default_comet_converter() (in module bridge.recipes.run_plugins) _default_fault_tolerance_converter() (in module bridge.recipes.run_plugins) _default_nsys_converter() (in module bridge.recipes.run_plugins) _default_perf_env_converter() (in module bridge.recipes.run_plugins) _default_preemption_converter() (in module bridge.recipes.run_plugins) _default_pytorch_profiler_converter() (in module bridge.recipes.run_plugins) _default_rope_init_fn() (in module bridge.models.bailing.modeling_bailing_moe_v2) _DEFAULT_SPIKY_LOSS_FACTOR (in module bridge.training.losses) _default_wandb_converter() (in module bridge.recipes.run_plugins) _defaults (bridge.models.qwen3_asr.hf_qwen3_asr.processing_qwen3_asr.Qwen3ASRProcessorKwargs attribute) _delete_cuda_graphs() (in module bridge.training.train) _dequant_fp8_blockwise() (in module bridge.models.minimax_m2.minimax_m2_bridge) _dequantize_mxfp4() (in module bridge.models.gpt_oss.gpt_oss_bridge) _detect_fp8_params() (bridge.models.conversion.model_bridge.MegatronModelBridge method) _detect_parallelism_type() (bridge.models.conversion.param_mapping.AutoMapping method) _DIRECT_ITERATION_DIR_SENTINEL (in module bridge.training.checkpointing) _Dispatch (class in bridge.models.decorators.dispatch) _dispatch() (bridge.models.decorators.dispatch._Dispatch method) _dummy_train_step() (in module bridge.training.train) _enable_gpt_oss_blackwell_mxfp8() (in module bridge.recipes.gpt_oss.gpt_oss) _enable_gpt_oss_hopper_fp8_current_scaling() (in module bridge.recipes.gpt_oss.gpt_oss) _encode_text() (in module bridge.diffusion.models.wan.flow_matching.flow_inference_pipeline) _ensure_reader() (bridge.data.datasets.packed_parquet.GPTSFTPackedParquetDataset method) _enum_representer() (in module bridge.utils.yaml_utils) _eos_token_id (bridge.data.energon.hf_encoder_task_encoder.HFEncoderVLMTaskEncoder property) _EXCLUDE_FIELD (in module bridge.training.utils.omegaconf_utils) _expand_image_tokens() (in module bridge.data.vlm_datasets.collate) _expand_mask() (in module bridge.models.bailing.modeling_bailing_moe_v2) _expose_language_model_for_cuda_graph_helper() (bridge.models.qwen_vl.modelling_qwen3_vl.model.Qwen3VLModel method) _extract_final_answer() (in module bridge.data.hf_processors.gsm8k) _extract_image_features() (bridge.models.kimi_vl.modeling_kimi_k25_vl.KimiK25VLModel method) _extract_megatron_lm_args_from_state_dict() (in module bridge.training.checkpointing) _extract_pos_args() (in module bridge.utils.instantiate_utils) _extract_tokenizer_model_name() (bridge.data.builders.finetuning_dataset.FinetuningDatasetBuilder method) _fallback_get_hybrid_total_layer_count() (in module bridge.models.mamba.mamba_provider) _fetch_sample_from_memmap() (bridge.data.datasets.utils._TextMemMapDataset method) _filter_kwargs_for_target() (in module bridge.utils.instantiate_utils) _fim_permute_sequence() (bridge.data.datasets.fim_dataset.GPTFIMDataset method) _fim_split_and_permute_sequence() (bridge.data.datasets.fim_dataset.GPTFIMDataset method) _find_contiguous_blocks() (bridge.data.energon.hf_encoder_task_encoder.HFEncoderVLMTaskEncoder static method) _find_rank_module() (in module bridge.data.mimo.dp_utils) _finish_train() (in module bridge.training.train) _flash_attention_forward() (bridge.models.bailing.modeling_bailing_moe_v2.BailingMoeV2FlashAttention2 method) _flux_axes_dims_rope() (in module bridge.diffusion.conversion.flux.flux_bridge) _flux_ffn_hidden_size() (in module bridge.diffusion.conversion.flux.flux_bridge) _format_gpu_scores() (bridge.training.nvrx_straggler.NVRxStragglerDetectionManager static method) _format_list_for_override() (in module bridge.recipes.run_plugins) _format_location() (bridge.models.decorators.dispatch._Dispatch method) _format_no_implementation_error() (bridge.models.decorators.dispatch._Dispatch method) _forward_cached() (bridge.models.gemma.gemma3_provider.Gemma3RotaryEmbedding method) _forward_step_common() (in module bridge.training.gpt_step) _FP8_BLOCK_SIZE (in module bridge.models.minimax_m2.minimax_m2_bridge) _freeze_parameters() (bridge.models.qwen3_asr.hf_qwen3_asr.modeling_qwen3_asr.Qwen3ASRAudioEncoder method) _FullDimQKNormMapping (class in bridge.models.minimax_m2.minimax_m2_bridge) _FullDimRMSNorm (class in bridge.models.minimax_m2.minimax_m2_provider) _function_representer() (in module bridge.utils.yaml_utils) _fuse_gdn_separate_to_grouped() (in module bridge.models.conversion.param_mapping) _gather_assistant_text_segments() (in module bridge.data.vlm_datasets.collate) _gather_expert_adapter_weight() (bridge.models.conversion.peft_bridge.MegatronPeftBridge method) _gather_flag_from_rank0() (bridge.training.nvrx_straggler.NVRxStragglerDetectionManager method) _generate_model_state_dict() (in module bridge.training.checkpointing) _generate_random_audio() (in module bridge.data.mimo.mock_provider) _generate_random_image() (bridge.recipes.qwen_vl.qwen25_vl_dataset.MockQwen25VLDataset method) (in module bridge.data.mimo.mock_provider) _generate_synthetic_examples() (bridge.data.mimo.mock_provider.MockMimoProvider method) _generation_config_representer() (in module bridge.utils.yaml_utils) _get_activation_fn() (bridge.peft.utils.ParallelLinearAdapter method) _get_adapter_wrap_module() (bridge.models.conversion.peft_bridge.MegatronPeftBridge method) _get_all_keys() (bridge.models.hf_pretrained.state.StateDict method) _get_artifact_name_and_version() (in module bridge.training.utils.wandb_utils) _get_base_hf_param_names_for_adapter() (bridge.models.conversion.peft_bridge.MegatronPeftBridge method) _get_checkpoint_format() (in module bridge.training.checkpointing) _get_config() (bridge.models.conversion.param_mapping.MegatronParamMapping method) _get_default_language_config() (bridge.models.mimo.llava_provider.LlavaMimoProvider method) _get_effective_dim() (bridge.peft.canonical_lora.CanonicalLoRA method) (bridge.peft.lora.LoRA method) _get_feat_extract_output_lengths() (in module bridge.models.qwen3_asr.hf_qwen3_asr.modeling_qwen3_asr) (in module bridge.models.qwen3_asr.hf_qwen3_asr.processing_qwen3_asr) (in module bridge.models.qwen_omni.modeling_qwen25_omni.rope) _get_func_kwargs() (in module bridge.peft.walk_utils) _get_fused_adapter_linear_out_slices() (bridge.models.conversion.peft_bridge.MegatronPeftBridge method) _get_header_conversation_type_mask_role() (in module bridge.data.datasets.utils) _get_hybrid_total_layer_count() (in module bridge.models.mamba.mamba_provider) _get_init_false_fields() (in module bridge.training.utils.config_utils) _get_init_fn() (bridge.peft.utils.ParallelLinearAdapter method) _get_key_config_values() (in module bridge.training.config) _get_key_to_filename_map() (bridge.models.hf_pretrained.state.SafeTensorsStateSource method) _get_kimi_k25_vl_pipeline_layout() (in module bridge.recipes.kimi_vl.kimi_k25_vl) _get_layer_offset() (bridge.models.gpt_full_te_layer_autocast_spec.TETransformerLayerAutocast method) _get_llama_4_attn_scale() (bridge.models.ministral3.ministral3_provider.MinistralTEDotProductAttention static method) _get_lora_unwrapped_name() (bridge.models.conversion.peft_bridge.MegatronPeftBridge method) _get_maker() (bridge.data.vlm_datasets.hf_provider.HFDatasetConversationProvider method) _get_max_vision_seq_length() (bridge.models.qwen_vl.modelling_qwen3_vl.vision_model.Qwen3VLVisionModel method) _get_mcore_transformer_parent() (in module bridge.training.config) _get_model() (in module bridge.training.mlm_compat.model) _get_model_comm_overlap_cfgs() (bridge.training.comm_overlap.CommOverlapConfig method) _get_model_instance() (bridge.models.conversion.auto_bridge.AutoBridge method) _get_modelopt_checkpoint_path() (in module bridge.training.post_training.checkpointing) _get_module_dp_info() (in module bridge.training.mimo_step) _get_moonlight_pipeline_layout() (in module bridge.recipes.moonlight.moonlight_16b) _get_non_default_values() (in module bridge.training.config) _get_non_persistent_iteration() (in module bridge.training.checkpointing) _get_olmoe_pipeline_layout() (in module bridge.recipes.olmoe.olmoe_7b) _get_optimizer_overlap_cfgs() (bridge.training.comm_overlap.CommOverlapConfig method) _get_or_create_mapping() (bridge.models.conversion.param_mapping.AutoMapping method) _get_pg_collections_from_grids() (bridge.models.mimo.mimo_provider.MimoModelProvider method) _get_provider_from_model() (bridge.models.conversion.model_bridge.MegatronModelBridge method) _get_samples_mapping() (in module bridge.data.datasets.utils) _get_sarvam_moe_pipeline_layout() (in module bridge.models.sarvam.sarvam_provider) _get_scheduler() (in module bridge.training.optim) _get_tp_group() (in module bridge.models.minimax_m2.minimax_m2_provider) _get_train_state_from_state_dict() (in module bridge.training.checkpointing) _get_transformer_layer_spec() (in module bridge.training.mlm_compat.model) _get_unpad_data() (in module bridge.models.bailing.modeling_bailing_moe_v2) _get_vocab_size() (in module bridge.training.utils.theoretical_memory_utils) _get_wandb_artifact_tracker_filename() (in module bridge.training.utils.wandb_utils) _get_weight_norm() (bridge.peft.dora_layers.DoRALinear method) _gpt_provider() (in module bridge.training.mlm_compat.model) _grids (bridge.models.mimo.mimo_provider.MimoModelProvider attribute) _handle_mxfp8_param_buffer_copy() (in module bridge.training.train) _handle_straggler_report() (bridge.training.nvrx_straggler.NVRxStragglerDetectionManager method) _hf_expert_suffix() (bridge.models.glm.glm45_bridge.GLM45Bridge method) (bridge.models.glm_vl.glm_45v_bridge.GLM45VBridge method) _HF_LORA_SUFFIXES (in module bridge.models.conversion.peft_bridge) _HFNameSuffixMapping (class in bridge.models.conversion.model_bridge) _HYBRID_LAYER_PATTERN_KWARG (in module bridge.models.mamba.mamba_provider) _HYBRID_MAIN_PATTERN_SYMBOLS (in module bridge.models.mamba.mamba_provider) _identify_start_index_of_subsequence() (in module bridge.data.datasets.utils) _image_token_id (bridge.data.energon.hf_encoder_task_encoder.HFEncoderVLMTaskEncoder property) _images_to_pil() (in module bridge.data.energon.task_encoder_utils) _index_file_exists() (in module bridge.data.datasets.utils) _index_fn() (in module bridge.data.datasets.utils) _infer_gdn_in_proj_projection_from_name() (bridge.models.conversion.peft_bridge.MegatronPeftBridge method) _infer_hf_expert_idx() (bridge.models.conversion.peft_bridge.MegatronPeftBridge method) _infer_qkv_projection_from_name() (bridge.models.conversion.peft_bridge.MegatronPeftBridge method) _init_adapter() (bridge.peft.lora_layers.LinearAdapter static method) (bridge.peft.lora_layers.TELinearAdapter static method) _init_step_index() (bridge.diffusion.models.flux.flow_matching.flux_inference_pipeline.FlowMatchEulerDiscreteScheduler method) _init_weights() (bridge.models.bailing.modeling_bailing_moe_v2.BailingMoeV2PreTrainedModel method) _initialize_distributed() (in module bridge.training.initialize) _initialize_tp_communicators() (in module bridge.training.initialize) _inject_pg_collection_into_language_spec() (bridge.models.mimo.mimo_provider.MimoModelProvider method) _inject_pg_collection_into_modality_spec() (bridge.models.mimo.mimo_provider.MimoModelProvider method) _interleave() (bridge.models.gpt_oss.gpt_oss_bridge.GPTOSSMLPGateUpProjMapping static method) _interleave_qkv() (bridge.peft.canonical_lora.LoRALinearSplitQKV method) _is_adapter_param_name() (bridge.models.conversion.peft_bridge.MegatronPeftBridge method) _is_empty_path() (in module bridge.diffusion.data.wan.wan_energon_datamodule) _is_fused_fc1_gate_up() (bridge.models.conversion.peft_bridge.MegatronPeftBridge method) _is_fused_qkv() (bridge.models.conversion.peft_bridge.MegatronPeftBridge method) _is_gdn_in_proj_split() (bridge.models.conversion.peft_bridge.MegatronPeftBridge method) _is_local_attn_layer() (in module bridge.models.gemma.gemma3_provider) _is_model_section() (in module bridge.training.checkpointing) _is_omegaconf_problematic() (in module bridge.training.utils.omegaconf_utils) _is_parquet_file() (in module bridge.data.datasets.packed_parquet) _is_quantized_expert_key() (bridge.models.kimi_vl.kimi_k25_vl_bridge.KimiK25VLBridge method) _is_target() (in module bridge.utils.instantiate_utils) _iter_unwrapped_models() (in module bridge.peft.recompute) _ITERATION_DIR_MARKERS (in module bridge.training.utils.checkpoint_utils) _JSONLMemMapDataset (class in bridge.data.datasets.utils) _Keys (class in bridge.utils.instantiate_utils) _lazy_import_pyarrow() (in module bridge.data.datasets.packed_parquet) _load_and_dequant() (bridge.models.minimax_m2.minimax_m2_bridge.MiniMaxM2Bridge method) _load_and_dequantize() (bridge.models.kimi_vl.kimi_k25_vl_bridge.KimiK25VLBridge method) _load_args_from_checkpoint() (in module bridge.training.mlm_compat.arguments) _load_base_checkpoint() (in module bridge.training.checkpointing) _load_checkpoint_from_path() (in module bridge.training.checkpointing) _load_config() (bridge.diffusion.conversion.flux.flux_hf_pretrained.PreTrainedFlux method) (bridge.diffusion.conversion.wan.wan_hf_pretrained.PreTrainedWAN method) (bridge.models.hf_pretrained.base.PreTrainedBase method) (bridge.models.hf_pretrained.causal_lm.PreTrainedCausalLM method) (bridge.models.hf_pretrained.vlm.PreTrainedVLM method) _load_dataset() (bridge.data.builders.hf_dataset.HFDatasetBuilder method) (bridge.data.datasets.packed_parquet.GPTSFTPackedParquetDataset method) (bridge.data.datasets.sft.GPTSFTDataset method) (bridge.data.datasets.sft.GPTSFTPackedDataset method) _load_fsdp_dtensor_base_checkpoint() (in module bridge.training.checkpointing) _load_generation_config() (bridge.models.hf_pretrained.causal_lm.PreTrainedCausalLM method) (bridge.models.hf_pretrained.vlm.PreTrainedVLM method) _load_global_dist_base_checkpoint() (in module bridge.training.checkpointing) _load_hf_dataset() (bridge.data.mimo.hf_provider.HFMimoDatasetProvider method) _load_image_processor() (bridge.models.hf_pretrained.causal_lm.PreTrainedCausalLM method) (bridge.models.hf_pretrained.vlm.PreTrainedVLM method) _load_model() (bridge.diffusion.conversion.flux.flux_hf_pretrained.PreTrainedFlux method) (bridge.diffusion.conversion.wan.wan_hf_pretrained.PreTrainedWAN method) (bridge.models.hf_pretrained.base.PreTrainedBase method) (bridge.models.hf_pretrained.causal_lm.PreTrainedCausalLM method) (bridge.models.hf_pretrained.vlm.PreTrainedVLM method) _load_model_state_dict() (in module bridge.training.checkpointing) _load_model_weights_from_checkpoint() (in module bridge.training.checkpointing) _load_non_persistent_base_checkpoint() (in module bridge.training.checkpointing) _load_preloaded_examples() (in module bridge.data.vlm_datasets.preloaded_provider) _load_processor() (bridge.models.hf_pretrained.causal_lm.PreTrainedCausalLM method) (bridge.models.hf_pretrained.vlm.PreTrainedVLM method) _load_processors() (bridge.data.mimo.hf_provider.HFMimoDatasetProvider method) (bridge.data.mimo.mock_provider.MockMimoProvider method) _load_state_if_exists() (in module bridge.training.fault_tolerance) _load_tensors() (bridge.models.hf_pretrained.state.StateDict method) _load_tokenizer() (bridge.data.mimo.hf_provider.HFMimoDatasetProvider method) (bridge.data.mimo.mock_provider.MockMimoProvider method) (bridge.models.hf_pretrained.causal_lm.PreTrainedCausalLM method) (bridge.models.hf_pretrained.vlm.PreTrainedVLM method) _locate() (in module bridge.utils.instantiate_utils) _locate_row() (bridge.data.datasets.packed_parquet.GPTSFTPackedParquetDataset method) _log_detailed() (bridge.diffusion.common.flow_matching.flow_matching_pipeline.FlowMatchingPipeline method) _log_gpu_perf_scores() (bridge.training.nvrx_straggler.NVRxStragglerDetectionManager method) _log_gpu_scores() (bridge.training.nvrx_straggler.NVRxStragglerDetectionManager method) _log_loss_detailed() (bridge.diffusion.common.flow_matching.flow_matching_pipeline.FlowMatchingPipeline method) _LooseGatedMLPMapping (class in bridge.models.conversion.param_mapping) _lora_seq_stats_cache (in module bridge.training.utils.flop_utils) _make_base_examples() (bridge.data.vlm_datasets.mock_provider.MockVLMConversationProvider method) _make_causal_mask() (in module bridge.models.bailing.modeling_bailing_moe_v2) _make_energon_dataset() (in module bridge.recipes.qwen_vl.qwen3_vl) _make_fused_branches() (bridge.peft.lora_layers.TEFusedLoRALinear method) _make_indexed_dataset_compatibility() (in module bridge.data.datasets.utils) _make_lora_branch() (bridge.peft.lora_layers.TEFusedLoRALinear method) _make_lora_param_name() (bridge.models.conversion.peft_bridge.MegatronPeftBridge method) _make_main_branch() (bridge.peft.lora_layers.TEFusedLoRALinear method) _make_single_example() (bridge.data.vlm_datasets.mock_provider.MockVLMConversationProvider method) _mamba_provider() (in module bridge.training.mlm_compat.model) _map_module() (in module bridge.peft.walk_utils) _map_module_dict() (in module bridge.peft.walk_utils) _map_module_list() (in module bridge.peft.walk_utils) _mark_trainable_params_for_tp_grad_avg() (bridge.diffusion.models.wan.wan_layer_spec.WanLayerWithAdaLN method) (bridge.diffusion.models.wan.wan_model.WanModel method) _mask_loss() (in module bridge.training.post_training.distillation) _mask_targets() (in module bridge.data.datasets.utils) _match_keys() (bridge.models.hf_pretrained.state.StateDict method) _maybe_attach_metric_loggers() (in module bridge.training.tensor_inspect) _maybe_cast_to_list() (bridge.data.datasets.sft.GPTSFTDataset method) (bridge.data.datasets.sft.GPTSFTPackedDataset method) _maybe_dequantize_fp8() (bridge.models.ministral3.ministral3_bridge.Ministral3Bridge static method) _maybe_destroy_process_group() (in module bridge.training.pretrain) _maybe_pad_vision_sequence_for_cuda_graph() (in module bridge.models.qwen_vl.modelling_qwen3_vl.vision_model) _maybe_register_fsdp_buffers() (in module bridge.training.train) _maybe_update_timeouts() (in module bridge.training.fault_tolerance) _maybe_validate_prompt_template() (bridge.data.datasets.sft.GPTSFTChatDataset method) (bridge.data.datasets.sft.GPTSFTDataset method) _MCORE_MAMBA_INIT_PARAMS (in module bridge.models.mamba.mamba_provider) _megatron_global_adapters_info_all_pp_ranks() (bridge.models.conversion.peft_bridge.MegatronPeftBridge method) _megatron_global_param_names_all_pp_ranks() (bridge.models.conversion.model_bridge.MegatronModelBridge method) _megatron_local_name_to_global() (in module bridge.models.conversion.model_bridge) _merge_canonical_adapter_from_weights() (bridge.models.conversion.peft_bridge.MegatronPeftBridge method) _merge_input_ids_with_image_features() (bridge.models.kimi_vl.modeling_kimi_k25_vl.KimiK25VLModel method) _merge_lora_adapter_weights() (bridge.models.conversion.peft_bridge.MegatronPeftBridge method) _merge_single_adapter_weight() (bridge.models.conversion.peft_bridge.MegatronPeftBridge method) _MIN_ITERS_FOR_STEP_TIMEOUT_UPDATE (in module bridge.training.fault_tolerance) _mock_collate_fn() (in module bridge.diffusion.data.wan.wan_mock_datamodule) _MockDataset (class in bridge.diffusion.data.wan.wan_mock_datamodule) _MockT2IDataset (class in bridge.diffusion.data.flux.flux_mock_datamodule) _model_bridge (bridge.models.conversion.auto_bridge.AutoBridge property) _MODULE_TYPE_REGISTRY (bridge.models.conversion.param_mapping.AutoMapping attribute) _move_frozen_params_to_device() (bridge.models.mimo.mimo_provider.MimoModelProvider static method) _MTPFlatteningMapping (class in bridge.models.nemotronh.nemotron_h_bridge) _MTPFlatteningQKVMapping (class in bridge.models.nemotronh.nemotron_h_bridge) _multiple_truncation() (bridge.data.datasets.sft.GPTSFTDataset method) _MUON_NESTEROV_KWARG (in module bridge.recipes.utils.optimizer_utils) _needs_data_for_module() (in module bridge.data.mimo.dp_utils) _NO_COPY_KEYS (bridge.models.transformer_config.TransformerConfig attribute) _no_split_modules (bridge.models.bailing.modeling_bailing_moe_v2.BailingMoeV2PreTrainedModel attribute) (bridge.models.qwen3_asr.hf_qwen3_asr.modeling_qwen3_asr.Qwen3ASRAudioEncoder attribute) (bridge.models.qwen3_asr.hf_qwen3_asr.modeling_qwen3_asr.Qwen3ASRThinkerForConditionalGeneration attribute) (bridge.models.qwen3_asr.hf_qwen3_asr.modeling_qwen3_asr.Qwen3ASRThinkerTextModel attribute) (bridge.models.qwen3_asr.hf_qwen3_asr.modeling_qwen3_asr.Qwen3ASRThinkerTextPreTrainedModel attribute) _NON_PERSISTENT_CKPT_SUBDIR (in module bridge.training.checkpointing) _norm() (bridge.diffusion.models.common.normalization.RMSNorm method) _normalize_expert_param_name() (bridge.models.conversion.param_mapping.MegatronParamMapping method) _normalize_paths() (in module bridge.data.vlm_datasets.preloaded_provider) _NUM_WARMUP_ITERS (in module bridge.training.fault_tolerance) _OnlineSampleMapping (class in bridge.data.datasets.utils) _OPTIMIZER_CONFIG_FIELDS (in module bridge.recipes.utils.optimizer_utils) _override_user_cfgs() (bridge.training.comm_overlap.CommOverlapConfig method) _pack_latents() (bridge.diffusion.models.flux.flow_matching.flux_adapter.MegatronFluxAdapter method) (bridge.diffusion.models.flux.flow_matching.flux_inference_pipeline.FluxInferencePipeline static method) _packed_path_exists() (bridge.data.builders.finetuning_dataset.FinetuningDatasetBuilder method) _pad_token_id (bridge.data.energon.hf_encoder_task_encoder.HFEncoderVLMTaskEncoder property) _parse_slurm_nodelist() (in module bridge.utils.slurm_utils) _partial_representer() (in module bridge.utils.yaml_utils) _partition_packed_batch_for_cp() (in module bridge.training.gpt_step) _patch_standard_attention_specs() (in module bridge.models.qwen_vl.qwen35_vl_provider) _patch_te_grouped_linear_single_grouped_weight() (in module bridge.models.gpt_provider) _patch_yarn_concentration_factor() (in module bridge.models.gpt_provider) _peft_common() (in module bridge.recipes.common) _peft_common_vlm() (in module bridge.recipes.common) _permute() (bridge.data.datasets.fim_dataset.GPTFIMDataset method) _pg_collection (bridge.models.gpt_provider.GPTModelProvider attribute) (bridge.models.mamba.mamba_provider.MambaModelProvider attribute) (bridge.models.t5_provider.T5ModelProvider attribute) _prepare_4d_causal_attention_mask_with_cache_position() (bridge.models.qwen3_asr.hf_qwen3_asr.modeling_qwen3_asr.Qwen3ASRPreTrainedModelForConditionalGeneration method) _prepare_attention_mask() (bridge.models.qwen3_asr.hf_qwen3_asr.modeling_qwen3_asr.Qwen3ASRAudioEncoder method) _prepare_batch_for_pipeline() (bridge.diffusion.models.flux.flux_step.FluxForwardStep method) _prepare_input_dict_or_list() (in module bridge.utils.instantiate_utils) _prepare_latent_image_ids() (bridge.diffusion.models.flux.flow_matching.flux_adapter.MegatronFluxAdapter method) (bridge.diffusion.models.flux.flow_matching.flux_inference_pipeline.FluxInferencePipeline static method) _prepare_packed_split() (bridge.data.builders.finetuning_dataset.FinetuningDatasetBuilder method) _preprocess() (in module bridge.data.datasets.utils) _pretrain() (in module bridge.training.pretrain) _pretrain_common() (in module bridge.recipes.common) _pretrained_config_representer() (in module bridge.utils.yaml_utils) _print_gpu_scores() (bridge.training.nvrx_straggler.NVRxStragglerDetectionManager method) _print_num_params() (in module bridge.models.common.unimodal) _print_stragglers() (bridge.training.nvrx_straggler.NVRxStragglerDetectionManager method) _process_example() (bridge.data.datasets.sft.GPTSFTChatDataset method) (bridge.data.datasets.sft.GPTSFTDataset method) _processor (bridge.data.vlm_datasets.mock_provider.MockVLMConversationProvider attribute) (bridge.recipes.qwen_vl.qwen25_vl_dataset.MockQwen25VLDatasetProvider attribute) _processors (bridge.data.mimo.hf_provider.HFMimoDatasetProvider attribute) (bridge.data.mimo.mock_provider.MockMimoProvider attribute) _provider_bridge_input (bridge.models.conversion.auto_bridge.AutoBridge property) _push_up() (bridge.data.datasets.packing_utils._SegmentTree method) _query() (bridge.data.datasets.packing_utils._SegmentTree method) _query_document_sample_shuffle_indices() (bridge.data.datasets.fim_dataset.GPTFIMDataset method) _qwen2_audio_common() (in module bridge.recipes.qwen2_audio.qwen2_audio) _qwen35_vl_apply_common() (in module bridge.recipes.qwen_vl.qwen35_vl) _qwen35_vl_apply_moe() (in module bridge.recipes.qwen_vl.qwen35_vl) _qwen35_vl_apply_peft_scheme() (in module bridge.recipes.qwen_vl.qwen35_vl) _qwen35_vl_enable_recompute() (in module bridge.recipes.qwen_vl.qwen35_vl) _QWEN3_5_DENSE_HF_CLASS_NAME (in module bridge.models.qwen_vl.qwen35_vl_bridge) _QWEN3_5_MOE_HF_CLASS_NAME (in module bridge.models.qwen_vl.qwen35_vl_bridge) _qwen3_vl_common() (in module bridge.recipes.qwen_vl.qwen3_vl) _record_to_conversation() (in module bridge.data.vlm_datasets.preloaded_provider) _register_pre_wrap_hook() (in module bridge.training.setup) _REGISTRY (in module bridge.data.utils) _replace_wildcards() (in module bridge.models.nemotronh.nemotron_h_bridge) _resolve_checkpoint_iteration() (in module bridge.training.checkpointing) _resolve_hf_adapter_param_name() (bridge.models.conversion.peft_bridge.MegatronPeftBridge method) _resolve_hooks() (bridge.models.mimo.mimo_provider.MimoModelProvider method) _resolve_names() (bridge.models.conversion.param_mapping.MegatronParamMapping method) _resolve_parquet_paths() (in module bridge.data.datasets.packed_parquet) _resolve_path() (bridge.models.hf_pretrained.state.SafeTensorsStateSource static method) _resolve_seq_length() (in module bridge.recipes.utils.dataset_utils) _resolve_string_fields() (in module bridge.models.transformer_config) _resolve_target() (in module bridge.utils.instantiate_utils) _resolve_target_class() (in module bridge.training.utils.config_utils) _response_value_formater() (in module bridge.data.datasets.utils) _restore_excluded_fields() (in module bridge.training.utils.omegaconf_utils) _RestrictedUnpickler (class in bridge.utils.safe_pickle) _retrieve_tokenized() (in module bridge.data.datasets.packed_sequence) _RUNTIME_ONLY_TARGETS (in module bridge.training.utils.checkpoint_utils) _safe_asdict() (in module bridge.models.transformer_config) _SAFE_MODULES (bridge.utils.safe_pickle._RestrictedUnpickler attribute) _safe_object_representer() (in module bridge.utils.yaml_utils) _sample_from_distribution() (bridge.diffusion.common.flow_matching.flow_matching_pipeline.FlowMatchingPipeline method) _sanitize_dataclass_config() (in module bridge.training.utils.config_utils) _sanitize_mlflow_metrics() (in module bridge.training.utils.mlflow_utils) _sanitize_run_config_object() (in module bridge.training.utils.checkpoint_utils) _save_generator_distributed() (bridge.models.hf_pretrained.state.SafeTensorsStateSource method) _SegmentTree (class in bridge.data.datasets.packing_utils) _select_checkpoint_dir() (bridge.diffusion.models.wan.flow_matching.flow_inference_pipeline.FlowInferencePipeline method) _select_expert_adapter_weight() (bridge.models.conversion.peft_bridge.MegatronPeftBridge method) _select_hf_base_param_name() (in module bridge.models.conversion.peft_bridge) _SEPARATE_LAYERNORM_REWRITES (bridge.models.conversion.mapping_registry.MegatronMappingRegistry attribute) _separate_template() (bridge.data.datasets.sft.GPTSFTDataset method) _SERIALIZABLE_CALLABLE_FIELDS (in module bridge.training.utils.omegaconf_utils) _set_embedder_weights_replica_id() (bridge.diffusion.models.flux.flux_model.Flux method) (bridge.diffusion.models.wan.wan_model.WanModel method) _set_mimo_random_seeds() (in module bridge.training.setup_mimo) _set_num_cuda_device_max_connections() (bridge.recipes.run_plugins.PerfEnvPlugin method) _set_random_seed() (in module bridge.training.initialize) _set_signal_handler() (bridge.training.state.GlobalState method) _setup_experts() (bridge.models.bailing.modeling_bailing_moe_v2.BailingMoeV2SparseMoeBlock method) _setup_flight_recorder_env() (in module bridge.training.initialize) _sft_common() (in module bridge.recipes.common) _sft_common_vlm() (in module bridge.recipes.common) _shape() (bridge.models.bailing.modeling_bailing_moe_v2.BailingMoeV2Attention method) _share_embeddings_and_output_weights() (bridge.models.conversion.model_bridge.MegatronModelBridge method) _shared_dataset (in module bridge.data.datasets.packed_sequence) _should_skip_and_handle_iteration() (in module bridge.training.train) _should_skip_mtp_duplicate_embedding_export() (bridge.models.conversion.model_bridge.MegatronModelBridge method) _should_treat_linear_fc1_as_unfused() (in module bridge.peft.canonical_lora) _sigma_to_t() (bridge.diffusion.models.flux.flow_matching.flux_inference_pipeline.FlowMatchEulerDiscreteScheduler method) _SignatureType (in module bridge.models.decorators.dispatch) _skip_keys_device_placement (bridge.models.bailing.modeling_bailing_moe_v2.BailingMoeV2PreTrainedModel attribute) (bridge.models.qwen3_asr.hf_qwen3_asr.modeling_qwen3_asr.Qwen3ASRPreTrainedModel attribute) (bridge.models.qwen3_asr.hf_qwen3_asr.modeling_qwen3_asr.Qwen3ASRThinkerTextPreTrainedModel attribute) _split_fused_fc1_linear_out_weight() (bridge.models.conversion.peft_bridge.MegatronPeftBridge method) _split_gdn_grouped_to_separate() (in module bridge.models.conversion.param_mapping) _split_gdn_in_proj_linear_out_weight() (bridge.models.conversion.peft_bridge.MegatronPeftBridge method) _split_qkv() (bridge.diffusion.models.flux.flux_attention.JointSelfAttention method) _split_qkv_linear_out_weight() (bridge.models.conversion.peft_bridge.MegatronPeftBridge method) _split_text_by_placeholders() (in module bridge.data.vlm_datasets.preloaded_provider) _SPLIT_TYPE (in module bridge.recipes.utils.dataset_utils) _supports_attention_backend (bridge.models.qwen3_asr.hf_qwen3_asr.modeling_qwen3_asr.Qwen3ASRPreTrainedModel attribute) (bridge.models.qwen3_asr.hf_qwen3_asr.modeling_qwen3_asr.Qwen3ASRThinkerTextPreTrainedModel attribute) _supports_cache_class (bridge.models.bailing.modeling_bailing_moe_v2.BailingMoeV2PreTrainedModel attribute) _supports_flash_attn (bridge.models.qwen3_asr.hf_qwen3_asr.modeling_qwen3_asr.Qwen3ASRPreTrainedModel attribute) (bridge.models.qwen3_asr.hf_qwen3_asr.modeling_qwen3_asr.Qwen3ASRThinkerTextPreTrainedModel attribute) _supports_flash_attn_2 (bridge.models.bailing.modeling_bailing_moe_v2.BailingMoeV2PreTrainedModel attribute) _supports_flex_attn (bridge.models.qwen3_asr.hf_qwen3_asr.modeling_qwen3_asr.Qwen3ASRThinkerTextPreTrainedModel attribute) _supports_sdpa (bridge.models.bailing.modeling_bailing_moe_v2.BailingMoeV2PreTrainedModel attribute) (bridge.models.qwen3_asr.hf_qwen3_asr.modeling_qwen3_asr.Qwen3ASRAudioEncoder attribute) (bridge.models.qwen3_asr.hf_qwen3_asr.modeling_qwen3_asr.Qwen3ASRPreTrainedModel attribute) (bridge.models.qwen3_asr.hf_qwen3_asr.modeling_qwen3_asr.Qwen3ASRThinkerTextPreTrainedModel attribute) _swap_input_proj_halves() (bridge.models.mimo.mimo_bridge.MimoBridge static method) _tensor_to_pil() (in module bridge.data.energon.task_encoder_utils) _TextMemMapDataset (class in bridge.data.datasets.utils) _tied_weights_keys (bridge.models.bailing.modeling_bailing_moe_v2.BailingMoeV2ForCausalLM attribute) (bridge.models.qwen3_asr.hf_qwen3_asr.modeling_qwen3_asr.Qwen3ASRThinkerForConditionalGeneration attribute) _timers_write_to_comet() (in module bridge.training.state) _timers_write_to_mlflow() (in module bridge.training.state) _timers_write_to_wandb() (in module bridge.training.state) _TModule (in module bridge.peft.walk_utils) _to_dict() (in module bridge.models.config) _tokenize() (in module bridge.data.datasets.utils) _tokenize_get_item() (in module bridge.data.datasets.packed_sequence) _tokenize_init_worker() (in module bridge.data.datasets.packed_sequence) _tokenize_with_placeholders() (bridge.data.mimo.dataset.MimoDataset method) _tokenizer (bridge.data.energon.hf_encoder_task_encoder.HFEncoderVLMTaskEncoder property) (bridge.data.mimo.hf_provider.HFMimoDatasetProvider attribute) (bridge.data.mimo.mock_provider.MockMimoProvider attribute) _tokenizer_config_from_args() (in module bridge.training.mlm_compat.arguments) _torch_dtype_representer() (in module bridge.utils.yaml_utils) _track_excluded_fields() (in module bridge.training.utils.omegaconf_utils) _training_step_with_pipeline() (bridge.diffusion.models.flux.flux_step.FluxForwardStep method) _transformer_config_from_args() (in module bridge.training.mlm_compat.arguments) _TRANSFORMERS_HAS_QWEN3_5_MOE (in module bridge.models.qwen_vl.qwen35_vl_provider) _transpose_first_dim() (in module bridge.training.checkpointing) _trim_blockwise_fp8_scale_inv_padding() (bridge.models.conversion.model_bridge.MegatronModelBridge method) _truncation() (bridge.data.datasets.sft.GPTSFTDataset method) _uninterleave() (bridge.models.gpt_oss.gpt_oss_bridge.GPTOSSMLPGateUpProjMapping method) _unpack_latents() (bridge.diffusion.models.flux.flow_matching.flux_adapter.MegatronFluxAdapter method) (bridge.diffusion.models.flux.flow_matching.flux_inference_pipeline.FluxInferencePipeline static method) _unwrap_name() (bridge.models.conversion.model_bridge.MegatronModelBridge method) _upad_input() (bridge.models.bailing.modeling_bailing_moe_v2.BailingMoeV2FlashAttention2 method) _update() (bridge.data.datasets.packing_utils._SegmentTree method) _update_mimo_model_config_funcs() (in module bridge.training.setup_mimo) _update_model_config_funcs() (in module bridge.training.setup) _update_timeouts() (in module bridge.training.fault_tolerance) _uses_fused_experts() (bridge.models.glm.glm45_bridge.GLM45Bridge method) (bridge.models.glm_vl.glm_45v_bridge.GLM45VBridge method) _uses_vision_cuda_graph() (bridge.models.qwen_vl.modelling_qwen3_vl.vision_model.Qwen3VLVisionModel method) _validate_and_apply_deterministic_mode() (bridge.training.config.ConfigContainer method) _validate_and_set_vocab_size() (in module bridge.training.setup) _validate_and_sync_distributed_optimizer_settings() (in module bridge.training.config) _validate_config() (bridge.models.conversion.auto_bridge.AutoBridge class method) _validate_cp_comm_type() (bridge.training.config.ConfigContainer method) _validate_fine_grained_activation_offloading() (in module bridge.training.config) _validate_fp8_export_config() (bridge.models.conversion.auto_bridge.AutoBridge method) _validate_heterogeneous() (bridge.models.mimo.mimo_config.MimoParallelismConfig method) _validate_mixed_precision_consistency() (in module bridge.training.config) _validate_packed_path() (bridge.data.datasets.packed_sequence.PackedSequenceSpecs method) _validate_parallelism_constraints() (bridge.models.mimo.mimo_config.MimoParallelismConfig method) _validate_patterns() (bridge.models.conversion.param_mapping.MegatronParamMapping method) _validate_training_scheduler_compatibility() (bridge.training.config.ConfigContainer method) _verify_no_callables() (in module bridge.training.utils.omegaconf_utils) _videos_to_pil() (in module bridge.data.energon.task_encoder_utils) _vision_forward_packed_attention_setup() (in module bridge.models.qwen_vl.modelling_qwen3_vl.vision_model) _walk_model() (bridge.peft.base.PEFT method) _WAN_MODE_DEFAULTS (in module bridge.diffusion.models.wan.wan_step) _warmup_jit_function() (in module bridge.training.initialize) _warn_deprecated() (in module bridge.models.nemotronh.nemotron_h_provider) _with_progress_tracking() (bridge.models.conversion.model_bridge.MegatronModelBridge method) _wrap_with_mp_wrapper() (in module bridge.models.common.unimodal) A a2a_experimental (bridge.peft.lora.LoRA attribute) a2a_overlap (bridge.recipes.run_plugins.PerfEnvPlugin attribute) account_for_embedding_in_pipeline_split (bridge.models.gpt_provider.GPTModelProvider attribute) account_for_loss_in_pipeline_split (bridge.models.gpt_provider.GPTModelProvider attribute) activation_func (bridge.diffusion.models.flux.flux_provider.FluxProvider attribute) (bridge.diffusion.models.wan.wan_provider.WanModelProvider attribute) (bridge.models.gemma.gemma2_provider.Gemma2ModelProvider attribute) (bridge.models.gemma.gemma3_provider.Gemma3ModelProvider attribute) (bridge.models.gemma.gemma_provider.GemmaModelProvider attribute) (bridge.models.glm_vl.glm_45v_provider.GLM45VModelProvider attribute) (bridge.models.llama_nemotron.llama_nemotron_provider.LlamaNemotronHeterogeneousProvider attribute) (bridge.models.ministral3.ministral3_provider.Ministral3ModelProvider attribute) (bridge.models.mistral.mistral_provider.MistralModelProvider attribute) (bridge.models.nemotron_vl.nemotron_vl_provider.NemotronNano12Bv2VLModelProvider attribute) (bridge.models.nemotronh.nemotron_h_provider.NemotronHModelProvider attribute) (bridge.models.olmoe.olmoe_provider.OlMoEModelProvider attribute) (bridge.models.qwen3_asr.qwen3_asr_provider.Qwen3ASRModelProvider attribute) (bridge.models.qwen_omni.qwen25_omni_provider.Qwen25OmniModelProvider attribute) (bridge.models.sarvam.sarvam_provider.SarvamMLAModelProvider attribute) (bridge.models.sarvam.sarvam_provider.SarvamMoEModelProvider attribute) ACTIVATION_FUNC_MAP (in module bridge.utils.activation_map) active_module_name (bridge.training.setup_mimo.MimoSetupOutput attribute) active_world_size (bridge.training.config.InProcessRestartConfig attribute) AdaLN (class in bridge.diffusion.models.flux.flux_layer_spec) AdaLNContinuous (class in bridge.diffusion.models.flux.flux_layer_spec) adapter_key (bridge.models.conversion.peft_bridge.AdapterWeight attribute) (bridge.models.conversion.peft_bridge.AdapterWeightConversionTask attribute) adapter_key_filter() (bridge.peft.base.PEFT method) ADAPTER_KEY_TO_SUFFIX (in module bridge.models.conversion.peft_bridge) ADAPTER_NAME_MAP (in module bridge.models.conversion.peft_bridge) AdapterAttributes (class in bridge.peft.utils) AdapterWeight (class in bridge.models.conversion.peft_bridge) AdapterWeightConversionTask (class in bridge.models.conversion.peft_bridge) AdapterWrapper (class in bridge.peft.adapter_wrapper) add() (bridge.training.callbacks.CallbackManager method) add_bias_linear (bridge.diffusion.models.wan.wan_provider.WanModelProvider attribute) (bridge.models.gemma.gemma2_provider.Gemma2ModelProvider attribute) (bridge.models.gemma.gemma3_provider.Gemma3ModelProvider attribute) (bridge.models.gemma.gemma_provider.GemmaModelProvider attribute) (bridge.models.glm_vl.glm_45v_provider.GLM45VModelProvider attribute) (bridge.models.llama_nemotron.llama_nemotron_provider.LlamaNemotronHeterogeneousProvider attribute) (bridge.models.mamba.mamba_provider.MambaModelProvider attribute) (bridge.models.ministral3.ministral3_provider.Ministral3ModelProvider attribute) (bridge.models.mistral.mistral_provider.MistralModelProvider attribute) (bridge.models.olmoe.olmoe_provider.OlMoEModelProvider attribute) (bridge.models.qwen3_asr.qwen3_asr_provider.Qwen3ASRModelProvider attribute) (bridge.models.qwen_omni.qwen25_omni_provider.Qwen25OmniModelProvider attribute) (bridge.models.qwen_vl.qwen35_vl_provider.Qwen35VLModelProvider attribute) (bridge.models.qwen_vl.qwen35_vl_provider.Qwen35VLMoEModelProvider attribute) (bridge.models.sarvam.sarvam_provider.SarvamMLAModelProvider attribute) (bridge.models.sarvam.sarvam_provider.SarvamMoEModelProvider attribute) add_decoder (bridge.models.qwen_vl.qwen3_vl_provider.Qwen3VLModelProvider attribute) (bridge.models.qwen_vl.qwen3_vl_provider.Qwen3VLMoEModelProvider attribute) add_encoder (bridge.models.qwen_vl.qwen3_vl_provider.Qwen3VLModelProvider attribute) (bridge.models.qwen_vl.qwen3_vl_provider.Qwen3VLMoEModelProvider attribute) add_filter_to_all_loggers() (in module bridge.training.utils.log_utils) add_qkv_bias (bridge.diffusion.models.flux.flux_provider.FluxProvider attribute) (bridge.diffusion.models.wan.wan_provider.WanModelProvider attribute) (bridge.models.glm_vl.glm_45v_provider.GLM45VModelProvider attribute) (bridge.models.olmoe.olmoe_provider.OlMoEModelProvider attribute) (bridge.models.qwen3_asr.qwen3_asr_provider.Qwen3ASRModelProvider attribute) (bridge.models.qwen_omni.qwen25_omni_provider.Qwen25OmniModelProvider attribute) (bridge.models.qwen_vl.qwen35_vl_provider.Qwen35VLModelProvider attribute) (bridge.models.qwen_vl.qwen35_vl_provider.Qwen35VLMoEModelProvider attribute) (bridge.models.sarvam.sarvam_provider.SarvamMLAModelProvider attribute) (bridge.models.sarvam.sarvam_provider.SarvamMoEModelProvider attribute) add_residual() (bridge.diffusion.models.wan.wan_layer_spec.WanLayerWithAdaLN method) add_special_tokens (bridge.models.hf_pretrained.causal_lm.EncodeKwargs attribute) added_k_layernorm (bridge.diffusion.models.flux.flux_attention.JointSelfAttentionSubmodules attribute) added_linear_qkv (bridge.diffusion.models.flux.flux_attention.JointSelfAttentionSubmodules attribute) added_q_layernorm (bridge.diffusion.models.flux.flux_attention.JointSelfAttentionSubmodules attribute) ADDITIONAL_FILE_PATTERNS (bridge.models.conversion.model_bridge.MegatronModelBridge attribute) (bridge.models.nemotronh.nemotron_h_bridge.NemotronHBridge attribute) adjust_image_tokens() (in module bridge.models.nemotron_vl.nemotron_vl_utils) aggregate (bridge.training.comm_overlap.RingExchangeOverlapCfg attribute) align_param_gather (bridge.training.comm_overlap._CommOverlapConfig attribute) (bridge.training.comm_overlap.CommOverlapConfig attribute) all2all_hp2sp() (in module bridge.peft.utils) all_gather_item() (in module bridge.training.utils.sig_utils) AllGatherVisionEmbeddings (class in bridge.models.qwen_vl.modelling_qwen3_vl.utils) alpha (bridge.models.conversion.peft_bridge.AdapterWeight attribute) (bridge.models.conversion.peft_bridge.AdapterWeightConversionTask attribute) (bridge.peft.canonical_lora.CanonicalLoRA attribute) (bridge.peft.dora.DoRA attribute) (bridge.peft.lora.LoRA attribute) always_return_pooled (bridge.diffusion.models.flux.flow_matching.flux_inference_pipeline.ClipConfig attribute) AmaxFanoutMapping (class in bridge.models.conversion.quant_mapping) AmaxMapping (class in bridge.models.conversion.quant_mapping) append_to_progress_log() (in module bridge.training.utils.log_utils) apply_chat_template() (bridge.models.qwen3_asr.hf_qwen3_asr.processing_qwen3_asr.Qwen3ASRProcessor method) apply_dataset_override() (in module bridge.recipes.utils.dataset_utils) apply_flex_dispatcher_backend() (in module bridge.training.flex_dispatcher_backend) apply_interleaved_mrope() (bridge.models.qwen3_asr.hf_qwen3_asr.modeling_qwen3_asr.Qwen3ASRThinkerTextRotaryEmbedding method) (bridge.models.qwen_vl.modelling_qwen3_vl.rope.Qwen3VLMultimodalRotaryEmbedding method) apply_moe_token_drop() (in module bridge.training.utils.moe_token_drop) apply_overrides() (in module bridge.training.utils.omegaconf_utils) apply_peft_adapter_filter_to_state_dict() (in module bridge.training.checkpointing) apply_query_key_layer_scaling (bridge.models.nemotron_vl.nemotron_vl_provider.NemotronNano12Bv2VLModelProvider attribute) (bridge.models.nemotronh.nemotron_h_provider.NemotronHModelProvider attribute) apply_rope_fusion (bridge.diffusion.models.flux.flux_provider.FluxProvider attribute) (bridge.diffusion.models.wan.wan_provider.WanModelProvider attribute) (bridge.models.llama_nemotron.llama_nemotron_provider.LlamaNemotronHeterogeneousProvider attribute) (bridge.models.mamba.mamba_provider.MambaModelProvider attribute) (bridge.models.qwen3_asr.modeling_qwen3_asr.transformer_config.Qwen3ASRTransformerConfig attribute) (bridge.models.qwen_omni.modeling_qwen25_omni.transformer_config.Qwen25OmniTransformerConfig attribute) (bridge.models.qwen_vl.modelling_qwen3_vl.transformer_config.Qwen3VLTransformerConfig attribute) (bridge.models.qwen_vl.qwen3_vl_provider.Qwen3VLModelProvider attribute) (bridge.models.t5_provider.T5ModelProvider attribute) apply_rotary_pos_emb() (in module bridge.models.bailing.modeling_bailing_moe_v2) (in module bridge.models.qwen3_asr.hf_qwen3_asr.modeling_qwen3_asr) apply_rotary_pos_emb_absolute() (in module bridge.models.qwen_vl.modelling_qwen3_vl.rope) apply_rotary_pos_emb_in_fp32 (bridge.models.qwen3_asr.modeling_qwen3_asr.transformer_config.Qwen3ASRTransformerConfig attribute) (bridge.models.qwen3_asr.qwen3_asr_provider.Qwen3ASRModelProvider attribute) (bridge.models.qwen_omni.modeling_qwen25_omni.transformer_config.Qwen25OmniTransformerConfig attribute) (bridge.models.qwen_omni.qwen25_omni_provider.Qwen25OmniModelProvider attribute) (bridge.models.qwen_vl.modelling_qwen3_vl.transformer_config.Qwen3VLTransformerConfig attribute) (bridge.models.qwen_vl.qwen35_vl_provider.Qwen35VLModelProvider attribute) (bridge.models.qwen_vl.qwen35_vl_provider.Qwen35VLMoEModelProvider attribute) (bridge.models.qwen_vl.qwen3_vl_provider.Qwen3VLModelProvider attribute) (bridge.models.qwen_vl.qwen3_vl_provider.Qwen3VLMoEModelProvider attribute) apply_rotary_pos_emb_thd_absolute() (in module bridge.models.qwen_vl.modelling_qwen3_vl.rope) apply_run_config_backward_compat() (in module bridge.training.utils.config_utils) ARGS (bridge.utils.instantiate_utils._Keys attribute) ARTIFACTS (bridge.models.hf_pretrained.base.PreTrainedBase attribute) (bridge.models.hf_pretrained.causal_lm.PreTrainedCausalLM attribute) (bridge.models.hf_pretrained.vlm.PreTrainedVLM attribute) as_dict() (bridge.models.common.base.ModelConfig method) (bridge.models.common.base.Serializable method) as_model_kwargs() (bridge.training.utils.visual_inputs.GenericVisualInputs method) (bridge.training.utils.visual_inputs.Qwen2_5_VLVisualInputs method) (bridge.training.utils.visual_inputs.Qwen2AudioInputs method) asdict() (bridge.models.transformer_config.TransformerConfig method) async_calls_queue (bridge.training.state.GlobalState property) async_strategy (bridge.training.config.CheckpointConfig attribute) async_tensor_model_parallel_allreduce (bridge.models.qwen3_asr.qwen3_asr_provider.Qwen3ASRModelProvider attribute) (bridge.models.qwen_omni.qwen25_omni_provider.Qwen25OmniModelProvider attribute) async_write_results_mp_mode (bridge.training.config.CheckpointConfig attribute) atomic_gemm (bridge.training.comm_overlap.PipelineOverlapCfg attribute) (bridge.training.comm_overlap.RingExchangeOverlapCfg attribute) attention_backend (bridge.models.gemma.gemma3_provider.Gemma3ModelProvider attribute) (bridge.models.gemma.gemma_provider.GemmaModelProvider attribute) (bridge.models.mamba.mamba_provider.MambaModelProvider attribute) (bridge.models.nemotronh.nemotron_h_provider.NemotronHModelProvider56B attribute) ATTENTION_CLASSES (in module bridge.models.bailing.modeling_bailing_moe_v2) attention_dropout (bridge.diffusion.models.flux.flux_provider.FluxProvider attribute) (bridge.diffusion.models.wan.wan_provider.WanModelProvider attribute) (bridge.models.gemma.gemma2_provider.Gemma2ModelProvider attribute) (bridge.models.gemma.gemma3_provider.Gemma3ModelProvider attribute) (bridge.models.gemma.gemma_provider.GemmaModelProvider attribute) (bridge.models.glm_vl.glm_45v_provider.GLM45VModelProvider attribute) (bridge.models.gpt_provider.GPTProvider175B attribute) (bridge.models.llama_nemotron.llama_nemotron_provider.LlamaNemotronHeterogeneousProvider attribute) (bridge.models.mamba.mamba_provider.MambaModelProvider attribute) (bridge.models.ministral3.ministral3_provider.Ministral3ModelProvider attribute) (bridge.models.mistral.mistral_provider.MistralModelProvider attribute) (bridge.models.olmoe.olmoe_provider.OlMoEModelProvider attribute) (bridge.models.qwen3_asr.qwen3_asr_provider.Qwen3ASRModelProvider attribute) (bridge.models.qwen_omni.qwen25_omni_provider.Qwen25OmniModelProvider attribute) (bridge.models.qwen_vl.qwen35_vl_provider.Qwen35VLModelProvider attribute) (bridge.models.qwen_vl.qwen35_vl_provider.Qwen35VLMoEModelProvider attribute) (bridge.models.qwen_vl.qwen3_vl_provider.Qwen3VLModelProvider attribute) (bridge.models.qwen_vl.qwen3_vl_provider.Qwen3VLMoEModelProvider attribute) (bridge.models.sarvam.sarvam_provider.SarvamMLAModelProvider attribute) (bridge.models.sarvam.sarvam_provider.SarvamMoEModelProvider attribute) attention_mask (bridge.data.energon.hf_encoder_task_encoder.HFEncoderTaskBatch attribute) (bridge.models.hf_pretrained.causal_lm.GenerateKwargs attribute) attention_output_gate (bridge.models.qwen_vl.qwen35_vl_provider.Qwen35VLModelProvider attribute) (bridge.models.qwen_vl.qwen35_vl_provider.Qwen35VLMoEModelProvider attribute) attention_softmax_in_fp32 (bridge.models.gpt_provider.GPTModelProvider attribute) (bridge.models.nemotron_vl.nemotron_vl_provider.NemotronNano12Bv2VLModelProvider attribute) (bridge.models.nemotronh.nemotron_h_provider.NemotronHModelProvider attribute) (bridge.models.qwen3_asr.qwen3_asr_provider.Qwen3ASRModelProvider attribute) (bridge.models.qwen_omni.qwen25_omni_provider.Qwen25OmniModelProvider attribute) (bridge.models.qwen_vl.qwen35_vl_provider.Qwen35VLModelProvider attribute) (bridge.models.qwen_vl.qwen35_vl_provider.Qwen35VLMoEModelProvider attribute) (bridge.models.qwen_vl.qwen3_vl_provider.Qwen3VLModelProvider attribute) (bridge.models.qwen_vl.qwen3_vl_provider.Qwen3VLMoEModelProvider attribute) (bridge.models.sarvam.sarvam_provider.SarvamMLAModelProvider attribute) (bridge.models.sarvam.sarvam_provider.SarvamMoEModelProvider attribute) (bridge.models.t5_provider.T5ModelProvider attribute) attentions (bridge.models.bailing.modeling_bailing_moe_v2.MoEV2CausalLMOutputWithPast attribute) attn_logit_softcapping (bridge.models.gemma.gemma2_provider.Gemma2ModelProvider attribute) attribute_map (bridge.models.qwen3_asr.hf_qwen3_asr.configuration_qwen3_asr.Qwen3ASRThinkerConfig attribute) attributes (bridge.models.qwen3_asr.hf_qwen3_asr.processing_qwen3_asr.Qwen3ASRProcessor attribute) audio_end_token_id (bridge.models.qwen_omni.qwen25_omni_provider.Qwen25OmniModelProvider attribute) audio_start_token_id (bridge.models.qwen3_asr.modeling_qwen3_asr.transformer_config.Qwen3ASRTransformerConfig attribute) (bridge.models.qwen3_asr.qwen3_asr_provider.Qwen3ASRModelProvider attribute) (bridge.models.qwen_omni.modeling_qwen25_omni.transformer_config.Qwen25OmniTransformerConfig attribute) (bridge.models.qwen_omni.qwen25_omni_provider.Qwen25OmniModelProvider attribute) audio_token_id (bridge.models.qwen3_asr.modeling_qwen3_asr.transformer_config.Qwen3ASRTransformerConfig attribute) (bridge.models.qwen3_asr.qwen3_asr_provider.Qwen3ASRModelProvider attribute) (bridge.models.qwen_audio.qwen2_audio_provider.Qwen2AudioModelProvider attribute) (bridge.models.qwen_omni.modeling_qwen25_omni.transformer_config.Qwen25OmniTransformerConfig attribute) (bridge.models.qwen_omni.qwen25_omni_provider.Qwen25OmniModelProvider attribute) auto_map_model_class (bridge.models.hf_pretrained.causal_lm.PreTrainedCausalLM property) (bridge.models.hf_pretrained.vlm.PreTrainedVLM property) AutoBridge (class in bridge.models.conversion.auto_bridge) autocast_dtype (bridge.models.gemma.gemma3_provider.Gemma3ModelProvider attribute) (bridge.models.gemma.gemma_provider.GemmaModelProvider attribute) (bridge.models.glm_vl.glm_45v_provider.GLM45VModelProvider attribute) (bridge.models.llama_nemotron.llama_nemotron_provider.LlamaNemotronHeterogeneousProvider attribute) (bridge.models.olmoe.olmoe_provider.OlMoEModelProvider attribute) (bridge.training.mixed_precision.MixedPrecisionConfig attribute) autocast_enabled (bridge.training.mixed_precision.MixedPrecisionConfig attribute) AutocastTransformerLayer (class in bridge.models.gpt_full_te_layer_autocast_spec) AutoMapping (class in bridge.models.conversion.param_mapping) aux_loss (bridge.models.bailing.modeling_bailing_moe_v2.MoEV2CausalLMOutputWithPast attribute) axes_dims_rope (bridge.diffusion.models.flux.flux_provider.FluxProvider attribute) B backward() (bridge.models.qwen_vl.modelling_qwen3_vl.utils.AllGatherVisionEmbeddings static method) (bridge.peft.utils._All2AllHp2Sp static method) BAILINGMOEV2_INPUTS_DOCSTRING (in module bridge.models.bailing.modeling_bailing_moe_v2) BAILINGMOEV2_START_DOCSTRING (in module bridge.models.bailing.modeling_bailing_moe_v2) BailingMoeV2Attention (class in bridge.models.bailing.modeling_bailing_moe_v2) BailingMoeV2Bridge (class in bridge.models.bailing.bailing_moe2_bridge) BailingMoeV2Config (class in bridge.models.bailing.configuration_bailing_moe_v2) BailingMoeV2DecoderLayer (class in bridge.models.bailing.modeling_bailing_moe_v2) BailingMoeV2FlashAttention2 (class in bridge.models.bailing.modeling_bailing_moe_v2) BailingMoeV2ForCausalLM (class in bridge.models.bailing.modeling_bailing_moe_v2) BailingMoeV2Gate (class in bridge.models.bailing.modeling_bailing_moe_v2) BailingMoeV2MLP (class in bridge.models.bailing.modeling_bailing_moe_v2) BailingMoeV2Model (class in bridge.models.bailing.modeling_bailing_moe_v2) BailingMoeV2MTPLayer (class in bridge.models.bailing.modeling_bailing_moe_v2) BailingMoeV2PreTrainedModel (class in bridge.models.bailing.modeling_bailing_moe_v2) BailingMoeV2RMSNorm (class in bridge.models.bailing.modeling_bailing_moe_v2) BailingMoeV2RotaryEmbedding (class in bridge.models.bailing.modeling_bailing_moe_v2) BailingMoeV2SdpaAttention (class in bridge.models.bailing.modeling_bailing_moe_v2) BailingMoeV2SparseMoeBlock (class in bridge.models.bailing.modeling_bailing_moe_v2) barrier_and_log() (in module bridge.training.utils.log_utils) barrier_timeout (bridge.training.config.InProcessRestartConfig attribute) base_config_key (bridge.models.qwen3_asr.hf_qwen3_asr.configuration_qwen3_asr.Qwen3ASRTextConfig attribute) base_linear_forward() (bridge.peft.adapter_wrapper.AdapterWrapper method) base_linear_is_parallel (bridge.peft.utils.AdapterAttributes attribute) base_model_prefix (bridge.models.bailing.modeling_bailing_moe_v2.BailingMoeV2PreTrainedModel attribute) (bridge.models.qwen3_asr.hf_qwen3_asr.modeling_qwen3_asr.Qwen3ASRPreTrainedModel attribute) (bridge.models.qwen3_asr.hf_qwen3_asr.modeling_qwen3_asr.Qwen3ASRThinkerForConditionalGeneration attribute) (bridge.models.qwen3_asr.hf_qwen3_asr.modeling_qwen3_asr.Qwen3ASRThinkerTextPreTrainedModel attribute) batch (bridge.diffusion.common.flow_matching.adapters.base.FlowMatchingContext attribute) batch() (bridge.data.energon.hf_encoder_task_encoder.HFEncoderVLMTaskEncoder method) (bridge.diffusion.data.common.diffusion_task_encoder_with_sp.DiffusionTaskEncoderWithSequencePacking method) (bridge.diffusion.data.flux.flux_taskencoder.FluxTaskEncoder method) (bridge.diffusion.data.wan.wan_taskencoder.WanTaskEncoder method) batch_p2p_comm (bridge.training.comm_overlap._CommOverlapConfig attribute) (bridge.training.comm_overlap.CommOverlapConfig attribute) begin_index (bridge.diffusion.models.flux.flow_matching.flux_inference_pipeline.FlowMatchEulerDiscreteScheduler property) beta_fast (bridge.models.sarvam.sarvam_provider.SarvamMLAModelProvider attribute) beta_slow (bridge.models.sarvam.sarvam_provider.SarvamMLAModelProvider attribute) bf16 (bridge.diffusion.models.wan.wan_provider.WanModelProvider attribute) (bridge.models.gemma.gemma3_provider.Gemma3ModelProvider attribute) (bridge.models.gemma.gemma3_provider.Gemma3ModelProvider1B attribute) (bridge.models.gemma.gemma_provider.GemmaModelProvider attribute) (bridge.models.glm_vl.glm_45v_provider.GLM45VModelProvider attribute) (bridge.models.llama_nemotron.llama_nemotron_provider.LlamaNemotronHeterogeneousProvider attribute) (bridge.models.mamba.mamba_provider.MambaModelProvider attribute) (bridge.models.mimo.mimo_provider.MimoModelProvider attribute) (bridge.models.ministral3.ministral3_provider.Ministral3ModelProvider attribute) (bridge.models.mistral.mistral_provider.MistralModelProvider attribute) (bridge.models.model_provider.GetModelKwargs attribute) (bridge.models.olmoe.olmoe_provider.OlMoEModelProvider attribute) (bridge.models.sarvam.sarvam_provider.SarvamMLAModelProvider attribute) (bridge.models.sarvam.sarvam_provider.SarvamMoEModelProvider attribute) (bridge.training.mixed_precision.MixedPrecisionConfig attribute) bf16_mixed() (in module bridge.training.mixed_precision) bf16_with_fp8_current_scaling_mixed() (in module bridge.training.mixed_precision) bf16_with_fp8_delayed_scaling_mixed() (in module bridge.training.mixed_precision) bf16_with_fp8_subchannel_scaling_mixed() (in module bridge.training.mixed_precision) bf16_with_mxfp8_mixed() (in module bridge.training.mixed_precision) bf16_with_nvfp4_mixed() (in module bridge.training.mixed_precision) bias_activation_fusion (bridge.diffusion.models.wan.wan_provider.WanModelProvider attribute) (bridge.models.glm_vl.glm_45v_provider.GLM45VModelProvider attribute) (bridge.models.gpt_provider.GPTProvider175B attribute) (bridge.models.llama_nemotron.llama_nemotron_provider.LlamaNemotronHeterogeneousProvider attribute) (bridge.models.qwen3_asr.qwen3_asr_provider.Qwen3ASRModelProvider attribute) (bridge.models.qwen_omni.qwen25_omni_provider.Qwen25OmniModelProvider attribute) (bridge.models.qwen_vl.qwen35_vl_provider.Qwen35VLModelProvider attribute) (bridge.models.qwen_vl.qwen35_vl_provider.Qwen35VLMoEModelProvider attribute) (bridge.models.qwen_vl.qwen3_vl_provider.Qwen3VLModelProvider attribute) (bridge.models.qwen_vl.qwen3_vl_provider.Qwen3VLMoEModelProvider attribute) (bridge.models.t5_provider.T5ModelProvider attribute) bias_dropout_add_fusion (bridge.models.gpt_provider.GPTProvider175B attribute) bias_dropout_fusion (bridge.models.glm_vl.glm_45v_provider.GLM45VModelProvider attribute) (bridge.models.llama_nemotron.llama_nemotron_provider.LlamaNemotronHeterogeneousProvider attribute) (bridge.models.mamba.mamba_provider.MambaModelProvider attribute) (bridge.models.qwen3_asr.qwen3_asr_provider.Qwen3ASRModelProvider attribute) (bridge.models.qwen_omni.qwen25_omni_provider.Qwen25OmniModelProvider attribute) (bridge.models.qwen_vl.qwen3_vl_provider.Qwen3VLMoEModelProvider attribute) (bridge.models.t5_provider.T5ModelProvider attribute) bos_token_id (bridge.models.gemma_vl.gemma3_vl_provider.Gemma3VLModelProvider attribute) (bridge.models.hf_pretrained.causal_lm.GenerateKwargs attribute) (bridge.models.kimi_vl.kimi_k25_vl_provider.KimiK25VLModelProvider attribute) (bridge.models.qwen_audio.qwen2_audio_provider.Qwen2AudioModelProvider attribute) (bridge.models.qwen_omni.qwen25_omni_provider.Qwen25OmniModelProvider attribute) (bridge.models.qwen_vl.qwen25_vl_provider.Qwen25VLModelProvider attribute) (bridge.models.qwen_vl.qwen35_vl_provider.Qwen35VLModelProvider attribute) (bridge.models.qwen_vl.qwen35_vl_provider.Qwen35VLMoEModelProvider attribute) (bridge.models.qwen_vl.qwen3_vl_provider.Qwen3VLModelProvider attribute) (bridge.models.qwen_vl.qwen3_vl_provider.Qwen3VLMoEModelProvider attribute) bridge module bridge.data module bridge.data.builders module bridge.data.builders.finetuning_dataset module bridge.data.builders.hf_dataset module bridge.data.datasets module bridge.data.datasets.fim_dataset module bridge.data.datasets.packed_parquet module bridge.data.datasets.packed_sequence module bridge.data.datasets.packing_utils module bridge.data.datasets.sft module bridge.data.datasets.utils module bridge.data.energon module bridge.data.energon.base_energon_datamodule module bridge.data.energon.energon_provider module bridge.data.energon.hf_encoder_task_encoder module bridge.data.energon.task_encoder_utils module bridge.data.finetuning module bridge.data.hf_processors module bridge.data.hf_processors.gsm8k module bridge.data.hf_processors.openmathinstruct2 module bridge.data.hf_processors.squad module bridge.data.iterator_utils module bridge.data.loaders module bridge.data.mimo module bridge.data.mimo.base_provider module bridge.data.mimo.collate module bridge.data.mimo.dataset module bridge.data.mimo.dp_utils module bridge.data.mimo.hf_provider module bridge.data.mimo.loaders module bridge.data.mimo.mock_provider module bridge.data.samplers module bridge.data.utils module bridge.data.vlm_datasets module bridge.data.vlm_datasets.collate module bridge.data.vlm_datasets.conversation_dataset module bridge.data.vlm_datasets.hf_dataset_makers module bridge.data.vlm_datasets.hf_provider module bridge.data.vlm_datasets.mock_provider module bridge.data.vlm_datasets.preloaded_provider module bridge.data.vlm_datasets.token_utils module bridge.diffusion module bridge.diffusion.base module bridge.diffusion.common module bridge.diffusion.common.flow_matching module bridge.diffusion.common.flow_matching.adapters module bridge.diffusion.common.flow_matching.adapters.base module bridge.diffusion.common.flow_matching.adapters.simple module bridge.diffusion.common.flow_matching.flow_matching_pipeline module bridge.diffusion.conversion module bridge.diffusion.conversion.flux module bridge.diffusion.conversion.flux.flux_bridge module bridge.diffusion.conversion.flux.flux_hf_pretrained module bridge.diffusion.conversion.wan module bridge.diffusion.conversion.wan.wan_bridge module bridge.diffusion.conversion.wan.wan_hf_pretrained module bridge.diffusion.data module bridge.diffusion.data.common module bridge.diffusion.data.common.diffusion_energon_datamodule module bridge.diffusion.data.common.diffusion_sample module bridge.diffusion.data.common.diffusion_task_encoder_with_sp module bridge.diffusion.data.common.sequence_packing_utils module bridge.diffusion.data.flux module bridge.diffusion.data.flux.flux_energon_datamodule module bridge.diffusion.data.flux.flux_mock_datamodule module bridge.diffusion.data.flux.flux_taskencoder module bridge.diffusion.data.wan module bridge.diffusion.data.wan.wan_energon_datamodule module bridge.diffusion.data.wan.wan_mock_datamodule module bridge.diffusion.data.wan.wan_taskencoder module bridge.diffusion.models module bridge.diffusion.models.common module bridge.diffusion.models.common.dit_attention module bridge.diffusion.models.common.dit_embeddings module bridge.diffusion.models.common.normalization module bridge.diffusion.models.flux module bridge.diffusion.models.flux.flow_matching module bridge.diffusion.models.flux.flow_matching.flux_adapter module bridge.diffusion.models.flux.flow_matching.flux_inference_pipeline module bridge.diffusion.models.flux.flux_attention module bridge.diffusion.models.flux.flux_layer_spec module bridge.diffusion.models.flux.flux_model module bridge.diffusion.models.flux.flux_provider module bridge.diffusion.models.flux.flux_step module bridge.diffusion.models.flux.layers module bridge.diffusion.models.wan module bridge.diffusion.models.wan.flow_matching module bridge.diffusion.models.wan.flow_matching.flow_inference_pipeline module bridge.diffusion.models.wan.flow_matching.flow_matching_pipeline_wan module bridge.diffusion.models.wan.inference module bridge.diffusion.models.wan.inference.utils module bridge.diffusion.models.wan.rope_utils module bridge.diffusion.models.wan.utils module bridge.diffusion.models.wan.wan_layer_spec module bridge.diffusion.models.wan.wan_model module bridge.diffusion.models.wan.wan_provider module bridge.diffusion.models.wan.wan_step module bridge.diffusion.recipes module bridge.diffusion.recipes.flux module bridge.diffusion.recipes.flux.flux module bridge.diffusion.recipes.wan module bridge.diffusion.recipes.wan.wan module bridge.models module bridge.models.bailing module bridge.models.bailing.bailing_moe2_bridge module bridge.models.bailing.configuration_bailing_moe_v2 module bridge.models.bailing.modeling_bailing_moe_v2 module bridge.models.common module bridge.models.common.base module bridge.models.common.unimodal module bridge.models.config module bridge.models.conversion module bridge.models.conversion.auto_bridge module bridge.models.conversion.mapping_registry module bridge.models.conversion.model_bridge module bridge.models.conversion.param_mapping module bridge.models.conversion.peft_bridge module bridge.models.conversion.quant_mapping module bridge.models.conversion.transformers_compat module bridge.models.conversion.utils module bridge.models.decorators module bridge.models.decorators.dispatch module bridge.models.decorators.torchrun module bridge.models.deepseek module bridge.models.deepseek.common module bridge.models.deepseek.deepseek_v2_bridge module bridge.models.deepseek.deepseek_v3_bridge module bridge.models.distillation_provider module bridge.models.gemma module bridge.models.gemma.gemma2_bridge module bridge.models.gemma.gemma2_provider module bridge.models.gemma.gemma3_bridge module bridge.models.gemma.gemma3_provider module bridge.models.gemma.gemma_bridge module bridge.models.gemma.gemma_provider module bridge.models.gemma.modules module bridge.models.gemma_vl module bridge.models.gemma_vl.gemma3_vl_bridge module bridge.models.gemma_vl.gemma3_vl_provider module bridge.models.gemma_vl.modeling_gemma3_vl module bridge.models.glm module bridge.models.glm.glm45_bridge module bridge.models.glm.glm_moe_mappings module bridge.models.glm_vl module bridge.models.glm_vl.glm_45v_bridge module bridge.models.glm_vl.glm_45v_provider module bridge.models.glm_vl.modeling_glm_45v module bridge.models.gpt module bridge.models.gpt.gpt_builder module bridge.models.gpt_full_te_layer_autocast_spec module bridge.models.gpt_oss module bridge.models.gpt_oss.gpt_oss_bridge module bridge.models.gpt_provider module bridge.models.hf_pretrained module bridge.models.hf_pretrained.base module bridge.models.hf_pretrained.causal_lm module bridge.models.hf_pretrained.safe_config_loader module bridge.models.hf_pretrained.state module bridge.models.hf_pretrained.utils module bridge.models.hf_pretrained.vlm module bridge.models.kimi module bridge.models.kimi.kimi_bridge module bridge.models.kimi_vl module bridge.models.kimi_vl.kimi_k25_vl_bridge module bridge.models.kimi_vl.kimi_k25_vl_provider module bridge.models.kimi_vl.modeling_kimi_k25_vl module bridge.models.kimi_vl.utils module bridge.models.llama module bridge.models.llama.llama4_utils module bridge.models.llama.llama_bridge module bridge.models.llama_nemotron module bridge.models.llama_nemotron.llama_nemotron_bridge module bridge.models.llama_nemotron.llama_nemotron_provider module bridge.models.mamba module bridge.models.mamba.mamba_builder module bridge.models.mamba.mamba_provider module bridge.models.mimo module bridge.models.mimo.llava_provider module bridge.models.mimo.mimo_bridge module bridge.models.mimo.mimo_builder module bridge.models.mimo.mimo_config module bridge.models.mimo.mimo_ddp module bridge.models.mimo.mimo_provider module bridge.models.minimax_m2 module bridge.models.minimax_m2.minimax_m2_bridge module bridge.models.minimax_m2.minimax_m2_provider module bridge.models.ministral3 module bridge.models.ministral3.ministral3_bridge module bridge.models.ministral3.ministral3_provider module bridge.models.ministral3.modeling_ministral3 module bridge.models.mistral module bridge.models.mistral.mistral_bridge module bridge.models.mistral.mistral_provider module bridge.models.mla_provider module bridge.models.model_provider module bridge.models.nemotron module bridge.models.nemotron.nemotron_bridge module bridge.models.nemotron_vl module bridge.models.nemotron_vl.modeling_nemotron_vl module bridge.models.nemotron_vl.nemotron_vl_bridge module bridge.models.nemotron_vl.nemotron_vl_provider module bridge.models.nemotron_vl.nemotron_vl_utils module bridge.models.nemotronh module bridge.models.nemotronh.nemotron_h_bridge module bridge.models.nemotronh.nemotron_h_provider module bridge.models.olmoe module bridge.models.olmoe.olmoe_bridge module bridge.models.olmoe.olmoe_provider module bridge.models.qwen module bridge.models.qwen.qwen2_bridge module bridge.models.qwen.qwen3_bridge module bridge.models.qwen.qwen3_moe_bridge module bridge.models.qwen.qwen3_next_bridge module bridge.models.qwen3_asr module bridge.models.qwen3_asr.hf_qwen3_asr module bridge.models.qwen3_asr.hf_qwen3_asr.configuration_qwen3_asr module bridge.models.qwen3_asr.hf_qwen3_asr.modeling_qwen3_asr module bridge.models.qwen3_asr.hf_qwen3_asr.processing_qwen3_asr module bridge.models.qwen3_asr.modeling_qwen3_asr module bridge.models.qwen3_asr.modeling_qwen3_asr.model module bridge.models.qwen3_asr.modeling_qwen3_asr.rope module bridge.models.qwen3_asr.modeling_qwen3_asr.thinker_model module bridge.models.qwen3_asr.modeling_qwen3_asr.transformer_config module bridge.models.qwen3_asr.qwen3_asr_bridge module bridge.models.qwen3_asr.qwen3_asr_provider module bridge.models.qwen_audio module bridge.models.qwen_audio.modeling_qwen2_audio module bridge.models.qwen_audio.qwen2_audio_bridge module bridge.models.qwen_audio.qwen2_audio_provider module bridge.models.qwen_omni module bridge.models.qwen_omni.modeling_qwen25_omni module bridge.models.qwen_omni.modeling_qwen25_omni.model module bridge.models.qwen_omni.modeling_qwen25_omni.rope module bridge.models.qwen_omni.modeling_qwen25_omni.thinker_model module bridge.models.qwen_omni.modeling_qwen25_omni.transformer_config module bridge.models.qwen_omni.qwen25_omni_bridge module bridge.models.qwen_omni.qwen25_omni_provider module bridge.models.qwen_vl module bridge.models.qwen_vl.modeling_qwen25_vl module bridge.models.qwen_vl.modelling_qwen3_vl module bridge.models.qwen_vl.modelling_qwen3_vl.attention module bridge.models.qwen_vl.modelling_qwen3_vl.model module bridge.models.qwen_vl.modelling_qwen3_vl.rope module bridge.models.qwen_vl.modelling_qwen3_vl.text_model module bridge.models.qwen_vl.modelling_qwen3_vl.transformer_block module bridge.models.qwen_vl.modelling_qwen3_vl.transformer_config module bridge.models.qwen_vl.modelling_qwen3_vl.utils module bridge.models.qwen_vl.modelling_qwen3_vl.vision_model module bridge.models.qwen_vl.qwen25_vl_bridge module bridge.models.qwen_vl.qwen25_vl_provider module bridge.models.qwen_vl.qwen35_vl_bridge module bridge.models.qwen_vl.qwen35_vl_provider module bridge.models.qwen_vl.qwen3_vl_bridge module bridge.models.qwen_vl.qwen3_vl_provider module bridge.models.qwen_vl.qwen3_vl_step module bridge.models.sarvam module bridge.models.sarvam.common module bridge.models.sarvam.sarvam_mla_bridge module bridge.models.sarvam.sarvam_moe_bridge module bridge.models.sarvam.sarvam_provider module bridge.models.t5_provider module bridge.models.transformer_config module bridge.package_info module bridge.peft module bridge.peft.adapter_wrapper module bridge.peft.base module bridge.peft.canonical_lora module bridge.peft.dora module bridge.peft.dora_layers module bridge.peft.lora module bridge.peft.lora_layers module bridge.peft.module_matcher module bridge.peft.recompute module bridge.peft.utils module bridge.peft.walk_utils module bridge.recipes module bridge.recipes.common module bridge.recipes.deepseek module bridge.recipes.deepseek.deepseek_v2 module bridge.recipes.deepseek.deepseek_v3 module bridge.recipes.flux module bridge.recipes.gemma module bridge.recipes.gemma.gemma2 module bridge.recipes.gemma.gemma3 module bridge.recipes.gemma3_vl module bridge.recipes.gemma3_vl.gemma3_vl module bridge.recipes.glm module bridge.recipes.glm.glm45 module bridge.recipes.glm_vl module bridge.recipes.glm_vl.glm_45v module bridge.recipes.gpt module bridge.recipes.gpt.gpt3_175b module bridge.recipes.gpt.vanilla_gpt module bridge.recipes.gpt_oss module bridge.recipes.gpt_oss.gpt_oss module bridge.recipes.kimi_vl module bridge.recipes.kimi_vl.kimi_k25_vl module bridge.recipes.llama module bridge.recipes.llama.llama2 module bridge.recipes.llama.llama3 module bridge.recipes.ministral3 module bridge.recipes.ministral3.ministral3 module bridge.recipes.moonlight module bridge.recipes.moonlight.moonlight_16b module bridge.recipes.nemotron_vl module bridge.recipes.nemotron_vl.nemotron_nano_v2_vl module bridge.recipes.nemotronh module bridge.recipes.nemotronh.nemotron_3_nano module bridge.recipes.nemotronh.nemotron_3_super module bridge.recipes.nemotronh.nemotron_nano_v2 module bridge.recipes.nemotronh.nemotronh module bridge.recipes.olmoe module bridge.recipes.olmoe.olmoe_7b module bridge.recipes.qwen module bridge.recipes.qwen.qwen2 module bridge.recipes.qwen.qwen3 module bridge.recipes.qwen.qwen3_moe module bridge.recipes.qwen.qwen3_next module bridge.recipes.qwen2_audio module bridge.recipes.qwen2_audio.qwen2_audio module bridge.recipes.qwen_vl module bridge.recipes.qwen_vl.qwen25_vl module bridge.recipes.qwen_vl.qwen25_vl_dataset module bridge.recipes.qwen_vl.qwen35_vl module bridge.recipes.qwen_vl.qwen3_vl module bridge.recipes.run_plugins module bridge.recipes.utils module bridge.recipes.utils.dataset_utils module bridge.recipes.utils.finetune_utils module bridge.recipes.utils.optimizer_utils module bridge.recipes.utils.tokenizer_utils module bridge.recipes.wan module bridge.training module bridge.training.audio_lm_step module bridge.training.callbacks module bridge.training.checkpointing module bridge.training.comm_overlap module bridge.training.config module bridge.training.distill module bridge.training.eval module bridge.training.fault_tolerance module bridge.training.finetune module bridge.training.flex_dispatcher_backend module bridge.training.forward_step_func_types module bridge.training.gpt_step module bridge.training.initialize module bridge.training.inprocess_restart module bridge.training.llava_step module bridge.training.losses module bridge.training.mimo_parallel_utils module bridge.training.mimo_step module bridge.training.mixed_precision module bridge.training.mlm_compat module bridge.training.mlm_compat.activations module bridge.training.mlm_compat.arguments module bridge.training.mlm_compat.model module bridge.training.model_load_save module bridge.training.nvrx_straggler module bridge.training.optim module bridge.training.post_training module bridge.training.post_training.checkpointing module bridge.training.post_training.distillation module bridge.training.pretrain module bridge.training.pretrain_mimo module bridge.training.profiling module bridge.training.setup module bridge.training.setup_mimo module bridge.training.state module bridge.training.tensor_inspect module bridge.training.tokenizers module bridge.training.tokenizers.config module bridge.training.tokenizers.tokenizer module bridge.training.train module bridge.training.train_mimo module bridge.training.utils module bridge.training.utils.batch_utils module bridge.training.utils.checkpoint_utils module bridge.training.utils.comet_utils module bridge.training.utils.config_utils module bridge.training.utils.flop_utils module bridge.training.utils.log_utils module bridge.training.utils.mlflow_utils module bridge.training.utils.moe_token_drop module bridge.training.utils.omegaconf_utils module bridge.training.utils.packed_seq_utils module bridge.training.utils.padding_utils module bridge.training.utils.pg_utils module bridge.training.utils.sig_utils module bridge.training.utils.theoretical_memory_utils module bridge.training.utils.train_utils module bridge.training.utils.visual_inputs module bridge.training.utils.wandb_utils module bridge.training.vlm_step module bridge.utils module bridge.utils.activation_map module bridge.utils.common_utils module bridge.utils.decorators module bridge.utils.fusions module bridge.utils.import_utils module bridge.utils.instantiate_utils module bridge.utils.safe_pickle module bridge.utils.slurm_utils module bridge.utils.vocab_utils module bridge.utils.yaml_utils module broadcast_from_pp_rank() (bridge.models.conversion.param_mapping.MegatronParamMapping method) broadcast_obj_from_pp_rank() (bridge.models.conversion.param_mapping.MegatronParamMapping method) broadcast_tensor_to_tp_ranks() (bridge.models.conversion.param_mapping.MegatronParamMapping method) bucket_size (bridge.training.comm_overlap._CommOverlapConfig attribute) (bridge.training.comm_overlap.CommOverlapConfig attribute) build() (bridge.data.builders.finetuning_dataset.FinetuningDatasetBuilder method) (bridge.data.energon.base_energon_datamodule.EnergonMultiModalDataModule method) build_adapter_config_dict() (in module bridge.models.conversion.peft_bridge) build_adapter_conversion_tasks() (bridge.models.conversion.peft_bridge.MegatronPeftBridge method) build_and_load_model() (in module bridge.training.model_load_save) build_config_overrides() (bridge.training.config.OptimizerConfigOverrideProvider method) build_conversion_tasks() (bridge.models.conversion.model_bridge.MegatronModelBridge method) (bridge.models.deepseek.deepseek_v2_bridge.DeepSeekV2Bridge method) (bridge.models.deepseek.deepseek_v3_bridge.DeepSeekV3Bridge method) (bridge.models.glm.glm45_bridge.GLM45Bridge method) (bridge.models.glm_vl.glm_45v_bridge.GLM45VBridge method) (bridge.models.kimi_vl.kimi_k25_vl_bridge.KimiK25VLBridge method) (bridge.models.nemotronh.nemotron_h_bridge.NemotronHBridge method) build_datasets() (bridge.data.energon.energon_provider.EnergonProvider method) (bridge.data.mimo.base_provider.MimoDatasetProvider method) (bridge.data.mimo.hf_provider.HFMimoDatasetProvider method) (bridge.data.mimo.mock_provider.MockMimoProvider method) (bridge.data.vlm_datasets.hf_provider.HFDatasetConversationProvider method) (bridge.data.vlm_datasets.mock_provider.MockVLMConversationProvider method) (bridge.data.vlm_datasets.preloaded_provider.PreloadedVLMConversationProvider method) (bridge.diffusion.data.common.diffusion_energon_datamodule.DiffusionDataModuleConfig method) (bridge.diffusion.data.flux.flux_energon_datamodule.FluxDataModuleConfig method) (bridge.diffusion.data.flux.flux_energon_datamodule.FluxDatasetConfig method) (bridge.diffusion.data.flux.flux_mock_datamodule.FluxMockDataModuleConfig method) (bridge.diffusion.data.wan.wan_energon_datamodule.WanDataModuleConfig method) (bridge.diffusion.data.wan.wan_energon_datamodule.WanDatasetConfig method) (bridge.diffusion.data.wan.wan_mock_datamodule.WanMockDataModuleConfig method) (bridge.recipes.qwen_vl.qwen25_vl_dataset.MockQwen25VLDatasetProvider method) (bridge.training.config.DatasetProvider method) build_distributed_models() (bridge.models.common.base.ModelBuilder method) (bridge.models.gpt.gpt_builder.GPTModelBuilder method) (bridge.models.mamba.mamba_builder.MambaModelBuilder method) build_export_fp8_tasks() (bridge.models.conversion.model_bridge.MegatronModelBridge method) build_hypercomm_grids() (in module bridge.models.mimo.mimo_builder) build_index_files() (in module bridge.data.datasets.utils) build_index_from_memdata() (in module bridge.data.datasets.utils) build_infra() (bridge.models.mimo.mimo_provider.MimoModelProvider method) build_mimo_data_loaders() (in module bridge.data.mimo.loaders) build_model() (bridge.models.common.base.ModelBuilder method) (bridge.models.gpt.gpt_builder.GPTModelBuilder method) (bridge.models.mamba.mamba_builder.MambaModelBuilder method) build_packed_seq_params() (bridge.models.qwen_vl.modelling_qwen3_vl.vision_model.Qwen3VLVisionModel method) build_pg_collection_for_schedule() (in module bridge.training.mimo_parallel_utils) build_pretraining_data_loader() (in module bridge.data.samplers) build_tokenizer() (in module bridge.training.tokenizers.tokenizer) build_train_valid_test_data_iterators() (in module bridge.data.loaders) build_train_valid_test_data_loaders() (in module bridge.data.loaders) build_train_valid_test_datasets() (in module bridge.data.loaders) build_virtual_pipeline_stages() (in module bridge.models.common.unimodal) BuildConfigT (in module bridge.models.common.base) builder (bridge.models.common.base.ModelConfig attribute) (bridge.models.gpt.gpt_builder.GPTModelConfig attribute) (bridge.models.mamba.mamba_builder.MambaModelConfig attribute) BulkOverlapCfg (class in bridge.training.comm_overlap) C cache_image() (in module bridge.diffusion.models.wan.inference.utils) cache_video() (in module bridge.diffusion.models.wan.inference.utils) calc_dtensor_params_l2_norm() (in module bridge.training.utils.train_utils) calc_ft_timeouts (bridge.recipes.run_plugins.FaultTolerancePlugin attribute) (bridge.recipes.run_plugins.FaultTolerancePluginScriptArgs attribute) (bridge.training.config.FaultToleranceConfig attribute) calc_individual_gpu_perf (bridge.training.config.NVRxStragglerDetectionConfig attribute) calc_params_l2_norm() (in module bridge.training.utils.train_utils) calc_relative_gpu_perf (bridge.training.config.NVRxStragglerDetectionConfig attribute) calculate_avg_seqlen() (in module bridge.data.datasets.packing_utils) calculate_padded_vocab_size() (in module bridge.utils.vocab_utils) CALL (bridge.utils.instantiate_utils._Keys attribute) callable_to_str() (in module bridge.utils.activation_map) Callback (class in bridge.training.callbacks) CallbackContext (class in bridge.training.callbacks) CallbackManager (class in bridge.training.callbacks) can_enable_gradient_accumulation_fusion() (in module bridge.utils.fusions) can_handle() (bridge.models.conversion.auto_bridge.AutoBridge class method) canonical_mapping (bridge.peft.module_matcher.ModuleMatcher attribute) CanonicalLoRA (class in bridge.peft.canonical_lora) CausalLMType (in module bridge.models.hf_pretrained.causal_lm) cfg (bridge.training.state.GlobalState property) cfg_dropout_prob (bridge.diffusion.common.flow_matching.adapters.base.FlowMatchingContext attribute) cga_size (bridge.training.comm_overlap.BulkOverlapCfg attribute) (bridge.training.comm_overlap.PipelineOverlapCfg attribute) (bridge.training.comm_overlap.RingExchangeOverlapCfg attribute) chat_template (bridge.training.tokenizers.config.TokenizerConfig attribute) ChatMLSample (class in bridge.data.energon.task_encoder_utils) ChatMLWebdataset (class in bridge.data.energon.task_encoder_utils) check_nvrx_straggler_detection() (in module bridge.training.nvrx_straggler) check_optimizer_step_success (bridge.training.config.TrainingConfig attribute) check_stragglers() (bridge.training.nvrx_straggler.NVRxStragglerDetectionManager method) checkpoint (bridge.training.config.ConfigContainer attribute) checkpoint_and_decide_exit() (in module bridge.training.train) checkpoint_exists() (in module bridge.training.utils.checkpoint_utils) checkpoint_manager (bridge.training.setup.SetupOutput attribute) (bridge.training.setup_mimo.MimoSetupOutput attribute) CheckpointConfig (class in bridge.training.config) checkpointing_context (bridge.training.checkpointing.DefaultCheckpointManager property) CheckpointLoadContext (class in bridge.training.checkpointing) CheckpointManager (class in bridge.training.checkpointing) CheckpointSaveContext (class in bridge.training.checkpointing) CheckpointType (class in bridge.training.checkpointing) ChunkedMapping (class in bridge.models.conversion.param_mapping) chunkify() (in module bridge.models.llama.llama4_utils) chunkify_cu_seqlens() (in module bridge.models.llama.llama4_utils) ckpt_path (bridge.diffusion.models.flux.flux_provider.FluxProvider attribute) clean_up_tokenization_spaces (bridge.models.hf_pretrained.causal_lm.DecodeKwargs attribute) cleanup_old_non_persistent_checkpoint() (in module bridge.training.checkpointing) clear_broadcast_cache() (bridge.models.conversion.param_mapping.MegatronParamMapping method) clear_tensor_spec_output_cache() (bridge.models.conversion.param_mapping.MegatronParamMapping method) ClipConfig (class in bridge.diffusion.models.flux.flow_matching.flux_inference_pipeline) close() (bridge.data.datasets.packed_parquet.GPTSFTPackedParquetDataset method) CodeGemmaModelProvider2B (class in bridge.models.gemma.gemma_provider) CodeGemmaModelProvider7B (class in bridge.models.gemma.gemma_provider) collapse_thw() (in module bridge.models.qwen_vl.modelling_qwen3_vl.utils) collate_fn() (bridge.data.datasets.sft.GPTSFTChatDataset method) (bridge.data.datasets.sft.GPTSFTDataset method) (bridge.data.datasets.sft.GPTSFTPackedDataset method) COLLATE_FNS (in module bridge.data.vlm_datasets.collate) collate_impl (bridge.data.vlm_datasets.hf_provider.HFDatasetConversationProvider attribute) ColumnParallelMapping (class in bridge.models.conversion.param_mapping) comet_api_key (bridge.training.config.LoggerConfig attribute) comet_experiment_name (bridge.training.config.LoggerConfig attribute) comet_logger (bridge.training.state.GlobalState property) comet_project (bridge.training.config.LoggerConfig attribute) comet_tags (bridge.training.config.LoggerConfig attribute) comet_workspace (bridge.training.config.LoggerConfig attribute) CometPlugin (class in bridge.recipes.run_plugins) CometPluginScriptArgs (class in bridge.recipes.run_plugins) comm_overlap (bridge.training.config.ConfigContainer attribute) comm_overlap_config (bridge.recipes.qwen_vl.qwen3_vl.Qwen3VLCommonKwargs attribute) CommOverlapConfig (class in bridge.training.comm_overlap) completion_timeout (bridge.training.config.InProcessRestartConfig attribute) compose_hooks() (in module bridge.models.common.base) compute_activation_memory() (in module bridge.training.utils.theoretical_memory_utils) compute_loss() (bridge.diffusion.common.flow_matching.flow_matching_pipeline.FlowMatchingPipeline method) (bridge.diffusion.models.wan.flow_matching.flow_matching_pipeline_wan.WanFlowMatchingPipeline method) compute_throughputs_and_append_to_progress_log() (in module bridge.training.train) compute_weight_and_optimizer_memory() (in module bridge.training.utils.theoretical_memory_utils) ConcatenatedQKVMapping (class in bridge.models.conversion.param_mapping) config (bridge.models.hf_pretrained.base.PreTrainedBase property) (bridge.models.qwen3_asr.hf_qwen3_asr.modeling_qwen3_asr.Qwen3ASRAudioEncoder attribute) (bridge.models.qwen3_asr.hf_qwen3_asr.modeling_qwen3_asr.Qwen3ASRPreTrainedModel attribute) (bridge.models.qwen3_asr.hf_qwen3_asr.modeling_qwen3_asr.Qwen3ASRThinkerForConditionalGeneration attribute) (bridge.models.qwen3_asr.hf_qwen3_asr.modeling_qwen3_asr.Qwen3ASRThinkerTextModel attribute) (bridge.models.qwen3_asr.hf_qwen3_asr.modeling_qwen3_asr.Qwen3ASRThinkerTextPreTrainedModel attribute) config_class (bridge.models.bailing.modeling_bailing_moe_v2.BailingMoeV2PreTrainedModel attribute) (bridge.models.qwen3_asr.hf_qwen3_asr.modeling_qwen3_asr.Qwen3ASRForConditionalGeneration attribute) (bridge.models.qwen3_asr.hf_qwen3_asr.modeling_qwen3_asr.Qwen3ASRThinkerTextModel attribute) (bridge.models.qwen3_asr.hf_qwen3_asr.modeling_qwen3_asr.Qwen3ASRThinkerTextPreTrainedModel attribute) CONFIG_FILE (in module bridge.training.utils.checkpoint_utils) CONFIG_MAPPING (bridge.models.conversion.model_bridge.MegatronModelBridge attribute) (bridge.models.nemotron.nemotron_bridge.NemotronBridge attribute) (bridge.models.nemotron_vl.nemotron_vl_bridge.NemotronVLBridge attribute) (bridge.models.nemotronh.nemotron_h_bridge.NemotronHBridge attribute) CONFIG_NAME (bridge.models.model_provider.ModelProviderMixin attribute) ConfigContainer (class in bridge.training.config) ConfigFormat (in module bridge.models.config) ConfigProtocol (class in bridge.models.config) configure_model() (bridge.models.gemma_vl.modeling_gemma3_vl.Gemma3VLMultimodalProjectorConfig method) conform_config_to_reference() (in module bridge.models.conversion.utils) consumed_train_samples (bridge.training.state.TrainState attribute) consumed_valid_samples (bridge.training.state.TrainState attribute) context_dim (bridge.diffusion.data.flux.flux_energon_datamodule.FluxDatasetConfig attribute) (bridge.diffusion.data.flux.flux_mock_datamodule.FluxMockDataModuleConfig attribute) (bridge.diffusion.models.flux.flux_provider.FluxProvider attribute) context_embeddings (bridge.diffusion.data.common.diffusion_sample.DiffusionSample attribute) context_embeddings_dim (bridge.diffusion.data.wan.wan_energon_datamodule.WanDatasetConfig attribute) (bridge.diffusion.data.wan.wan_mock_datamodule.WanMockDataModuleConfig attribute) context_mask (bridge.diffusion.data.common.diffusion_sample.DiffusionSample attribute) context_parallel_size (bridge.models.mimo.mimo_config.ModuleParallelismConfig attribute) (bridge.models.model_provider.ModelParallelKwargs attribute) (bridge.recipes.qwen_vl.qwen3_vl.Qwen3VLCommonKwargs attribute) context_seq_len (bridge.diffusion.data.wan.wan_energon_datamodule.WanDatasetConfig attribute) (bridge.diffusion.data.wan.wan_mock_datamodule.WanMockDataModuleConfig attribute) conversation (bridge.data.energon.task_encoder_utils.ChatMLSample attribute) convert_to_amax_map() (in module bridge.models.conversion.quant_mapping) convert_to_distillation_provider() (in module bridge.models.distillation_provider) cook() (in module bridge.diffusion.data.common.diffusion_task_encoder_with_sp) (in module bridge.diffusion.data.flux.flux_taskencoder) (in module bridge.diffusion.data.wan.wan_taskencoder) cook_chatml_sample() (in module bridge.data.energon.task_encoder_utils) cookers (bridge.diffusion.data.common.diffusion_task_encoder_with_sp.DiffusionTaskEncoderWithSequencePacking attribute) (bridge.diffusion.data.flux.flux_taskencoder.FluxTaskEncoder attribute) (bridge.diffusion.data.wan.wan_taskencoder.WanTaskEncoder attribute) core_attention (bridge.diffusion.models.common.dit_attention.DiTCrossAttentionSubmodules attribute) (bridge.diffusion.models.flux.flux_attention.JointSelfAttentionSubmodules attribute) cp_comm_type (bridge.models.mistral.mistral_provider.MistralSmall3ModelProvider24B attribute) (bridge.models.qwen3_asr.qwen3_asr_provider.Qwen3ASRModelProvider attribute) (bridge.models.qwen_omni.qwen25_omni_provider.Qwen25OmniModelProvider attribute) (bridge.models.qwen_vl.qwen3_vl_provider.Qwen3VLMoEModelProvider attribute) (bridge.models.sarvam.sarvam_provider.SarvamMLAModelProvider attribute) (bridge.models.sarvam.sarvam_provider.SarvamMoEModelProvider attribute) cp_size (bridge.recipes.run_plugins.PerfEnvPlugin attribute) create_attention_mask (bridge.data.vlm_datasets.mock_provider.MockVLMConversationProvider attribute) (bridge.recipes.qwen_vl.qwen25_vl_dataset.MockQwen25VLDatasetProvider attribute) create_bridge_decorator() (in module bridge.models.conversion.model_bridge) create_checkpoint_manager() (in module bridge.training.checkpointing) create_hist() (in module bridge.data.datasets.packing_utils) create_masked_next_token_loss_function() (in module bridge.training.losses) create_multiturn_loss_mask_by_search() (in module bridge.data.vlm_datasets.collate) create_omegaconf_dict_config() (in module bridge.training.utils.omegaconf_utils) create_packing_strategy() (in module bridge.data.datasets.packing_utils) create_sft_dataset() (in module bridge.data.datasets.sft) cross_entropy_fusion_impl (bridge.models.sarvam.sarvam_provider.SarvamMLAModelProvider attribute) (bridge.models.sarvam.sarvam_provider.SarvamMoEModelProvider attribute) cross_entropy_loss_fusion (bridge.models.gpt_provider.GPTModelProvider attribute) (bridge.models.mamba.mamba_provider.MambaModelProvider attribute) crossattn_emb_size (bridge.diffusion.models.wan.wan_provider.WanModelProvider attribute) (bridge.diffusion.models.wan.wan_provider.WanModelProvider14B attribute) (bridge.diffusion.models.wan.wan_provider.WanModelProvider1_3B attribute) cuda_graph_scope (bridge.diffusion.models.flux.flux_provider.FluxProvider attribute) cuda_graph_warmup_steps (bridge.diffusion.models.flux.flux_provider.FluxProvider attribute) curr_eval_iter_idx (bridge.training.state.FaultToleranceState attribute) custom_manager_class (bridge.training.config.CheckpointConfig attribute) cyclic_iter() (in module bridge.data.energon.base_energon_datamodule) (in module bridge.data.loaders) D data_parallel_random_init (bridge.models.model_provider.GetModelKwargs attribute) data_parallel_size (bridge.models.mimo.mimo_config.ModuleParallelismConfig attribute) (bridge.training.comm_overlap.CommOverlapConfig attribute) data_path (bridge.training.config.GPTDatasetConfig attribute) data_sharding (bridge.training.config.DataloaderConfig attribute) data_type (bridge.diffusion.common.flow_matching.adapters.base.FlowMatchingContext attribute) DataclassInstance (in module bridge.training.utils.omegaconf_utils) DataclassT (in module bridge.models.conversion.auto_bridge) dataloader_type (bridge.data.energon.energon_provider.EnergonProvider attribute) (bridge.data.mimo.mock_provider.MockMimoProvider attribute) (bridge.data.vlm_datasets.hf_provider.HFDatasetConversationProvider attribute) (bridge.data.vlm_datasets.mock_provider.MockVLMConversationProvider attribute) (bridge.data.vlm_datasets.preloaded_provider.PreloadedVLMConversationProvider attribute) (bridge.diffusion.data.common.diffusion_energon_datamodule.DiffusionDataModuleConfig attribute) (bridge.diffusion.data.flux.flux_energon_datamodule.FluxDataModuleConfig attribute) (bridge.diffusion.data.flux.flux_energon_datamodule.FluxDatasetConfig attribute) (bridge.diffusion.data.flux.flux_mock_datamodule.FluxMockDataModuleConfig attribute) (bridge.diffusion.data.wan.wan_energon_datamodule.WanDataModuleConfig attribute) (bridge.diffusion.data.wan.wan_energon_datamodule.WanDatasetConfig attribute) (bridge.diffusion.data.wan.wan_mock_datamodule.WanMockDataModuleConfig attribute) (bridge.training.config.DataloaderConfig attribute) (bridge.training.config.FinetuningDatasetConfig attribute) DataloaderConfig (class in bridge.training.config) dataset (bridge.training.config.ConfigContainer attribute) dataset_dict (bridge.data.builders.hf_dataset.HFDatasetConfig attribute) dataset_kwargs (bridge.training.config.FinetuningDatasetConfig attribute) dataset_name (bridge.data.builders.hf_dataset.HFDatasetConfig attribute) dataset_root (bridge.training.config.FinetuningDatasetConfig attribute) dataset_subset (bridge.data.builders.hf_dataset.HFDatasetConfig attribute) DATASET_TYPES (in module bridge.recipes.utils.dataset_utils) DatasetBuildContext (class in bridge.training.config) DatasetProvider (class in bridge.training.config) datasets_provider() (bridge.data.energon.base_energon_datamodule.EnergonMultiModalDataModule method) (bridge.diffusion.data.common.diffusion_energon_datamodule.DiffusionDataModule method) DataT (in module bridge.data.iterator_utils) ddp (bridge.training.config.ConfigContainer attribute) ddp_config (bridge.models.model_provider.GetModelKwargs attribute) deallocate_pipeline_outputs (bridge.models.gpt_provider.GPTModelProvider attribute) (bridge.models.mamba.mamba_provider.MambaModelProvider attribute) (bridge.models.qwen3_asr.qwen3_asr_provider.Qwen3ASRModelProvider attribute) (bridge.models.qwen_omni.qwen25_omni_provider.Qwen25OmniModelProvider attribute) (bridge.models.qwen_vl.qwen3_vl_provider.Qwen3VLMoEModelProvider attribute) (bridge.models.t5_provider.T5ModelProvider attribute) decode() (bridge.models.hf_pretrained.causal_lm.PreTrainedCausalLM method) (bridge.models.hf_pretrained.vlm.PreTrainedVLM method) DecodeKwargs (class in bridge.models.hf_pretrained.causal_lm) decoder (bridge.models.qwen_vl.modeling_qwen25_vl.Qwen25VLModel property) (bridge.models.qwen_vl.modelling_qwen3_vl.model.Qwen3VLModel property) decoder_sparse_step (bridge.models.qwen_vl.qwen3_vl_provider.Qwen3VLMoEModelProvider attribute) deepseek_v2_lite_pretrain_config() (in module bridge.recipes.deepseek.deepseek_v2) deepseek_v2_pretrain_config() (in module bridge.recipes.deepseek.deepseek_v2) deepseek_v3_pretrain_config() (in module bridge.recipes.deepseek.deepseek_v3) deepseek_v3_pretrain_config_32nodes() (in module bridge.recipes.deepseek.deepseek_v3) DeepSeekV2Bridge (class in bridge.models.deepseek.deepseek_v2_bridge) DeepSeekV3Bridge (class in bridge.models.deepseek.deepseek_v3_bridge) deepstack_visual_indexes (bridge.models.qwen_vl.modelling_qwen3_vl.transformer_config.Qwen3VLTransformerConfig attribute) (bridge.models.qwen_vl.qwen3_vl_provider.Qwen3VLModelProvider attribute) (bridge.models.qwen_vl.qwen3_vl_provider.Qwen3VLMoEModelProvider attribute) default_collate_fn() (in module bridge.data.vlm_datasets.collate) DEFAULT_CONFIG_FORMAT (bridge.models.model_provider.ModelProviderMixin attribute) default_gsm8k_config() (in module bridge.recipes.utils.finetune_utils) default_layer_spec() (in module bridge.models.gpt.gpt_builder) (in module bridge.models.gpt_provider) DEFAULT_NEMO_CACHE_HOME (in module bridge.data.datasets.sft) DEFAULT_NEMO_DATASETS_CACHE (in module bridge.data.datasets.sft) DEFAULT_NEMO_MODELS_CACHE (in module bridge.data.datasets.sft) DEFAULT_NULL_TOKENIZER_VOCAB_SIZE (in module bridge.recipes.utils.tokenizer_utils) default_openmathinstruct2_config() (in module bridge.recipes.utils.finetune_utils) default_pack_path (bridge.data.builders.finetuning_dataset.FinetuningDatasetBuilder property) default_peft_config() (in module bridge.recipes.utils.finetune_utils) default_squad_config() (in module bridge.recipes.utils.finetune_utils) DefaultCheckpointManager (class in bridge.training.checkpointing) defer_embedding_wgrad_compute (bridge.training.comm_overlap._CommOverlapConfig attribute) (bridge.training.comm_overlap.CommOverlapConfig attribute) delay_wgrad_compute (bridge.training.comm_overlap._CommOverlapConfig attribute) (bridge.training.comm_overlap.CommOverlapConfig attribute) delete_extra_state() (in module bridge.training.checkpointing) delete_raw (bridge.data.builders.hf_dataset.HFDatasetConfig attribute) denormalize() (bridge.diffusion.models.flux.flow_matching.flux_inference_pipeline.FluxInferencePipeline static method) dequantize_int4() (in module bridge.models.kimi_vl.utils) describe() (bridge.models.conversion.mapping_registry.MegatronMappingRegistry method) destroy_global_state() (in module bridge.training.initialize) determine_task_type() (bridge.diffusion.common.flow_matching.flow_matching_pipeline.FlowMatchingPipeline method) (bridge.diffusion.models.wan.flow_matching.flow_matching_pipeline_wan.WanFlowMatchingPipeline method) device (bridge.diffusion.common.flow_matching.adapters.base.FlowMatchingContext attribute) (bridge.diffusion.models.flux.flow_matching.flux_inference_pipeline.ClipConfig attribute) (bridge.diffusion.models.flux.flow_matching.flux_inference_pipeline.T5Config attribute) DictStateSource (class in bridge.models.hf_pretrained.state) DiffusionDataModule (class in bridge.diffusion.data.common.diffusion_energon_datamodule) DiffusionDataModuleConfig (class in bridge.diffusion.data.common.diffusion_energon_datamodule) DiffusionSample (class in bridge.diffusion.data.common.diffusion_sample) DiffusionTaskEncoderWithSequencePacking (class in bridge.diffusion.data.common.diffusion_task_encoder_with_sp) dim (bridge.models.conversion.peft_bridge.AdapterWeight attribute) (bridge.models.conversion.peft_bridge.AdapterWeightConversionTask attribute) (bridge.peft.canonical_lora.CanonicalLoRA attribute) (bridge.peft.dora.DoRA attribute) (bridge.peft.lora.LoRA attribute) DirectMapping (class in bridge.models.conversion.param_mapping) disable_adapter() (bridge.peft.base.PEFT method) disable_adapter_layers() (bridge.peft.adapter_wrapper.AdapterWrapper method) (bridge.peft.base.PEFT method) (bridge.peft.lora_layers.LinearAdapter method) (bridge.peft.lora_layers.TELinearAdapter method) disable_forward_pre_hook() (in module bridge.training.train) disable_mtp_for_inference() (in module bridge.utils.common_utils) disable_sequence_parallel_comm (bridge.peft.utils.AdapterAttributes attribute) disable_tensor_parallel_comm (bridge.peft.utils.AdapterAttributes attribute) dispatch() (in module bridge.models.decorators.dispatch) dist (bridge.training.config.ConfigContainer attribute) dist_train (bridge.models.qwen_vl.qwen3_vl_provider.Qwen3VLModelProvider attribute) (bridge.models.qwen_vl.qwen3_vl_provider.Qwen3VLMoEModelProvider attribute) distill() (in module bridge.training.distill) DistillationProvider (class in bridge.models.distillation_provider) distribute_saved_activations (bridge.models.qwen3_asr.qwen3_asr_provider.Qwen3ASRModelProvider attribute) (bridge.models.qwen_omni.qwen25_omni_provider.Qwen25OmniModelProvider attribute) (bridge.models.qwen_vl.qwen3_vl_provider.Qwen3VLMoEModelProvider attribute) (bridge.models.t5_provider.T5ModelProvider attribute) distributed_fused_adam_with_cosine_annealing() (in module bridge.recipes.utils.optimizer_utils) distributed_fused_adam_with_cosine_annealing_samples() (in module bridge.recipes.utils.optimizer_utils) distributed_muon_with_cosine_annealing() (in module bridge.recipes.utils.optimizer_utils) DistributedDataParallelConfig (class in bridge.training.config) DistributedInitConfig (class in bridge.training.config) DistributedSignalHandler (class in bridge.training.utils.sig_utils) DistTrainConfig (class in bridge.models.qwen_vl.qwen3_vl_provider) DistTrainProcessGroupCollection (class in bridge.training.utils.pg_utils) DiTCrossAttention (class in bridge.diffusion.models.common.dit_attention) DiTCrossAttentionSubmodules (class in bridge.diffusion.models.common.dit_attention) DiTSelfAttention (class in bridge.diffusion.models.common.dit_attention) do_convert_from_hf (bridge.diffusion.models.flux.flux_provider.FluxProvider attribute) do_sample (bridge.models.hf_pretrained.causal_lm.GenerateKwargs attribute) do_test (bridge.training.config.FinetuningDatasetConfig attribute) (bridge.training.state.TrainState attribute) do_train (bridge.training.state.TrainState attribute) do_valid (bridge.training.state.TrainState attribute) do_validation (bridge.training.config.FinetuningDatasetConfig attribute) DoRA (class in bridge.peft.dora) DoRALinear (class in bridge.peft.dora_layers) download_mode (bridge.data.builders.hf_dataset.HFDatasetConfig attribute) drop_last (bridge.training.config.DataloaderConfig attribute) dropout (bridge.peft.canonical_lora.CanonicalLoRA attribute) (bridge.peft.dora.DoRA attribute) (bridge.peft.lora.LoRA attribute) dropout_position (bridge.peft.canonical_lora.CanonicalLoRA attribute) (bridge.peft.dora.DoRA attribute) (bridge.peft.lora.LoRA attribute) dtype (bridge.diffusion.common.flow_matching.adapters.base.FlowMatchingContext attribute) (bridge.models.hf_pretrained.causal_lm.PreTrainedCausalLM property) (bridge.models.hf_pretrained.vlm.PreTrainedVLM property) dtype_from_hf() (bridge.models.conversion.model_bridge.MegatronModelBridge method) (in module bridge.training.model_load_save) dtype_from_str() (bridge.models.conversion.model_bridge.MegatronModelBridge method) (in module bridge.training.model_load_save) DTYPE_MAP (in module bridge.utils.activation_map) dump_dataclass_to_yaml() (in module bridge.utils.yaml_utils) E eager_attention_forward() (in module bridge.models.qwen3_asr.hf_qwen3_asr.modeling_qwen3_asr) early_stopping (bridge.models.hf_pretrained.causal_lm.GenerateKwargs attribute) EmbeddingScalingMixin (class in bridge.models.gemma.modules) EmbedND (class in bridge.diffusion.models.flux.layers) empty_cuda_cache (bridge.training.config.InProcessRestartConfig attribute) enable_adapter_layers() (bridge.peft.adapter_wrapper.AdapterWrapper method) (bridge.peft.base.PEFT method) (bridge.peft.lora_layers.LinearAdapter method) (bridge.peft.lora_layers.TELinearAdapter method) enable_autocast (bridge.models.t5_provider.T5ModelProvider attribute) enable_cuda_graph (bridge.diffusion.models.flux.flux_provider.FluxProvider attribute) enable_exit_handler (bridge.recipes.run_plugins.PreemptionPlugin attribute) (bridge.recipes.run_plugins.PreemptionPluginScriptArgs attribute) enable_exit_handler_for_data_loader (bridge.recipes.run_plugins.PreemptionPlugin attribute) (bridge.recipes.run_plugins.PreemptionPluginScriptArgs attribute) enable_forward_pre_hook() (in module bridge.training.train) enable_ft_package (bridge.recipes.run_plugins.FaultTolerancePlugin attribute) (bridge.recipes.run_plugins.FaultTolerancePluginScriptArgs attribute) (bridge.training.config.FaultToleranceConfig attribute) enable_layernorm_sm_margin (bridge.recipes.run_plugins.PerfEnvPlugin attribute) enable_logging (bridge.training.config.NVRxStragglerDetectionConfig attribute) enable_manual_gc (bridge.recipes.run_plugins.PerfEnvPlugin attribute) (bridge.recipes.run_plugins.PerfEnvPluginScriptArgs attribute) enable_megatron_core_experimental (bridge.training.config.DistributedInitConfig attribute) enable_straggler_on_startup (bridge.training.config.StragglerDetectionConfig attribute) enable_vboost (bridge.recipes.run_plugins.PerfEnvPlugin attribute) enabled (bridge.training.config.InProcessRestartConfig attribute) (bridge.training.config.NVRxStragglerDetectionConfig attribute) (bridge.training.config.TensorInspectConfig attribute) encode() (bridge.models.hf_pretrained.causal_lm.PreTrainedCausalLM method) encode_batch() (bridge.data.energon.hf_encoder_task_encoder.HFEncoderVLMTaskEncoder method) encode_pil_to_jpeg_data_url() (in module bridge.models.nemotron_vl.nemotron_vl_utils) encode_prompt() (bridge.diffusion.models.flux.flow_matching.flux_inference_pipeline.FluxInferencePipeline method) encode_sample() (bridge.data.energon.hf_encoder_task_encoder.HFEncoderVLMTaskEncoder method) (bridge.diffusion.data.common.diffusion_task_encoder_with_sp.DiffusionTaskEncoderWithSequencePacking method) (bridge.diffusion.data.flux.flux_taskencoder.FluxTaskEncoder method) (bridge.diffusion.data.wan.wan_taskencoder.WanTaskEncoder method) encode_text() (bridge.models.hf_pretrained.vlm.PreTrainedVLM method) EncodeKwargs (class in bridge.models.hf_pretrained.causal_lm) encoder_num_layers (bridge.models.t5_provider.T5ModelProvider attribute) encoder_pipeline_model_parallel_size (bridge.models.t5_provider.T5ModelProvider attribute) encoder_seq_lengths (bridge.data.mimo.hf_provider.HFMimoDatasetProvider attribute) (bridge.data.mimo.mock_provider.MockMimoProvider attribute) EnergonDataloader (class in bridge.data.energon.base_energon_datamodule) EnergonMultiModalDataModule (class in bridge.data.energon.base_energon_datamodule) EnergonProvider (class in bridge.data.energon.energon_provider) energy_monitor (bridge.training.state.GlobalState property) ensure_directory_exists() (in module bridge.training.utils.checkpoint_utils) entity (bridge.recipes.run_plugins.WandbPlugin attribute) (bridge.recipes.run_plugins.WandbPluginScriptArgs attribute) eos_token_id (bridge.models.gemma_vl.gemma3_vl_provider.Gemma3VLModelProvider attribute) (bridge.models.glm_vl.glm_45v_provider.GLM45VModelProvider attribute) (bridge.models.hf_pretrained.causal_lm.GenerateKwargs attribute) (bridge.models.kimi_vl.kimi_k25_vl_provider.KimiK25VLModelProvider attribute) (bridge.models.qwen_audio.qwen2_audio_provider.Qwen2AudioModelProvider attribute) (bridge.models.qwen_omni.qwen25_omni_provider.Qwen25OmniModelProvider attribute) (bridge.models.qwen_vl.qwen25_vl_provider.Qwen25VLModelProvider attribute) (bridge.models.qwen_vl.qwen35_vl_provider.Qwen35VLModelProvider attribute) (bridge.models.qwen_vl.qwen35_vl_provider.Qwen35VLMoEModelProvider attribute) (bridge.models.qwen_vl.qwen3_vl_provider.Qwen3VLModelProvider attribute) (bridge.models.qwen_vl.qwen3_vl_provider.Qwen3VLMoEModelProvider attribute) ep_rank (bridge.models.conversion.param_mapping.MegatronParamMapping property) ep_size (bridge.models.conversion.param_mapping.MegatronParamMapping property) etp_rank (bridge.models.conversion.param_mapping.MegatronParamMapping property) etp_size (bridge.models.conversion.param_mapping.MegatronParamMapping property) eval_interval (bridge.training.config.TrainingConfig attribute) eval_iters (bridge.training.config.TrainingConfig attribute) evaluate() (in module bridge.training.eval) evaluate_and_print_results() (in module bridge.training.eval) events (bridge.training.callbacks.CallbackManager property) exclude_modules (bridge.peft.module_matcher.ModuleMatcher attribute) expand_thw() (in module bridge.models.qwen_vl.modelling_qwen3_vl.utils) experimental_attention_variant (bridge.models.qwen_vl.qwen35_vl_provider.Qwen35VLModelProvider attribute) (bridge.models.qwen_vl.qwen35_vl_provider.Qwen35VLMoEModelProvider attribute) experimental_fn() (in module bridge.utils.decorators) expert_model_parallel_size (bridge.models.model_provider.ModelParallelKwargs attribute) (bridge.recipes.qwen_vl.qwen3_vl.Qwen3VLCommonKwargs attribute) expert_tensor_parallel_size (bridge.models.mimo.mimo_config.ModuleParallelismConfig attribute) (bridge.models.model_provider.ModelParallelKwargs attribute) export_adapter_ckpt() (bridge.models.conversion.auto_bridge.AutoBridge method) export_adapter_weights() (bridge.models.conversion.auto_bridge.AutoBridge method) export_ckpt() (bridge.models.conversion.auto_bridge.AutoBridge method) export_hf_weights() (bridge.models.conversion.auto_bridge.AutoBridge method) extend_instance() (in module bridge.models.gemma.modules) external_gpu_device_mapping (bridge.training.config.DistributedInitConfig attribute) extra_repr() (bridge.models.qwen3_asr.hf_qwen3_asr.modeling_qwen3_asr.Qwen3ASRTextRMSNorm method) (bridge.models.qwen3_asr.hf_qwen3_asr.modeling_qwen3_asr.Qwen3ASRThinkerTextRMSNorm method) extract_and_remove_override() (in module bridge.recipes.utils.dataset_utils) extract_expert_number_from_param() (in module bridge.utils.common_utils) extract_skipped_token_ids() (in module bridge.data.vlm_datasets.token_utils) extract_sort_key() (in module bridge.models.conversion.utils) F F_latents (bridge.diffusion.data.wan.wan_energon_datamodule.WanDatasetConfig attribute) (bridge.diffusion.data.wan.wan_mock_datamodule.WanMockDataModuleConfig attribute) fast_pos_embed_interpolate() (bridge.models.qwen_vl.modelling_qwen3_vl.vision_model.Qwen3VLVisionModel method) fault_tolerance_state (bridge.training.state.GlobalState property) FaultToleranceConfig (class in bridge.training.config) FaultTolerancePlugin (class in bridge.recipes.run_plugins) FaultTolerancePluginScriptArgs (class in bridge.recipes.run_plugins) FaultToleranceState (class in bridge.training.state) fc1_dgrad (bridge.training.comm_overlap.TransformerLayerTPOverlapCfg attribute) fc1_fprop (bridge.training.comm_overlap.TransformerLayerTPOverlapCfg attribute) fc1_wgrad (bridge.training.comm_overlap.TransformerLayerTPOverlapCfg attribute) fc2_dgrad (bridge.training.comm_overlap.TransformerLayerTPOverlapCfg attribute) fc2_fprop (bridge.training.comm_overlap.TransformerLayerTPOverlapCfg attribute) feature_attention_mask (bridge.training.utils.visual_inputs.Qwen2AudioInputs attribute) feature_dirs (bridge.training.config.TensorInspectConfig attribute) feature_extractor_class (bridge.models.qwen3_asr.hf_qwen3_asr.processing_qwen3_asr.Qwen3ASRProcessor attribute) features (bridge.training.config.TensorInspectConfig attribute) ffn_hidden_size (bridge.diffusion.models.flux.flux_provider.FluxProvider attribute) (bridge.diffusion.models.wan.wan_provider.WanModelProvider attribute) (bridge.diffusion.models.wan.wan_provider.WanModelProvider14B attribute) (bridge.diffusion.models.wan.wan_provider.WanModelProvider1_3B attribute) (bridge.models.gemma.gemma2_provider.Gemma2ModelProvider27B attribute) (bridge.models.gemma.gemma2_provider.Gemma2ModelProvider2B attribute) (bridge.models.gemma.gemma2_provider.Gemma2ModelProvider9B attribute) (bridge.models.gemma.gemma3_provider.Gemma3ModelProvider12B attribute) (bridge.models.gemma.gemma3_provider.Gemma3ModelProvider1B attribute) (bridge.models.gemma.gemma3_provider.Gemma3ModelProvider27B attribute) (bridge.models.gemma.gemma3_provider.Gemma3ModelProvider4B attribute) (bridge.models.gemma.gemma_provider.GemmaModelProvider2B attribute) (bridge.models.gemma.gemma_provider.GemmaModelProvider7B attribute) (bridge.models.glm_vl.glm_45v_provider.GLM45VModelProvider attribute) (bridge.models.gpt_provider.GPTProvider175B attribute) (bridge.models.ministral3.ministral3_provider.Ministral3ModelProvider14B attribute) (bridge.models.ministral3.ministral3_provider.Ministral3ModelProvider3B attribute) (bridge.models.ministral3.ministral3_provider.Ministral3ModelProvider8B attribute) (bridge.models.mistral.mistral_provider.MistralModelProvider attribute) (bridge.models.mistral.mistral_provider.MistralSmall3ModelProvider24B attribute) (bridge.models.nemotron_vl.nemotron_vl_provider.NemotronNano12Bv2VLModelProvider attribute) (bridge.models.nemotronh.nemotron_h_provider.Nemotron3NanoProvider attribute) (bridge.models.nemotronh.nemotron_h_provider.NemotronHModelProvider47B attribute) (bridge.models.nemotronh.nemotron_h_provider.NemotronHModelProvider4B attribute) (bridge.models.nemotronh.nemotron_h_provider.NemotronHModelProvider56B attribute) (bridge.models.nemotronh.nemotron_h_provider.NemotronHModelProvider8B attribute) (bridge.models.nemotronh.nemotron_h_provider.NemotronNanoModelProvider12Bv2 attribute) (bridge.models.nemotronh.nemotron_h_provider.NemotronNanoModelProvider9Bv2 attribute) (bridge.models.olmoe.olmoe_provider.OlMoEModelProvider attribute) (bridge.models.sarvam.sarvam_provider.SarvamMLAModelProvider attribute) (bridge.models.sarvam.sarvam_provider.SarvamMoEModelProvider attribute) file_exists() (in module bridge.training.utils.checkpoint_utils) fill_packing_strategy() (in module bridge.data.datasets.packing_utils) final_logit_softcapping (bridge.models.gemma.gemma2_provider.Gemma2ModelProvider attribute) finalize() (bridge.models.gpt.gpt_builder.GPTModelConfig method) (bridge.models.mamba.mamba_builder.MambaModelConfig method) (bridge.models.mamba.mamba_provider.MambaModelProvider method) (bridge.models.mimo.mimo_config.MimoParallelismConfig method) (bridge.models.mimo.mimo_config.ModuleParallelismConfig method) (bridge.models.mimo.mimo_provider.MimoModelProvider method) (bridge.models.qwen_vl.qwen3_vl_provider.Qwen3VLMoEModelProvider method) (bridge.models.sarvam.sarvam_provider.SarvamMoEModelProvider method) (bridge.models.transformer_config.HeterogeneousTransformerConfig method) (bridge.models.transformer_config.MLATransformerConfig method) (bridge.models.transformer_config.TransformerConfig method) (bridge.training.comm_overlap.CommOverlapConfig method) (bridge.training.config.CheckpointConfig method) (bridge.training.config.DataloaderConfig method) (bridge.training.config.DistributedDataParallelConfig method) (bridge.training.config.GPTDatasetConfig method) (bridge.training.config.LoggerConfig method) (bridge.training.config.MockGPTDatasetConfig method) (bridge.training.config.NVRxStragglerDetectionConfig method) (bridge.training.config.OptimizerConfig method) (bridge.training.config.ProfilingConfig method) (bridge.training.config.SchedulerConfig method) (bridge.training.config.TensorInspectConfig method) (bridge.training.config.TrainingConfig method) (bridge.training.mixed_precision.MixedPrecisionConfig method) finalize_async_saves() (bridge.training.checkpointing.CheckpointManager method) (bridge.training.checkpointing.DefaultCheckpointManager method) finalize_model_grads_multimodule() (in module bridge.training.mimo_parallel_utils) finalize_tensor_inspect_post_model_initialization() (in module bridge.training.tensor_inspect) find_checkpoint_rank_0() (in module bridge.training.checkpointing) find_class() (bridge.utils.safe_pickle._RestrictedUnpickler method) find_first_bin_that_fits() (in module bridge.diffusion.data.common.sequence_packing_utils) find_pattern_indices() (in module bridge.data.energon.task_encoder_utils) find_vision_id_index() (in module bridge.models.qwen_vl.modelling_qwen3_vl.utils) finetune() (in module bridge.training.finetune) finetuning_train_valid_test_datasets_provider() (in module bridge.data.utils) FinetuningDatasetBuilder (class in bridge.data.builders.finetuning_dataset) FinetuningDatasetConfig (class in bridge.training.config) fire() (bridge.training.callbacks.CallbackManager method) first_fit() (in module bridge.data.datasets.packing_utils) (in module bridge.diffusion.data.common.sequence_packing_utils) first_fit_decreasing() (in module bridge.data.datasets.packing_utils) (in module bridge.diffusion.data.common.sequence_packing_utils) first_fit_shuffle() (in module bridge.data.datasets.packing_utils) first_last_layers_bf16 (bridge.models.nemotron_vl.nemotron_vl_provider.NemotronNano12Bv2VLModelProvider attribute) (bridge.models.nemotronh.nemotron_h_provider.NemotronHModelProvider attribute) (bridge.training.mixed_precision.MixedPrecisionConfig attribute) flash_decode (bridge.models.gemma.gemma3_provider.Gemma3ModelProvider attribute) float() (bridge.models.hf_pretrained.causal_lm.PreTrainedCausalLM method) (bridge.models.hf_pretrained.vlm.PreTrainedVLM method) floating_point_operations_so_far (bridge.training.state.TrainState attribute) FlowInferencePipeline (class in bridge.diffusion.models.wan.flow_matching.flow_inference_pipeline) FlowMatchEulerDiscreteScheduler (class in bridge.diffusion.models.flux.flow_matching.flux_inference_pipeline) FlowMatchingContext (class in bridge.diffusion.common.flow_matching.adapters.base) FlowMatchingPipeline (class in bridge.diffusion.common.flow_matching.flow_matching_pipeline) Flux (class in bridge.diffusion.models.flux.flux_model) flux_12b_pretrain_config() (in module bridge.diffusion.recipes.flux.flux) flux_12b_sft_config() (in module bridge.diffusion.recipes.flux.flux) flux_data_step() (in module bridge.diffusion.models.flux.flux_step) FluxBridge (class in bridge.diffusion.conversion.flux.flux_bridge) FluxDataModuleConfig (class in bridge.diffusion.data.flux.flux_energon_datamodule) FluxDatasetConfig (class in bridge.diffusion.data.flux.flux_energon_datamodule) FluxForwardStep (class in bridge.diffusion.models.flux.flux_step) FluxInferencePipeline (class in bridge.diffusion.models.flux.flow_matching.flux_inference_pipeline) FluxMockDataModuleConfig (class in bridge.diffusion.data.flux.flux_mock_datamodule) FluxProvider (class in bridge.diffusion.models.flux.flux_provider) FluxSafeTensorsStateSource (class in bridge.diffusion.conversion.flux.flux_hf_pretrained) FluxSingleAttention (class in bridge.diffusion.models.flux.flux_attention) FluxSingleTransformerBlock (class in bridge.diffusion.models.flux.flux_layer_spec) FluxTaskEncoder (class in bridge.diffusion.data.flux.flux_taskencoder) forall() (in module bridge.peft.walk_utils) force_nccl_backend_init() (in module bridge.training.initialize) force_param_sync() (in module bridge.training.train) force_system_message (bridge.training.tokenizers.config.TokenizerConfig attribute) forward() (bridge.diffusion.common.flow_matching.adapters.base.ModelAdapter method) (bridge.diffusion.common.flow_matching.adapters.simple.SimpleAdapter method) (bridge.diffusion.common.flow_matching.flow_matching_pipeline.LinearInterpolationSchedule method) (bridge.diffusion.models.common.dit_embeddings.ParallelTimestepEmbedding method) (bridge.diffusion.models.common.normalization.RMSNorm method) (bridge.diffusion.models.flux.flow_matching.flux_adapter.MegatronFluxAdapter method) (bridge.diffusion.models.flux.flux_attention.FluxSingleAttention method) (bridge.diffusion.models.flux.flux_attention.JointSelfAttention method) (bridge.diffusion.models.flux.flux_layer_spec.AdaLN method) (bridge.diffusion.models.flux.flux_layer_spec.AdaLNContinuous method) (bridge.diffusion.models.flux.flux_layer_spec.FluxSingleTransformerBlock method) (bridge.diffusion.models.flux.flux_layer_spec.MMDiTLayer method) (bridge.diffusion.models.flux.flux_model.Flux method) (bridge.diffusion.models.flux.layers.EmbedND method) (bridge.diffusion.models.flux.layers.MLPEmbedder method) (bridge.diffusion.models.flux.layers.TimeStepEmbedder method) (bridge.diffusion.models.wan.flow_matching.flow_matching_pipeline_wan.WanAdapter method) (bridge.diffusion.models.wan.rope_utils.Wan3DRopeEmbeddings method) (bridge.diffusion.models.wan.wan_layer_spec.WanAdaLN method) (bridge.diffusion.models.wan.wan_layer_spec.WanLayerWithAdaLN method) (bridge.diffusion.models.wan.wan_model.Head method) (bridge.diffusion.models.wan.wan_model.WanModel method) (bridge.models.bailing.modeling_bailing_moe_v2.BailingMoeV2Attention method) (bridge.models.bailing.modeling_bailing_moe_v2.BailingMoeV2DecoderLayer method) (bridge.models.bailing.modeling_bailing_moe_v2.BailingMoeV2FlashAttention2 method) (bridge.models.bailing.modeling_bailing_moe_v2.BailingMoeV2ForCausalLM method) (bridge.models.bailing.modeling_bailing_moe_v2.BailingMoeV2Gate method) (bridge.models.bailing.modeling_bailing_moe_v2.BailingMoeV2MLP method) (bridge.models.bailing.modeling_bailing_moe_v2.BailingMoeV2Model method) (bridge.models.bailing.modeling_bailing_moe_v2.BailingMoeV2MTPLayer method) (bridge.models.bailing.modeling_bailing_moe_v2.BailingMoeV2RMSNorm method) (bridge.models.bailing.modeling_bailing_moe_v2.BailingMoeV2RotaryEmbedding method) (bridge.models.bailing.modeling_bailing_moe_v2.BailingMoeV2SdpaAttention method) (bridge.models.bailing.modeling_bailing_moe_v2.BailingMoeV2SparseMoeBlock method) (bridge.models.gemma.gemma2_provider.Gemma2DotProductAttention method) (bridge.models.gemma.gemma2_provider.Gemma2OutputLayer method) (bridge.models.gemma.gemma2_provider.TERowParallelLinearLayerNorm method) (bridge.models.gemma.gemma3_provider.Gemma3LanguageModelEmbedding method) (bridge.models.gemma.gemma3_provider.Gemma3RotaryEmbedding method) (bridge.models.gemma.gemma3_provider.Gemma3SelfAttention method) (bridge.models.gemma.gemma3_provider.TERowParallelLinearLayerNorm method) (bridge.models.gemma.modules.EmbeddingScalingMixin method) (bridge.models.gemma_vl.modeling_gemma3_vl.Gemma3VLModel method) (bridge.models.gemma_vl.modeling_gemma3_vl.Gemma3VLMultimodalProjector method) (bridge.models.glm_vl.modeling_glm_45v.GLM45VModel method) (bridge.models.gpt_full_te_layer_autocast_spec.AutocastTransformerLayer method) (bridge.models.gpt_full_te_layer_autocast_spec.TETransformerLayerAutocast method) (bridge.models.kimi_vl.modeling_kimi_k25_vl.KimiK25VLModel method) (bridge.models.llama.llama4_utils.Llama4SelfAttention method) (bridge.models.minimax_m2.minimax_m2_provider._FullDimRMSNorm method) (bridge.models.ministral3.ministral3_provider.MinistralTEDotProductAttention method) (bridge.models.ministral3.modeling_ministral3.Ministral3Model method) (bridge.models.nemotron_vl.modeling_nemotron_vl.NemotronVLModel method) (bridge.models.qwen3_asr.hf_qwen3_asr.modeling_qwen3_asr.Qwen3ASRAudioAttention method) (bridge.models.qwen3_asr.hf_qwen3_asr.modeling_qwen3_asr.Qwen3ASRAudioEncoder method) (bridge.models.qwen3_asr.hf_qwen3_asr.modeling_qwen3_asr.Qwen3ASRAudioEncoderLayer method) (bridge.models.qwen3_asr.hf_qwen3_asr.modeling_qwen3_asr.Qwen3ASRTextAttention method) (bridge.models.qwen3_asr.hf_qwen3_asr.modeling_qwen3_asr.Qwen3ASRTextMLP method) (bridge.models.qwen3_asr.hf_qwen3_asr.modeling_qwen3_asr.Qwen3ASRTextRMSNorm method) (bridge.models.qwen3_asr.hf_qwen3_asr.modeling_qwen3_asr.Qwen3ASRThinkerForConditionalGeneration method) (bridge.models.qwen3_asr.hf_qwen3_asr.modeling_qwen3_asr.Qwen3ASRThinkerTextAttention method) (bridge.models.qwen3_asr.hf_qwen3_asr.modeling_qwen3_asr.Qwen3ASRThinkerTextDecoderLayer method) (bridge.models.qwen3_asr.hf_qwen3_asr.modeling_qwen3_asr.Qwen3ASRThinkerTextMLP method) (bridge.models.qwen3_asr.hf_qwen3_asr.modeling_qwen3_asr.Qwen3ASRThinkerTextModel method) (bridge.models.qwen3_asr.hf_qwen3_asr.modeling_qwen3_asr.Qwen3ASRThinkerTextRMSNorm method) (bridge.models.qwen3_asr.hf_qwen3_asr.modeling_qwen3_asr.Qwen3ASRThinkerTextRotaryEmbedding method) (bridge.models.qwen3_asr.hf_qwen3_asr.modeling_qwen3_asr.SinusoidsPositionEmbedding method) (bridge.models.qwen3_asr.modeling_qwen3_asr.model.Qwen3ASRModel method) (bridge.models.qwen3_asr.modeling_qwen3_asr.thinker_model.Qwen3ASRThinkerModel method) (bridge.models.qwen_audio.modeling_qwen2_audio.Qwen2AudioModel method) (bridge.models.qwen_omni.modeling_qwen25_omni.model.Qwen25OmniModel method) (bridge.models.qwen_omni.modeling_qwen25_omni.thinker_model.Qwen25OmniThinkerModel method) (bridge.models.qwen_vl.modeling_qwen25_vl.Qwen25VLModel method) (bridge.models.qwen_vl.modelling_qwen3_vl.attention.Qwen3VLSelfAttention method) (bridge.models.qwen_vl.modelling_qwen3_vl.model.Qwen3VLModel method) (bridge.models.qwen_vl.modelling_qwen3_vl.rope.Qwen3VLMultimodalRotaryEmbedding method) (bridge.models.qwen_vl.modelling_qwen3_vl.text_model.Qwen3VLGPTModel method) (bridge.models.qwen_vl.modelling_qwen3_vl.transformer_block.Qwen3VLTransformerBlock method) (bridge.models.qwen_vl.modelling_qwen3_vl.transformer_block.Qwen3VLVisionTransformerBlock method) (bridge.models.qwen_vl.modelling_qwen3_vl.utils.AllGatherVisionEmbeddings static method) (bridge.models.qwen_vl.modelling_qwen3_vl.utils.Qwen3VLVisionPatchEmbed method) (bridge.models.qwen_vl.modelling_qwen3_vl.utils.Qwen3VLVisionPatchMerger method) (bridge.models.qwen_vl.modelling_qwen3_vl.utils.Qwen3VLVisionRotaryEmbedding method) (bridge.models.qwen_vl.modelling_qwen3_vl.vision_model.Qwen3VLVisionModel method) (bridge.peft.canonical_lora.LoRALinearSplitFC1UpGate method) (bridge.peft.canonical_lora.LoRALinearSplitQKV method) (bridge.peft.dora_layers.DoRALinear method) (bridge.peft.lora_layers.LinearAdapter method) (bridge.peft.lora_layers.LoRALinear method) (bridge.peft.lora_layers.LoRATopKRouter method) (bridge.peft.lora_layers.TEFusedLoRALinear method) (bridge.peft.lora_layers.TELinearAdapter method) (bridge.peft.utils._All2AllHp2Sp static method) (bridge.peft.utils.ParallelLinearAdapter method) forward_pp_step() (bridge.diffusion.models.wan.flow_matching.flow_inference_pipeline.FlowInferencePipeline method) forward_step() (in module bridge.models.qwen_vl.qwen3_vl_step) (in module bridge.training.audio_lm_step) (in module bridge.training.gpt_step) (in module bridge.training.llava_step) (in module bridge.training.mimo_step) (in module bridge.training.vlm_step) forward_step_modelopt() (in module bridge.training.gpt_step) ForwardStepCallable (in module bridge.training.forward_step_func_types) ForwardStepFunc (in module bridge.training.forward_step_func_types) ForwardStepFunctor (class in bridge.training.forward_step_func_types) FourArgForwardStep (class in bridge.training.forward_step_func_types) fp16 (bridge.models.gemma.gemma3_provider.Gemma3ModelProvider attribute) (bridge.models.llama_nemotron.llama_nemotron_provider.LlamaNemotronHeterogeneousProvider attribute) (bridge.models.mamba.mamba_provider.MambaModelProvider attribute) (bridge.models.mimo.mimo_provider.MimoModelProvider attribute) (bridge.models.model_provider.GetModelKwargs attribute) (bridge.training.mixed_precision.MixedPrecisionConfig attribute) fp16_lm_cross_entropy (bridge.diffusion.models.wan.wan_provider.WanModelProvider attribute) (bridge.models.gpt.gpt_builder.GPTModelConfig attribute) (bridge.models.gpt_provider.GPTModelProvider attribute) (bridge.models.mamba.mamba_builder.MambaModelConfig attribute) (bridge.models.mamba.mamba_provider.MambaModelProvider attribute) (bridge.models.qwen3_asr.modeling_qwen3_asr.transformer_config.Qwen3ASRTransformerConfig attribute) (bridge.models.qwen_omni.modeling_qwen25_omni.transformer_config.Qwen25OmniTransformerConfig attribute) (bridge.models.qwen_vl.modelling_qwen3_vl.transformer_config.Qwen3VLTransformerConfig attribute) (bridge.models.qwen_vl.qwen3_vl_provider.Qwen3VLModelProvider attribute) (bridge.models.t5_provider.T5ModelProvider attribute) fp16_mixed() (in module bridge.training.mixed_precision) fp16_with_fp8_current_scaling_mixed() (in module bridge.training.mixed_precision) fp16_with_fp8_delayed_scaling_mixed() (in module bridge.training.mixed_precision) fp16_with_fp8_subchannel_scaling_mixed() (in module bridge.training.mixed_precision) fp16_with_mxfp8_mixed() (in module bridge.training.mixed_precision) fp32 (bridge.training.mixed_precision.MixedPrecisionConfig attribute) fp4 (bridge.training.mixed_precision.MixedPrecisionConfig attribute) fp4_recipe (bridge.training.mixed_precision.MixedPrecisionConfig attribute) fp8 (bridge.models.gpt_provider.GPTModelProvider attribute) (bridge.training.mixed_precision.MixedPrecisionConfig attribute) fp8_amax_compute_algo (bridge.training.mixed_precision.MixedPrecisionConfig attribute) fp8_amax_history_len (bridge.training.mixed_precision.MixedPrecisionConfig attribute) fp8_buf (bridge.training.comm_overlap.PipelineOverlapCfg attribute) (bridge.training.comm_overlap.RingExchangeOverlapCfg attribute) fp8_dot_product_attention (bridge.training.mixed_precision.MixedPrecisionConfig attribute) fp8_margin (bridge.training.mixed_precision.MixedPrecisionConfig attribute) fp8_multi_head_attention (bridge.training.mixed_precision.MixedPrecisionConfig attribute) fp8_param (bridge.training.mixed_precision.MixedPrecisionConfig attribute) fp8_param_gather (bridge.training.mixed_precision.MixedPrecisionConfig attribute) fp8_recipe (bridge.training.mixed_precision.MixedPrecisionConfig attribute) fp8_wgrad (bridge.training.mixed_precision.MixedPrecisionConfig attribute) fps (bridge.diffusion.data.common.diffusion_sample.DiffusionSample attribute) freeze() (bridge.models.gemma_vl.modeling_gemma3_vl.Gemma3VLModel method) (bridge.models.glm_vl.modeling_glm_45v.GLM45VModel method) (bridge.models.kimi_vl.modeling_kimi_k25_vl.KimiK25VLModel method) (bridge.models.ministral3.modeling_ministral3.Ministral3Model method) (bridge.models.nemotron_vl.modeling_nemotron_vl.NemotronVLModel method) (bridge.models.qwen3_asr.modeling_qwen3_asr.model.Qwen3ASRModel method) (bridge.models.qwen3_asr.modeling_qwen3_asr.thinker_model.Qwen3ASRThinkerModel method) (bridge.models.qwen_audio.modeling_qwen2_audio.Qwen2AudioModel method) (bridge.models.qwen_omni.modeling_qwen25_omni.model.Qwen25OmniModel method) (bridge.models.qwen_omni.modeling_qwen25_omni.thinker_model.Qwen25OmniThinkerModel method) (bridge.models.qwen_vl.modeling_qwen25_vl.Qwen25VLModel method) (bridge.models.qwen_vl.modelling_qwen3_vl.model.Qwen3VLModel method) freeze_audio_model (bridge.models.qwen3_asr.qwen3_asr_provider.Qwen3ASRModelProvider attribute) (bridge.models.qwen_audio.qwen2_audio_provider.Qwen2AudioModelProvider attribute) (bridge.models.qwen_omni.qwen25_omni_provider.Qwen25OmniModelProvider attribute) freeze_audio_projection (bridge.models.qwen_audio.qwen2_audio_provider.Qwen2AudioModelProvider attribute) freeze_language_model (bridge.models.gemma_vl.gemma3_vl_provider.Gemma3VLModelProvider attribute) (bridge.models.glm_vl.glm_45v_provider.GLM45VModelProvider attribute) (bridge.models.kimi_vl.kimi_k25_vl_provider.KimiK25VLModelProvider attribute) (bridge.models.mimo.mimo_provider.MimoModelProvider attribute) (bridge.models.ministral3.ministral3_provider.Ministral3ModelProvider attribute) (bridge.models.nemotron_vl.nemotron_vl_provider.NemotronNano12Bv2VLModelProvider attribute) (bridge.models.qwen3_asr.qwen3_asr_provider.Qwen3ASRModelProvider attribute) (bridge.models.qwen_audio.qwen2_audio_provider.Qwen2AudioModelProvider attribute) (bridge.models.qwen_omni.qwen25_omni_provider.Qwen25OmniModelProvider attribute) (bridge.models.qwen_vl.qwen25_vl_provider.Qwen25VLModelProvider attribute) (bridge.models.qwen_vl.qwen35_vl_provider.Qwen35VLModelProvider attribute) (bridge.models.qwen_vl.qwen35_vl_provider.Qwen35VLMoEModelProvider attribute) (bridge.models.qwen_vl.qwen3_vl_provider.Qwen3VLModelProvider attribute) (bridge.models.qwen_vl.qwen3_vl_provider.Qwen3VLMoEModelProvider attribute) (bridge.peft.lora.VLMLoRA attribute) (bridge.recipes.qwen_vl.qwen3_vl.Qwen3VLCommonKwargs attribute) freeze_modality_encoders (bridge.models.mimo.mimo_provider.MimoModelProvider attribute) freeze_modality_projections (bridge.models.mimo.mimo_provider.MimoModelProvider attribute) freeze_model() (bridge.peft.base.PEFT method) (bridge.peft.lora.VLMLoRA method) freeze_vision_model (bridge.models.gemma_vl.gemma3_vl_provider.Gemma3VLModelProvider attribute) (bridge.models.glm_vl.glm_45v_provider.GLM45VModelProvider attribute) (bridge.models.kimi_vl.kimi_k25_vl_provider.KimiK25VLModelProvider attribute) (bridge.models.ministral3.ministral3_provider.Ministral3ModelProvider attribute) (bridge.models.nemotron_vl.nemotron_vl_provider.NemotronNano12Bv2VLModelProvider attribute) (bridge.models.qwen_omni.qwen25_omni_provider.Qwen25OmniModelProvider attribute) (bridge.models.qwen_vl.qwen25_vl_provider.Qwen25VLModelProvider attribute) (bridge.models.qwen_vl.qwen35_vl_provider.Qwen35VLModelProvider attribute) (bridge.models.qwen_vl.qwen35_vl_provider.Qwen35VLMoEModelProvider attribute) (bridge.models.qwen_vl.qwen3_vl_provider.Qwen3VLModelProvider attribute) (bridge.models.qwen_vl.qwen3_vl_provider.Qwen3VLMoEModelProvider attribute) (bridge.peft.lora.VLMLoRA attribute) (bridge.recipes.qwen_vl.qwen3_vl.Qwen3VLCommonKwargs attribute) freeze_vision_projection (bridge.models.gemma_vl.gemma3_vl_provider.Gemma3VLModelProvider attribute) (bridge.models.glm_vl.glm_45v_provider.GLM45VModelProvider attribute) (bridge.models.kimi_vl.kimi_k25_vl_provider.KimiK25VLModelProvider attribute) (bridge.models.ministral3.ministral3_provider.Ministral3ModelProvider attribute) (bridge.models.nemotron_vl.nemotron_vl_provider.NemotronNano12Bv2VLModelProvider attribute) (bridge.models.qwen_vl.qwen25_vl_provider.Qwen25VLModelProvider attribute) (bridge.models.qwen_vl.qwen35_vl_provider.Qwen35VLModelProvider attribute) (bridge.models.qwen_vl.qwen35_vl_provider.Qwen35VLMoEModelProvider attribute) (bridge.models.qwen_vl.qwen3_vl_provider.Qwen3VLModelProvider attribute) (bridge.models.qwen_vl.qwen3_vl_provider.Qwen3VLMoEModelProvider attribute) (bridge.peft.lora.VLMLoRA attribute) (bridge.recipes.qwen_vl.qwen3_vl.Qwen3VLCommonKwargs attribute) freq_dim (bridge.diffusion.models.wan.wan_provider.WanModelProvider attribute) from_auto_config() (bridge.models.conversion.auto_bridge.AutoBridge class method) from_dict() (bridge.models.common.base.ModelConfig class method) (bridge.models.common.base.Serializable class method) (bridge.training.utils.config_utils._ConfigContainerBase class method) from_hf_config() (bridge.models.conversion.auto_bridge.AutoBridge class method) from_hf_pretrained() (bridge.models.config.ConfigProtocol class method) (bridge.models.conversion.auto_bridge.AutoBridge class method) (bridge.models.model_provider.ModelProviderMixin class method) (in module bridge.models.config) from_pretrained() (bridge.models.hf_pretrained.causal_lm.PreTrainedCausalLM class method) (bridge.models.hf_pretrained.vlm.PreTrainedVLM class method) from_yaml() (bridge.training.utils.config_utils._ConfigContainerBase class method) FSDP_DTENSOR (bridge.training.checkpointing.CheckpointType attribute) ft (bridge.training.config.ConfigContainer attribute) ft_state_path (bridge.training.state.FaultToleranceState attribute) full_self_attention (bridge.diffusion.models.wan.wan_layer_spec.WanWithAdaLNSubmodules attribute) FullDimKNorm (class in bridge.models.minimax_m2.minimax_m2_provider) FullDimQNorm (class in bridge.models.minimax_m2.minimax_m2_provider) FusedExpertMapping (class in bridge.models.conversion.param_mapping) FusedGatedExpertMapping (class in bridge.models.conversion.param_mapping) G G_LOGGER (in module bridge.training.optim) gated_linear_unit (bridge.diffusion.models.wan.wan_provider.WanModelProvider attribute) (bridge.models.gemma.gemma2_provider.Gemma2ModelProvider attribute) (bridge.models.gemma.gemma3_provider.Gemma3ModelProvider attribute) (bridge.models.gemma.gemma_provider.GemmaModelProvider attribute) (bridge.models.glm_vl.glm_45v_provider.GLM45VModelProvider attribute) (bridge.models.llama_nemotron.llama_nemotron_provider.LlamaNemotronHeterogeneousProvider attribute) (bridge.models.mamba.mamba_provider.MambaModelProvider attribute) (bridge.models.ministral3.ministral3_provider.Ministral3ModelProvider attribute) (bridge.models.mistral.mistral_provider.MistralModelProvider attribute) (bridge.models.olmoe.olmoe_provider.OlMoEModelProvider attribute) (bridge.models.qwen3_asr.qwen3_asr_provider.Qwen3ASRModelProvider attribute) (bridge.models.qwen_omni.qwen25_omni_provider.Qwen25OmniModelProvider attribute) (bridge.models.qwen_vl.qwen35_vl_provider.Qwen35VLModelProvider attribute) (bridge.models.qwen_vl.qwen35_vl_provider.Qwen35VLMoEModelProvider attribute) (bridge.models.sarvam.sarvam_provider.SarvamMLAModelProvider attribute) (bridge.models.sarvam.sarvam_provider.SarvamMoEModelProvider attribute) GatedMLPMapping (class in bridge.models.conversion.param_mapping) gather_from_ep_ranks() (bridge.models.conversion.param_mapping.MegatronParamMapping method) gather_from_tp_ranks() (bridge.models.conversion.param_mapping.MegatronParamMapping method) GDN_IN_PROJ_KEYS (in module bridge.models.conversion.peft_bridge) GDNConv1dMapping (class in bridge.models.conversion.param_mapping) GDNLinearMapping (class in bridge.models.conversion.param_mapping) GDNLinearMappingSeparate (class in bridge.models.conversion.param_mapping) gemma2_27b_peft_config() (in module bridge.recipes.gemma.gemma2) gemma2_27b_pretrain_config() (in module bridge.recipes.gemma.gemma2) gemma2_27b_sft_config() (in module bridge.recipes.gemma.gemma2) gemma2_2b_peft_config() (in module bridge.recipes.gemma.gemma2) gemma2_2b_pretrain_config() (in module bridge.recipes.gemma.gemma2) gemma2_2b_sft_config() (in module bridge.recipes.gemma.gemma2) gemma2_9b_peft_config() (in module bridge.recipes.gemma.gemma2) gemma2_9b_pretrain_config() (in module bridge.recipes.gemma.gemma2) gemma2_9b_sft_config() (in module bridge.recipes.gemma.gemma2) gemma2_layer_spec() (in module bridge.models.gemma.gemma2_provider) Gemma2Bridge (class in bridge.models.gemma.gemma2_bridge) Gemma2DotProductAttention (class in bridge.models.gemma.gemma2_provider) Gemma2ModelProvider (class in bridge.models.gemma.gemma2_provider) Gemma2ModelProvider27B (class in bridge.models.gemma.gemma2_provider) Gemma2ModelProvider2B (class in bridge.models.gemma.gemma2_provider) Gemma2ModelProvider9B (class in bridge.models.gemma.gemma2_provider) Gemma2OutputLayer (class in bridge.models.gemma.gemma2_provider) gemma3_1b_peft_config() (in module bridge.recipes.gemma.gemma3) gemma3_1b_pretrain_config() (in module bridge.recipes.gemma.gemma3) gemma3_1b_sft_config() (in module bridge.recipes.gemma.gemma3) gemma3_layer_spec() (in module bridge.models.gemma.gemma3_provider) gemma3_vl_12b_peft_config() (in module bridge.recipes.gemma3_vl.gemma3_vl) gemma3_vl_12b_sft_config() (in module bridge.recipes.gemma3_vl.gemma3_vl) gemma3_vl_27b_peft_config() (in module bridge.recipes.gemma3_vl.gemma3_vl) gemma3_vl_27b_sft_config() (in module bridge.recipes.gemma3_vl.gemma3_vl) gemma3_vl_4b_peft_config() (in module bridge.recipes.gemma3_vl.gemma3_vl) gemma3_vl_4b_sft_config() (in module bridge.recipes.gemma3_vl.gemma3_vl) Gemma3LanguageModelEmbedding (class in bridge.models.gemma.gemma3_provider) Gemma3ModelBridge (class in bridge.models.gemma.gemma3_bridge) Gemma3ModelProvider (class in bridge.models.gemma.gemma3_provider) Gemma3ModelProvider12B (class in bridge.models.gemma.gemma3_provider) Gemma3ModelProvider1B (class in bridge.models.gemma.gemma3_provider) Gemma3ModelProvider27B (class in bridge.models.gemma.gemma3_provider) Gemma3ModelProvider4B (class in bridge.models.gemma.gemma3_provider) Gemma3RotaryEmbedding (class in bridge.models.gemma.gemma3_provider) Gemma3SelfAttention (class in bridge.models.gemma.gemma3_provider) Gemma3TEDotProductAttention (class in bridge.models.gemma.gemma3_provider) Gemma3VLBridge (class in bridge.models.gemma_vl.gemma3_vl_bridge) Gemma3VLModel (class in bridge.models.gemma_vl.modeling_gemma3_vl) Gemma3VLModelProvider (class in bridge.models.gemma_vl.gemma3_vl_provider) Gemma3VLMultimodalProjector (class in bridge.models.gemma_vl.modeling_gemma3_vl) Gemma3VLMultimodalProjectorConfig (class in bridge.models.gemma_vl.modeling_gemma3_vl) GEMMA_3N_TOKENS (in module bridge.data.vlm_datasets.token_utils) GEMMA_TOKENS (in module bridge.data.vlm_datasets.token_utils) GemmaBridge (class in bridge.models.gemma.gemma_bridge) GemmaModelProvider (class in bridge.models.gemma.gemma_provider) GemmaModelProvider2B (class in bridge.models.gemma.gemma_provider) GemmaModelProvider7B (class in bridge.models.gemma.gemma_provider) generate() (bridge.diffusion.models.wan.flow_matching.flow_inference_pipeline.FlowInferencePipeline method) (bridge.models.hf_pretrained.causal_lm.PreTrainedCausalLM method) (bridge.models.hf_pretrained.vlm.PreTrainedVLM method) (bridge.models.qwen3_asr.hf_qwen3_asr.modeling_qwen3_asr.Qwen3ASRForConditionalGeneration method) generate_state_dict() (in module bridge.training.checkpointing) GenerateKwargs (class in bridge.models.hf_pretrained.causal_lm) generation_config (bridge.models.common.base.ModelConfig attribute) (bridge.models.hf_pretrained.causal_lm.PreTrainedCausalLM property) (bridge.models.hf_pretrained.vlm.PreTrainedVLM property) (bridge.models.kimi_vl.kimi_k25_vl_provider.KimiK25VLModelProvider attribute) GENERATION_REGEX (in module bridge.data.datasets.utils) GenericVisualInputs (class in bridge.training.utils.visual_inputs) get() (bridge.models.hf_pretrained.state.StateDict method) get_active_module_pg() (in module bridge.training.mimo_parallel_utils) get_adapter_attributes_from_linear() (in module bridge.peft.utils) get_added_query_key_value_tensors() (bridge.diffusion.models.flux.flux_attention.JointSelfAttention method) get_all_keys() (bridge.models.hf_pretrained.state.DictStateSource method) (bridge.models.hf_pretrained.state.SafeTensorsStateSource method) (bridge.models.hf_pretrained.state.StateSource method) get_all_mappings() (bridge.models.conversion.mapping_registry.MegatronMappingRegistry method) get_artifacts() (bridge.models.hf_pretrained.base.PreTrainedBase method) get_audio_features() (bridge.models.qwen3_asr.hf_qwen3_asr.modeling_qwen3_asr.Qwen3ASRThinkerForConditionalGeneration method) (bridge.models.qwen3_asr.modeling_qwen3_asr.thinker_model.Qwen3ASRThinkerModel method) (bridge.models.qwen_omni.modeling_qwen25_omni.thinker_model.Qwen25OmniThinkerModel method) get_batch() (in module bridge.models.qwen_vl.qwen3_vl_step) (in module bridge.training.audio_lm_step) (in module bridge.training.gpt_step) (in module bridge.training.llava_step) (in module bridge.training.mimo_step) (in module bridge.training.vlm_step) get_batch_from_iterator() (in module bridge.models.qwen_vl.qwen3_vl_step) (in module bridge.training.audio_lm_step) (in module bridge.training.gpt_step) (in module bridge.training.llava_step) (in module bridge.training.vlm_step) get_batch_on_this_tp_rank() (in module bridge.training.utils.batch_utils) get_blend_and_blend_per_split() (in module bridge.data.loaders) get_blend_fields_from_data_paths() (in module bridge.recipes.utils.dataset_utils) get_builder_cls() (bridge.models.common.base.ModelConfig method) get_causal_lm_class_name_via_auto_map() (in module bridge.models.conversion.utils) get_checkpoint_name() (in module bridge.training.utils.checkpoint_utils) get_checkpoint_run_config_filename() (in module bridge.training.utils.checkpoint_utils) get_checkpoint_tracker_filename() (in module bridge.training.utils.checkpoint_utils) get_checkpoint_train_state_filename() (in module bridge.training.utils.checkpoint_utils) get_checkpoint_version() (in module bridge.training.checkpointing) get_chunked_index() (bridge.models.qwen3_asr.hf_qwen3_asr.modeling_qwen3_asr.Qwen3ASRPreTrainedModelForConditionalGeneration method) (bridge.models.qwen3_asr.hf_qwen3_asr.processing_qwen3_asr.Qwen3ASRProcessor method) (in module bridge.models.qwen_omni.modeling_qwen25_omni.rope) get_collate_fn() (bridge.data.mimo.base_provider.MimoDatasetProvider method) (bridge.data.mimo.hf_provider.HFMimoDatasetProvider method) (bridge.data.mimo.mock_provider.MockMimoProvider method) get_common_config() (in module bridge.models.sarvam.common) get_common_mapping_list() (in module bridge.models.deepseek.common) get_config_for_layer() (bridge.models.transformer_config.HeterogeneousTransformerConfig method) get_conversion_tasks() (bridge.models.conversion.auto_bridge.AutoBridge method) get_data_parallel_size() (bridge.training.config.ConfigContainer method) get_dataset_provider() (in module bridge.data.utils) get_dataset_root() (in module bridge.data.datasets.sft) get_decoder() (bridge.models.bailing.modeling_bailing_moe_v2.BailingMoeV2ForCausalLM method) get_default_mamba_stack_spec() (in module bridge.models.mamba.mamba_builder) (in module bridge.models.mamba.mamba_provider) get_device() (in module bridge.training.utils.sig_utils) get_dist_train_vision_dp_data() (in module bridge.models.qwen_vl.modelling_qwen3_vl.utils) get_flux_double_transformer_engine_spec() (in module bridge.diffusion.models.flux.flux_layer_spec) get_flux_single_transformer_engine_spec() (in module bridge.diffusion.models.flux.flux_layer_spec) get_fp8_context() (bridge.diffusion.models.flux.flux_model.Flux method) get_gpt_full_te_layer_autocast_spec() (in module bridge.models.gpt_full_te_layer_autocast_spec) get_hf_model_id_from_checkpoint() (bridge.models.conversion.auto_bridge.AutoBridge static method) (in module bridge.training.utils.checkpoint_utils) get_hf_tokenizer_kwargs() (bridge.models.glm_vl.glm_45v_bridge.GLM45VBridge class method) (bridge.models.nemotronh.nemotron_h_bridge.NemotronHBridge class method) get_input_embeddings() (bridge.models.bailing.modeling_bailing_moe_v2.BailingMoeV2ForCausalLM method) (bridge.models.bailing.modeling_bailing_moe_v2.BailingMoeV2Model method) (bridge.models.qwen3_asr.hf_qwen3_asr.modeling_qwen3_asr.Qwen3ASRAudioEncoder method) (bridge.models.qwen3_asr.hf_qwen3_asr.modeling_qwen3_asr.Qwen3ASRThinkerForConditionalGeneration method) get_language_model_cp_size() (bridge.training.utils.pg_utils.DistTrainProcessGroupCollection method) get_last_rank() (in module bridge.utils.common_utils) get_llama4_layer_spec() (in module bridge.models.llama.llama4_utils) get_llm_pos_ids_for_vision() (in module bridge.models.qwen_omni.modeling_qwen25_omni.rope) get_local_rank_preinit() (in module bridge.utils.common_utils) get_ltor_masks_and_position_ids() (in module bridge.data.energon.task_encoder_utils) get_mappings_by_pattern() (bridge.models.conversion.mapping_registry.MegatronMappingRegistry method) get_master_addr_safe() (in module bridge.utils.common_utils) get_master_port_safe() (in module bridge.utils.common_utils) get_mimo_dp_info() (in module bridge.data.mimo.dp_utils) get_mimo_sampling_info() (in module bridge.data.mimo.dp_utils) get_mixed_precision_config() (in module bridge.training.mixed_precision) get_model() (in module bridge.models.model_provider) get_model_bridge() (in module bridge.models.conversion.model_bridge) get_module_and_param_from_name() (in module bridge.models.conversion.utils) get_module_to_grid_tuple() (in module bridge.training.mimo_parallel_utils) get_output_embeddings() (bridge.models.bailing.modeling_bailing_moe_v2.BailingMoeV2ForCausalLM method) get_packed_seq_params() (in module bridge.training.utils.packed_seq_utils) get_parallelism() (bridge.models.mimo.mimo_config.MimoParallelismConfig method) get_pg_collection() (in module bridge.training.utils.pg_utils) get_placeholder_mask() (bridge.models.qwen3_asr.hf_qwen3_asr.modeling_qwen3_asr.Qwen3ASRThinkerForConditionalGeneration method) get_query_key_value_tensors() (bridge.diffusion.models.common.dit_attention.DiTCrossAttention method) (bridge.diffusion.models.common.dit_attention.DiTSelfAttention method) (bridge.diffusion.models.flux.flux_attention.JointSelfAttention method) (bridge.models.olmoe.olmoe_provider.OLMoESelfAttention method) get_rank_safe() (in module bridge.utils.common_utils) get_rng_state() (in module bridge.training.checkpointing) get_rope_index() (bridge.models.qwen3_asr.hf_qwen3_asr.modeling_qwen3_asr.Qwen3ASRPreTrainedModelForConditionalGeneration method) (in module bridge.models.qwen3_asr.modeling_qwen3_asr.rope) (in module bridge.models.qwen_omni.modeling_qwen25_omni.rope) (in module bridge.models.qwen_vl.modelling_qwen3_vl.rope) get_sample_block() (bridge.data.datasets.utils._OnlineSampleMapping method) get_seqlen_list() (in module bridge.data.datasets.packing_utils) get_shard_idx() (bridge.models.conversion.param_mapping.ChunkedMapping method) (bridge.models.conversion.param_mapping.GDNConv1dMapping method) (bridge.models.conversion.param_mapping.MambaConv1dMapping method) get_start_end_idx() (bridge.data.samplers.MegatronPretrainingSampler method) get_start_time_from_progress_log() (in module bridge.training.train) get_support_languages() (bridge.models.qwen3_asr.hf_qwen3_asr.modeling_qwen3_asr.Qwen3ASRForConditionalGeneration method) get_swa() (in module bridge.models.gemma.gemma2_provider) get_text_config() (bridge.models.qwen3_asr.hf_qwen3_asr.configuration_qwen3_asr.Qwen3ASRConfig method) get_torch_version() (in module bridge.utils.import_utils) get_train_valid_test_num_samples() (in module bridge.data.loaders) get_vboost_srun_cmd() (bridge.recipes.run_plugins.PerfEnvPlugin method) get_vision_cp_data() (in module bridge.models.qwen_vl.modelling_qwen3_vl.utils) get_vision_model_config() (in module bridge.models.qwen_vl.modelling_qwen3_vl.transformer_config) get_wan_block_with_transformer_engine_spec() (in module bridge.diffusion.models.wan.wan_layer_spec) get_weight_magnitude() (bridge.peft.dora_layers.ParallelLinearDoRAAdapter method) get_world_size_safe() (in module bridge.utils.common_utils) GetModelKwargs (class in bridge.models.model_provider) glm45_355b_peft_config() (in module bridge.recipes.glm.glm45) glm45_355b_pretrain_config() (in module bridge.recipes.glm.glm45) glm45_355b_sft_config() (in module bridge.recipes.glm.glm45) glm45_air_106b_peft_config() (in module bridge.recipes.glm.glm45) glm45_air_106b_pretrain_config() (in module bridge.recipes.glm.glm45) glm45_air_106b_sft_config() (in module bridge.recipes.glm.glm45) GLM45Bridge (class in bridge.models.glm.glm45_bridge) GLM45VBridge (class in bridge.models.glm_vl.glm_45v_bridge) GLM45VModel (class in bridge.models.glm_vl.modeling_glm_45v) GLM45VModelProvider (class in bridge.models.glm_vl.glm_45v_provider) glm4v_collate_fn() (in module bridge.data.vlm_datasets.collate) glm_45v_peft_config() (in module bridge.recipes.glm_vl.glm_45v) glm_45v_sft_config() (in module bridge.recipes.glm_vl.glm_45v) GLMExpertDownProjMapping (class in bridge.models.glm.glm_moe_mappings) glob() (bridge.models.hf_pretrained.state.StateDict method) GLOBAL (bridge.training.checkpointing.CheckpointType attribute) global_base_prefix (bridge.models.conversion.peft_bridge.AdapterWeight attribute) (bridge.models.conversion.peft_bridge.AdapterWeightConversionTask attribute) global_batch_size (bridge.data.energon.energon_provider.EnergonProvider attribute) (bridge.diffusion.data.common.diffusion_energon_datamodule.DiffusionDataModuleConfig attribute) (bridge.diffusion.data.flux.flux_energon_datamodule.FluxDataModuleConfig attribute) (bridge.diffusion.data.flux.flux_energon_datamodule.FluxDatasetConfig attribute) (bridge.diffusion.data.flux.flux_mock_datamodule.FluxMockDataModuleConfig attribute) (bridge.diffusion.data.wan.wan_energon_datamodule.WanDataModuleConfig attribute) (bridge.diffusion.data.wan.wan_energon_datamodule.WanDatasetConfig attribute) (bridge.diffusion.data.wan.wan_mock_datamodule.WanMockDataModuleConfig attribute) (bridge.recipes.qwen_vl.qwen3_vl.Qwen3VLCommonKwargs attribute) global_param_name (bridge.models.conversion.model_bridge.WeightConversionTask attribute) global_state (bridge.training.setup_mimo.MimoSetupOutput attribute) GlobalState (class in bridge.training.state) gpt3_175b_pretrain_config() (in module bridge.recipes.gpt.gpt3_175b) gpt_oss_120b_peft_config() (in module bridge.recipes.gpt_oss.gpt_oss) gpt_oss_120b_pretrain_config() (in module bridge.recipes.gpt_oss.gpt_oss) gpt_oss_120b_sft_config() (in module bridge.recipes.gpt_oss.gpt_oss) gpt_oss_20b_peft_config() (in module bridge.recipes.gpt_oss.gpt_oss) gpt_oss_20b_peft_fp8_current_scaling_config() (in module bridge.recipes.gpt_oss.gpt_oss) gpt_oss_20b_peft_mxfp8_config() (in module bridge.recipes.gpt_oss.gpt_oss) gpt_oss_20b_pretrain_config() (in module bridge.recipes.gpt_oss.gpt_oss) gpt_oss_20b_pretrain_fp8_current_scaling_config() (in module bridge.recipes.gpt_oss.gpt_oss) gpt_oss_20b_pretrain_mxfp8_config() (in module bridge.recipes.gpt_oss.gpt_oss) gpt_oss_20b_sft_config() (in module bridge.recipes.gpt_oss.gpt_oss) gpt_oss_20b_sft_fp8_current_scaling_config() (in module bridge.recipes.gpt_oss.gpt_oss) gpt_oss_20b_sft_mxfp8_config() (in module bridge.recipes.gpt_oss.gpt_oss) GPTDatasetConfig (class in bridge.training.config) GPTFIMDataset (class in bridge.data.datasets.fim_dataset) GPTFIMDatasetConfig (class in bridge.training.config) GPTModelBuilder (class in bridge.models.gpt.gpt_builder) GPTModelConfig (class in bridge.models.gpt.gpt_builder) GPTModelProvider (class in bridge.models.gpt_provider) GPTOSSBridge (class in bridge.models.gpt_oss.gpt_oss_bridge) GPTOSSMLPDownProjMapping (class in bridge.models.gpt_oss.gpt_oss_bridge) GPTOSSMLPGateUpProjMapping (class in bridge.models.gpt_oss.gpt_oss_bridge) GPTProvider175B (class in bridge.models.gpt_provider) GPTSFTChatDataset (class in bridge.data.datasets.sft) GPTSFTDataset (class in bridge.data.datasets.sft) GPTSFTPackedDataset (class in bridge.data.datasets.sft) GPTSFTPackedParquetDataset (class in bridge.data.datasets.packed_parquet) gpu_individual_perf_threshold (bridge.training.config.NVRxStragglerDetectionConfig attribute) GPU_INSTALL_STRING (in module bridge.utils.import_utils) gpu_only_import() (in module bridge.utils.import_utils) gpu_only_import_from() (in module bridge.utils.import_utils) gpu_relative_perf_threshold (bridge.training.config.NVRxStragglerDetectionConfig attribute) gpu_sm100_or_newer (bridge.recipes.run_plugins.PerfEnvPlugin attribute) grad_norm (bridge.training.callbacks.CallbackContext attribute) grad_reduce_in_fp32 (bridge.training.mixed_precision.MixedPrecisionConfig attribute) gradient_accumulation_fusion (bridge.diffusion.models.flux.flux_provider.FluxProvider attribute) (bridge.models.gpt_provider.GPTModelProvider attribute) (bridge.models.mamba.mamba_provider.MambaModelProvider attribute) (bridge.models.qwen3_asr.qwen3_asr_provider.Qwen3ASRModelProvider attribute) (bridge.models.qwen_audio.qwen2_audio_provider.Qwen2AudioModelProvider attribute) (bridge.models.qwen_omni.qwen25_omni_provider.Qwen25OmniModelProvider attribute) granularity (bridge.training.config.InProcessRestartConfig attribute) grid_sizes_calculation() (in module bridge.diffusion.models.wan.utils) group_key (bridge.models.conversion.param_mapping.FusedExpertMapping property) (bridge.models.conversion.param_mapping.FusedGatedExpertMapping property) (bridge.models.gpt_oss.gpt_oss_bridge.GPTOSSMLPDownProjMapping property) (bridge.models.gpt_oss.gpt_oss_bridge.GPTOSSMLPGateUpProjMapping property) group_limited_topk() (bridge.models.bailing.modeling_bailing_moe_v2.BailingMoeV2Gate method) guidance_embed (bridge.diffusion.models.flux.flux_provider.FluxProvider attribute) guidance_scale (bridge.diffusion.models.flux.flux_provider.FluxProvider attribute) H H_latents (bridge.diffusion.data.wan.wan_energon_datamodule.WanDatasetConfig attribute) (bridge.diffusion.data.wan.wan_mock_datamodule.WanMockDataModuleConfig attribute) half() (bridge.models.hf_pretrained.causal_lm.PreTrainedCausalLM method) (bridge.models.hf_pretrained.vlm.PreTrainedVLM method) handle_index() (in module bridge.data.datasets.utils) handle_profiling_step() (in module bridge.training.profiling) handle_profiling_stop() (in module bridge.training.profiling) hard_timeout (bridge.training.config.InProcessRestartConfig attribute) has_callbacks() (bridge.training.callbacks.CallbackManager method) has_glob() (bridge.models.hf_pretrained.state.SafeTensorsStateSource method) (bridge.models.hf_pretrained.state.StateDict method) (bridge.models.hf_pretrained.state.StateSource method) has_language_model() (bridge.training.utils.pg_utils.DistTrainProcessGroupCollection method) has_model (bridge.models.hf_pretrained.causal_lm.PreTrainedCausalLM property) has_modelopt_state() (in module bridge.training.post_training.checkpointing) HasBool (class in bridge.peft.walk_utils) HAVE_TE (in module bridge.peft.module_matcher) (in module bridge.peft.utils) Head (class in bridge.diffusion.models.wan.wan_model) head_dim (bridge.models.qwen_omni.qwen25_omni_provider.Qwen25OmniModelProvider attribute) (bridge.models.qwen_vl.qwen3_vl_provider.Qwen3VLMoEModelProvider attribute) heartbeat_interval (bridge.training.config.InProcessRestartConfig attribute) heartbeat_timeout (bridge.training.config.InProcessRestartConfig attribute) hetereogenous_dist_checkpoint (bridge.models.qwen_vl.qwen35_vl_provider.Qwen35VLModelProvider attribute) (bridge.models.qwen_vl.qwen35_vl_provider.Qwen35VLMoEModelProvider attribute) heterogeneous_layer_spec() (in module bridge.models.llama_nemotron.llama_nemotron_provider) heterogeneous_layers_config_encoded_json (bridge.models.llama_nemotron.llama_nemotron_provider.LlamaNemotronHeterogeneousProvider attribute) heterogeneous_layers_config_path (bridge.models.llama_nemotron.llama_nemotron_provider.LlamaNemotronHeterogeneousProvider attribute) HeterogeneousTransformerConfig (class in bridge.models.transformer_config) HF_ARCHITECTURE_ALIASES (in module bridge.models.conversion.auto_bridge) HF_BASED_TOKENIZERS (in module bridge.training.model_load_save) hf_config (bridge.models.ministral3.ministral3_provider.Ministral3ModelProvider attribute) (bridge.models.qwen_audio.qwen2_audio_provider.Qwen2AudioModelProvider attribute) hf_config_to_provider_kwargs() (bridge.models.conversion.model_bridge.MegatronModelBridge method) hf_data_files (bridge.data.mimo.hf_provider.HFMimoDatasetProvider attribute) hf_dataset_name (bridge.data.mimo.hf_provider.HFMimoDatasetProvider attribute) hf_dataset_path (bridge.data.mimo.hf_provider.HFMimoDatasetProvider attribute) hf_filter_lambda (bridge.data.builders.hf_dataset.HFDatasetConfig attribute) hf_filter_lambda_kwargs (bridge.data.builders.hf_dataset.HFDatasetConfig attribute) hf_kwargs (bridge.data.builders.hf_dataset.HFDatasetConfig attribute) hf_model_id (bridge.models.common.base.ModelConfig attribute) (bridge.models.gpt_provider.GPTModelProvider attribute) (bridge.models.mamba.mamba_provider.MambaModelProvider attribute) hf_model_path (bridge.models.kimi_vl.kimi_k25_vl_provider.KimiK25VLModelProvider attribute) (bridge.recipes.qwen_vl.qwen25_vl_dataset.MockQwen25VLDatasetProvider attribute) hf_path (bridge.recipes.qwen_vl.qwen3_vl.Qwen3VLCommonKwargs attribute) hf_processor_path (bridge.data.vlm_datasets.hf_provider.HFDatasetConversationProvider attribute) (bridge.data.vlm_datasets.mock_provider.MockVLMConversationProvider attribute) (bridge.data.vlm_datasets.preloaded_provider.PreloadedVLMConversationProvider attribute) hf_text_config (bridge.models.qwen_vl.modelling_qwen3_vl.transformer_config.Qwen3VLTransformerConfig attribute) (bridge.models.qwen_vl.qwen3_vl_provider.Qwen3VLModelProvider attribute) (bridge.models.qwen_vl.qwen3_vl_provider.Qwen3VLMoEModelProvider attribute) hf_to_megatron() (bridge.models.conversion.model_bridge._HFNameSuffixMapping method) (bridge.models.conversion.param_mapping.AutoMapping method) (bridge.models.conversion.param_mapping.ChunkedMapping method) (bridge.models.conversion.param_mapping.ColumnParallelMapping method) (bridge.models.conversion.param_mapping.ConcatenatedQKVMapping method) (bridge.models.conversion.param_mapping.DirectMapping method) (bridge.models.conversion.param_mapping.FusedExpertMapping method) (bridge.models.conversion.param_mapping.FusedGatedExpertMapping method) (bridge.models.conversion.param_mapping.GatedMLPMapping method) (bridge.models.conversion.param_mapping.GDNLinearMapping method) (bridge.models.conversion.param_mapping.GDNLinearMappingSeparate method) (bridge.models.conversion.param_mapping.KVMapping method) (bridge.models.conversion.param_mapping.MambaInProjMapping method) (bridge.models.conversion.param_mapping.MegatronParamMapping method) (bridge.models.conversion.param_mapping.QKVMapping method) (bridge.models.conversion.param_mapping.ReplicatedMapping method) (bridge.models.conversion.param_mapping.RMSNorm2ZeroCenteredRMSNormMapping method) (bridge.models.conversion.param_mapping.RowParallelMapping method) (bridge.models.gpt_oss.gpt_oss_bridge.GPTOSSMLPDownProjMapping method) (bridge.models.gpt_oss.gpt_oss_bridge.GPTOSSMLPGateUpProjMapping method) (bridge.models.minimax_m2.minimax_m2_bridge._FullDimQKNormMapping method) (bridge.models.nemotronh.nemotron_h_bridge._MTPFlatteningMapping method) (bridge.models.nemotronh.nemotron_h_bridge._MTPFlatteningQKVMapping method) hf_to_megatron_activation() (bridge.models.conversion.model_bridge.MegatronModelBridge class method) hf_to_megatron_lookup() (bridge.models.conversion.mapping_registry.MegatronMappingRegistry method) hf_tokenizer_kwargs (bridge.training.tokenizers.config.TokenizerConfig attribute) hf_tokenizer_path (bridge.data.mimo.hf_provider.HFMimoDatasetProvider attribute) hf_train_valid_test_datasets_provider() (in module bridge.data.utils) HFDatasetBuilder (class in bridge.data.builders.hf_dataset) HFDatasetConfig (class in bridge.data.builders.hf_dataset) HFDatasetConversationProvider (class in bridge.data.vlm_datasets.hf_provider) HFEncoderTaskBatch (class in bridge.data.energon.hf_encoder_task_encoder) HFEncoderTaskSample (class in bridge.data.energon.hf_encoder_task_encoder) HFEncoderVLMTaskEncoder (class in bridge.data.energon.hf_encoder_task_encoder) HFMimoDatasetProvider (class in bridge.data.mimo.hf_provider) HFPreTrained (in module bridge.models.conversion.model_bridge) HFWeightTuple (class in bridge.models.conversion.model_bridge) hidden_dropout (bridge.diffusion.models.flux.flux_provider.FluxProvider attribute) (bridge.diffusion.models.wan.wan_provider.WanModelProvider attribute) (bridge.models.gemma.gemma2_provider.Gemma2ModelProvider attribute) (bridge.models.gemma.gemma3_provider.Gemma3ModelProvider attribute) (bridge.models.gemma.gemma_provider.GemmaModelProvider attribute) (bridge.models.glm_vl.glm_45v_provider.GLM45VModelProvider attribute) (bridge.models.gpt_provider.GPTProvider175B attribute) (bridge.models.llama_nemotron.llama_nemotron_provider.LlamaNemotronHeterogeneousProvider attribute) (bridge.models.mamba.mamba_provider.MambaModelProvider attribute) (bridge.models.ministral3.ministral3_provider.Ministral3ModelProvider attribute) (bridge.models.mistral.mistral_provider.MistralModelProvider attribute) (bridge.models.olmoe.olmoe_provider.OlMoEModelProvider attribute) (bridge.models.qwen3_asr.qwen3_asr_provider.Qwen3ASRModelProvider attribute) (bridge.models.qwen_omni.qwen25_omni_provider.Qwen25OmniModelProvider attribute) (bridge.models.qwen_vl.qwen35_vl_provider.Qwen35VLModelProvider attribute) (bridge.models.qwen_vl.qwen35_vl_provider.Qwen35VLMoEModelProvider attribute) (bridge.models.sarvam.sarvam_provider.SarvamMLAModelProvider attribute) (bridge.models.sarvam.sarvam_provider.SarvamMoEModelProvider attribute) hidden_size (bridge.diffusion.models.flux.flux_provider.FluxProvider attribute) (bridge.diffusion.models.wan.wan_provider.WanModelProvider attribute) (bridge.diffusion.models.wan.wan_provider.WanModelProvider14B attribute) (bridge.diffusion.models.wan.wan_provider.WanModelProvider1_3B attribute) (bridge.models.gemma.gemma2_provider.Gemma2ModelProvider27B attribute) (bridge.models.gemma.gemma2_provider.Gemma2ModelProvider2B attribute) (bridge.models.gemma.gemma2_provider.Gemma2ModelProvider9B attribute) (bridge.models.gemma.gemma3_provider.Gemma3ModelProvider12B attribute) (bridge.models.gemma.gemma3_provider.Gemma3ModelProvider1B attribute) (bridge.models.gemma.gemma3_provider.Gemma3ModelProvider27B attribute) (bridge.models.gemma.gemma3_provider.Gemma3ModelProvider4B attribute) (bridge.models.gemma.gemma_provider.GemmaModelProvider2B attribute) (bridge.models.gemma.gemma_provider.GemmaModelProvider7B attribute) (bridge.models.gemma_vl.modeling_gemma3_vl.Gemma3VLMultimodalProjectorConfig attribute) (bridge.models.glm_vl.glm_45v_provider.GLM45VModelProvider attribute) (bridge.models.gpt_provider.GPTProvider175B attribute) (bridge.models.ministral3.ministral3_provider.Ministral3ModelProvider14B attribute) (bridge.models.ministral3.ministral3_provider.Ministral3ModelProvider3B attribute) (bridge.models.ministral3.ministral3_provider.Ministral3ModelProvider8B attribute) (bridge.models.mistral.mistral_provider.MistralModelProvider attribute) (bridge.models.mistral.mistral_provider.MistralSmall3ModelProvider24B attribute) (bridge.models.nemotron_vl.nemotron_vl_provider.NemotronNano12Bv2VLModelProvider attribute) (bridge.models.nemotronh.nemotron_h_provider.Nemotron3NanoProvider attribute) (bridge.models.nemotronh.nemotron_h_provider.NemotronHModelProvider47B attribute) (bridge.models.nemotronh.nemotron_h_provider.NemotronHModelProvider4B attribute) (bridge.models.nemotronh.nemotron_h_provider.NemotronHModelProvider56B attribute) (bridge.models.nemotronh.nemotron_h_provider.NemotronHModelProvider8B attribute) (bridge.models.nemotronh.nemotron_h_provider.NemotronNanoModelProvider12Bv2 attribute) (bridge.models.nemotronh.nemotron_h_provider.NemotronNanoModelProvider9Bv2 attribute) (bridge.models.olmoe.olmoe_provider.OlMoEModelProvider attribute) (bridge.models.sarvam.sarvam_provider.SarvamMLAModelProvider attribute) (bridge.models.sarvam.sarvam_provider.SarvamMoEModelProvider attribute) hidden_states (bridge.models.bailing.modeling_bailing_moe_v2.MoEV2CausalLMOutputWithPast attribute) hierarchical_context_parallel_sizes (bridge.models.model_provider.ModelParallelKwargs attribute) hook_hf_module_setattr_for_tp_grad_sync() (in module bridge.utils.common_utils) hybrid_attention_ratio (bridge.models.mamba.mamba_builder.MambaModelConfig attribute) (bridge.models.mamba.mamba_provider.MambaModelProvider attribute) hybrid_layer_pattern (bridge.models.mamba.mamba_builder.MambaModelConfig attribute) (bridge.models.mamba.mamba_provider.MambaModelProvider attribute) (bridge.models.nemotron_vl.nemotron_vl_provider.NemotronNano12Bv2VLModelProvider attribute) (bridge.models.nemotronh.nemotron_h_provider.Nemotron3NanoProvider attribute) (bridge.models.nemotronh.nemotron_h_provider.NemotronHModelProvider47B attribute) (bridge.models.nemotronh.nemotron_h_provider.NemotronHModelProvider4B attribute) (bridge.models.nemotronh.nemotron_h_provider.NemotronHModelProvider56B attribute) (bridge.models.nemotronh.nemotron_h_provider.NemotronHModelProvider8B attribute) (bridge.models.nemotronh.nemotron_h_provider.NemotronNanoModelProvider12Bv2 attribute) (bridge.models.nemotronh.nemotron_h_provider.NemotronNanoModelProvider9Bv2 attribute) hybrid_mlp_ratio (bridge.models.mamba.mamba_builder.MambaModelConfig attribute) (bridge.models.mamba.mamba_provider.MambaModelProvider attribute) hybrid_override_pattern (bridge.models.mamba.mamba_builder.MambaModelConfig attribute) (bridge.models.mamba.mamba_provider.MambaModelProvider attribute) hysteresis (bridge.training.mixed_precision.MixedPrecisionConfig attribute) I ignore_index (bridge.models.kimi_vl.kimi_k25_vl_provider.KimiK25VLModelProvider attribute) IGNORE_INDEX (in module bridge.data.datasets.utils) (in module bridge.data.energon.task_encoder_utils) image_end_token_id (bridge.models.glm_vl.glm_45v_provider.GLM45VModelProvider attribute) image_folder (bridge.data.vlm_datasets.preloaded_provider.PreloadedVLMConversationProvider attribute) image_grid_thw (bridge.training.utils.visual_inputs.GenericVisualInputs attribute) (bridge.training.utils.visual_inputs.Qwen2_5_VLVisualInputs attribute) image_H (bridge.diffusion.data.flux.flux_energon_datamodule.FluxDatasetConfig attribute) (bridge.diffusion.data.flux.flux_mock_datamodule.FluxMockDataModuleConfig attribute) image_precached (bridge.diffusion.data.flux.flux_mock_datamodule.FluxMockDataModuleConfig attribute) image_processor (bridge.data.energon.energon_provider.EnergonProvider attribute) (bridge.models.hf_pretrained.causal_lm.PreTrainedCausalLM property) (bridge.models.hf_pretrained.vlm.PreTrainedVLM property) image_size (bridge.data.vlm_datasets.mock_provider.MockVLMConversationProvider attribute) (bridge.diffusion.data.common.diffusion_sample.DiffusionSample attribute) (bridge.models.gemma_vl.modeling_gemma3_vl.Gemma3VLMultimodalProjectorConfig attribute) (bridge.recipes.qwen_vl.qwen25_vl_dataset.MockQwen25VLDatasetProvider attribute) image_sizes (bridge.training.utils.visual_inputs.GenericVisualInputs attribute) image_special_token_id (bridge.models.mimo.llava_provider.LlavaMimoProvider attribute) image_start_token_id (bridge.models.glm_vl.glm_45v_provider.GLM45VModelProvider attribute) image_tag_type (bridge.training.tokenizers.config.TokenizerConfig attribute) image_token_id (bridge.models.gemma_vl.gemma3_vl_provider.Gemma3VLModelProvider attribute) (bridge.models.glm_vl.glm_45v_provider.GLM45VModelProvider attribute) (bridge.models.kimi_vl.kimi_k25_vl_provider.KimiK25VLModelProvider attribute) (bridge.models.ministral3.ministral3_provider.Ministral3ModelProvider attribute) (bridge.models.qwen_omni.modeling_qwen25_omni.transformer_config.Qwen25OmniTransformerConfig attribute) (bridge.models.qwen_omni.qwen25_omni_provider.Qwen25OmniModelProvider attribute) (bridge.models.qwen_vl.modelling_qwen3_vl.transformer_config.Qwen3VLTransformerConfig attribute) (bridge.models.qwen_vl.qwen25_vl_provider.Qwen25VLModelProvider attribute) (bridge.models.qwen_vl.qwen35_vl_provider.Qwen35VLModelProvider attribute) (bridge.models.qwen_vl.qwen35_vl_provider.Qwen35VLMoEModelProvider attribute) (bridge.models.qwen_vl.qwen3_vl_provider.Qwen3VLModelProvider attribute) (bridge.models.qwen_vl.qwen3_vl_provider.Qwen3VLMoEModelProvider attribute) image_W (bridge.diffusion.data.flux.flux_energon_datamodule.FluxDatasetConfig attribute) (bridge.diffusion.data.flux.flux_mock_datamodule.FluxMockDataModuleConfig attribute) imgs (bridge.data.energon.task_encoder_utils.ChatMLSample attribute) impl() (bridge.models.decorators.dispatch._Dispatch method) import_ckpt() (bridge.models.conversion.auto_bridge.AutoBridge class method) in_channels (bridge.diffusion.models.flux.flux_provider.FluxProvider attribute) (bridge.diffusion.models.wan.wan_provider.WanModelProvider attribute) (bridge.models.qwen_omni.modeling_qwen25_omni.transformer_config.Qwen25OmniTransformerConfig attribute) (bridge.models.qwen_vl.modelling_qwen3_vl.transformer_config.Qwen3VLTransformerConfig attribute) (bridge.models.qwen_vl.qwen3_vl_provider.Qwen3VLModelProvider attribute) in_features (bridge.peft.utils.AdapterAttributes attribute) index_for_timestep() (bridge.diffusion.models.flux.flow_matching.flux_inference_pipeline.FlowMatchEulerDiscreteScheduler method) infer_mode_from_dataset() (in module bridge.recipes.utils.dataset_utils) infer_target_modules_from_adapter_weights() (in module bridge.models.conversion.peft_bridge) InferenceLossReturn (in module bridge.training.forward_step_func_types) init_checkpointing_context() (in module bridge.training.checkpointing) init_method_const() (in module bridge.peft.utils) init_method_kaiming_uniform() (in module bridge.peft.utils) init_method_normal() (in module bridge.peft.utils) init_method_std (bridge.models.glm_vl.glm_45v_provider.GLM45VModelProvider attribute) (bridge.models.llama_nemotron.llama_nemotron_provider.LlamaNemotronHeterogeneousProvider attribute) (bridge.models.ministral3.ministral3_provider.Ministral3ModelProvider attribute) (bridge.models.mistral.mistral_provider.MistralModelProvider attribute) (bridge.models.olmoe.olmoe_provider.OlMoEModelProvider attribute) (bridge.models.sarvam.sarvam_provider.SarvamMLAModelProvider attribute) (bridge.models.sarvam.sarvam_provider.SarvamMoEModelProvider attribute) init_model_with_meta_device (bridge.models.gpt_provider.GPTModelProvider attribute) (bridge.models.mimo.mimo_provider.MimoModelProvider attribute) (bridge.models.model_provider.GetModelKwargs attribute) init_rerun_state() (in module bridge.training.initialize) init_training_step (bridge.training.config.TensorInspectConfig attribute) init_weight_magnitude() (bridge.peft.dora_layers.ParallelLinearDoRAAdapter method) initial_loss_scale (bridge.training.mixed_precision.MixedPrecisionConfig attribute) initial_rank_heartbeat_timeout (bridge.recipes.run_plugins.FaultTolerancePlugin attribute) initialize() (bridge.training.nvrx_straggler.NVRxStragglerDetectionManager method) initialize_async_checkpoint_worker() (bridge.training.state.GlobalState method) initialize_megatron() (in module bridge.training.initialize) initialize_model_parallel() (bridge.models.mimo.mimo_provider.MimoModelProvider method) (bridge.models.model_provider.ModelProviderMixin method) initialize_pytorch_profiler() (in module bridge.training.profiling) initialize_tensor_inspect_pre_model_initialization() (in module bridge.training.tensor_inspect) inprocess_restart (bridge.training.config.ConfigContainer attribute) inprocess_restart() (in module bridge.training.inprocess_restart) InProcessRestartConfig (class in bridge.training.config) input (bridge.data.builders.hf_dataset.ProcessExampleOutput attribute) input_features (bridge.training.utils.visual_inputs.Qwen2AudioInputs attribute) input_ids (bridge.data.energon.hf_encoder_task_encoder.HFEncoderTaskBatch attribute) (bridge.data.energon.hf_encoder_task_encoder.HFEncoderTaskSample attribute) input_is_parallel (bridge.peft.utils.AdapterAttributes attribute) input_size (bridge.models.gemma_vl.modeling_gemma3_vl.Gemma3VLMultimodalProjectorConfig attribute) instantiate() (in module bridge.utils.instantiate_utils) instantiate_node() (in module bridge.utils.instantiate_utils) InstantiationException InstantiationMode (class in bridge.utils.instantiate_utils) interleaved_attn_pattern (bridge.models.gemma.gemma3_provider.Gemma3ModelProvider attribute) inv_freq (bridge.models.qwen3_asr.hf_qwen3_asr.modeling_qwen3_asr.Qwen3ASRThinkerTextRotaryEmbedding attribute) is_adapter (bridge.models.conversion.param_mapping.MegatronParamMapping property) is_async_chkpt_enabled (bridge.training.state.FaultToleranceState attribute) is_calculating_timeouts (bridge.training.state.FaultToleranceState attribute) is_checkpoint_iteration_directory() (in module bridge.training.utils.checkpoint_utils) is_current_rank_in_grid() (in module bridge.training.mimo_parallel_utils) is_dataset_built_on_rank() (in module bridge.data.utils) is_empty_async_queue() (in module bridge.training.checkpointing) is_expert (bridge.models.conversion.param_mapping.MegatronParamMapping property) is_expert_linear() (in module bridge.peft.utils) is_grouped_export (bridge.models.conversion.param_mapping._LooseGatedMLPMapping attribute) (bridge.models.conversion.param_mapping.FusedExpertMapping attribute) (bridge.models.conversion.param_mapping.FusedGatedExpertMapping attribute) (bridge.models.gpt_oss.gpt_oss_bridge.GPTOSSMLPDownProjMapping attribute) (bridge.models.gpt_oss.gpt_oss_bridge.GPTOSSMLPGateUpProjMapping attribute) is_hybrid_model (bridge.models.nemotron_vl.nemotron_vl_provider.NemotronNano12Bv2VLModelProvider attribute) (bridge.models.nemotronh.nemotron_h_provider.NemotronHModelProvider attribute) is_last_rank() (in module bridge.utils.common_utils) is_modelopt_dynamic_module() (in module bridge.models.conversion.utils) is_packed_parquet_file() (in module bridge.data.datasets.packed_parquet) is_packed_parquet_spec() (in module bridge.data.datasets.packed_parquet) is_persistent_chkpt_loaded (bridge.training.state.FaultToleranceState attribute) is_pp_first_stage() (in module bridge.models.mimo.mimo_builder) is_pp_last_stage() (in module bridge.models.mimo.mimo_builder) is_rank_in_pg() (in module bridge.training.initialize) is_safe_repo() (in module bridge.models.hf_pretrained.utils) is_setup_section_open (bridge.training.state.FaultToleranceState attribute) is_slurm_job() (in module bridge.utils.slurm_utils) is_tensor_parallel() (in module bridge.models.conversion.model_bridge) is_torch_min_version() (in module bridge.utils.import_utils) is_transformers_min_version() (in module bridge.models.glm_vl.modeling_glm_45v) (in module bridge.models.qwen_vl.modeling_qwen25_vl) is_unavailable() (in module bridge.utils.import_utils) is_vision_language (bridge.models.gemma.gemma3_provider.Gemma3ModelProvider attribute) (bridge.models.gemma.gemma3_provider.Gemma3ModelProvider12B attribute) (bridge.models.gemma.gemma3_provider.Gemma3ModelProvider1B attribute) (bridge.models.gemma.gemma3_provider.Gemma3ModelProvider27B attribute) (bridge.models.gemma.gemma3_provider.Gemma3ModelProvider4B attribute) items() (bridge.models.hf_pretrained.state.StateDict method) J JointSelfAttention (class in bridge.diffusion.models.flux.flux_attention) JointSelfAttentionSubmodules (class in bridge.diffusion.models.flux.flux_attention) json2token() (in module bridge.data.vlm_datasets.token_utils) K k_layernorm (bridge.diffusion.models.common.dit_attention.DiTCrossAttentionSubmodules attribute) (bridge.diffusion.models.flux.flux_attention.JointSelfAttentionSubmodules attribute) kd_config (bridge.models.distillation_provider.DistillationProvider attribute) keep_mtp_spec_in_bf16 (bridge.models.mamba.mamba_provider.MambaModelProvider attribute) key_to_filename_map (bridge.models.hf_pretrained.state.SafeTensorsStateSource property) keys() (bridge.models.hf_pretrained.state.StateDict method) kimi_k25_vl_collate_fn() (in module bridge.data.vlm_datasets.collate) kimi_k25_vl_sft_config() (in module bridge.recipes.kimi_vl.kimi_k25_vl) KimiK25VLBridge (class in bridge.models.kimi_vl.kimi_k25_vl_bridge) KimiK25VLModel (class in bridge.models.kimi_vl.modeling_kimi_k25_vl) KimiK25VLModelProvider (class in bridge.models.kimi_vl.kimi_k25_vl_provider) KimiK2Bridge (class in bridge.models.kimi.kimi_bridge) kv_channels (bridge.models.gemma.gemma2_provider.Gemma2ModelProvider attribute) (bridge.models.gemma.gemma2_provider.Gemma2ModelProvider27B attribute) (bridge.models.gemma.gemma3_provider.Gemma3ModelProvider12B attribute) (bridge.models.gemma.gemma3_provider.Gemma3ModelProvider1B attribute) (bridge.models.gemma.gemma3_provider.Gemma3ModelProvider27B attribute) (bridge.models.gemma.gemma3_provider.Gemma3ModelProvider4B attribute) (bridge.models.gemma.gemma_provider.GemmaModelProvider attribute) (bridge.models.glm_vl.glm_45v_provider.GLM45VModelProvider attribute) (bridge.models.ministral3.ministral3_provider.Ministral3ModelProvider attribute) (bridge.models.mistral.mistral_provider.MistralSmall3ModelProvider24B attribute) (bridge.models.nemotron_vl.nemotron_vl_provider.NemotronNano12Bv2VLModelProvider attribute) (bridge.models.nemotronh.nemotron_h_provider.Nemotron3NanoProvider attribute) (bridge.models.nemotronh.nemotron_h_provider.NemotronHModelProvider4B attribute) (bridge.models.nemotronh.nemotron_h_provider.NemotronNanoModelProvider12Bv2 attribute) (bridge.models.nemotronh.nemotron_h_provider.NemotronNanoModelProvider9Bv2 attribute) (bridge.models.olmoe.olmoe_provider.OlMoEModelProvider attribute) (bridge.models.qwen_vl.qwen35_vl_provider.Qwen35VLModelProvider attribute) (bridge.models.qwen_vl.qwen35_vl_provider.Qwen35VLMoEModelProvider attribute) (bridge.models.sarvam.sarvam_provider.SarvamMLAModelProvider attribute) (bridge.models.sarvam.sarvam_provider.SarvamMoEModelProvider attribute) kv_lora_rank (bridge.models.sarvam.sarvam_provider.SarvamMLAModelProvider attribute) KVMapping (class in bridge.models.conversion.param_mapping) kwargs (bridge.models.hf_pretrained.vlm.PreTrainedVLM property) L labels (bridge.data.energon.hf_encoder_task_encoder.HFEncoderTaskBatch attribute) (bridge.data.energon.hf_encoder_task_encoder.HFEncoderTaskSample attribute) language_config (bridge.models.mimo.llava_provider.LlavaMimoProvider attribute) language_max_sequence_length (bridge.models.qwen3_asr.modeling_qwen3_asr.transformer_config.Qwen3ASRTransformerConfig attribute) (bridge.models.qwen3_asr.qwen3_asr_provider.Qwen3ASRModelProvider attribute) (bridge.models.qwen_omni.modeling_qwen25_omni.transformer_config.Qwen25OmniTransformerConfig attribute) (bridge.models.qwen_omni.qwen25_omni_provider.Qwen25OmniModelProvider attribute) (bridge.models.qwen_vl.modelling_qwen3_vl.transformer_config.Qwen3VLTransformerConfig attribute) (bridge.models.qwen_vl.qwen35_vl_provider.Qwen35VLModelProvider attribute) (bridge.models.qwen_vl.qwen35_vl_provider.Qwen35VLMoEModelProvider attribute) (bridge.models.qwen_vl.qwen3_vl_provider.Qwen3VLModelProvider attribute) (bridge.models.qwen_vl.qwen3_vl_provider.Qwen3VLMoEModelProvider attribute) language_model_spec (bridge.models.mimo.mimo_provider.MimoModelProvider attribute) language_model_type (bridge.models.nemotron_vl.nemotron_vl_provider.NemotronNano12Bv2VLModelProvider attribute) language_world_size (bridge.models.qwen_vl.qwen3_vl_provider.DistTrainConfig attribute) last_call_wait (bridge.training.config.InProcessRestartConfig attribute) latent_channels (bridge.diffusion.data.flux.flux_energon_datamodule.FluxDataModuleConfig attribute) (bridge.diffusion.data.flux.flux_energon_datamodule.FluxDatasetConfig attribute) latent_shape (bridge.diffusion.data.common.diffusion_sample.DiffusionSample attribute) latents (bridge.diffusion.common.flow_matching.adapters.base.FlowMatchingContext attribute) layernorm_across_heads (bridge.diffusion.models.wan.wan_provider.WanModelProvider attribute) layernorm_epsilon (bridge.diffusion.models.flux.flux_provider.FluxProvider attribute) (bridge.diffusion.models.wan.wan_provider.WanModelProvider attribute) (bridge.models.gemma.gemma2_provider.Gemma2ModelProvider attribute) (bridge.models.gemma.gemma3_provider.Gemma3ModelProvider attribute) (bridge.models.gemma.gemma_provider.GemmaModelProvider attribute) (bridge.models.gemma_vl.modeling_gemma3_vl.Gemma3VLMultimodalProjectorConfig attribute) (bridge.models.glm_vl.glm_45v_provider.GLM45VModelProvider attribute) (bridge.models.mamba.mamba_provider.MambaModelProvider attribute) (bridge.models.ministral3.ministral3_provider.Ministral3ModelProvider attribute) (bridge.models.mistral.mistral_provider.MistralModelProvider attribute) (bridge.models.olmoe.olmoe_provider.OlMoEModelProvider attribute) (bridge.models.sarvam.sarvam_provider.SarvamMLAModelProvider attribute) (bridge.models.sarvam.sarvam_provider.SarvamMoEModelProvider attribute) layernorm_sm_margin (bridge.recipes.run_plugins.PerfEnvPlugin attribute) layernorm_zero_centered_gamma (bridge.diffusion.models.wan.wan_provider.WanModelProvider attribute) (bridge.models.gemma.gemma2_provider.Gemma2ModelProvider attribute) (bridge.models.gemma.gemma3_provider.Gemma3ModelProvider attribute) (bridge.models.gemma.gemma_provider.GemmaModelProvider attribute) (bridge.models.gemma_vl.modeling_gemma3_vl.Gemma3VLMultimodalProjectorConfig attribute) (bridge.models.gpt_provider.GPTProvider175B attribute) (bridge.models.qwen_vl.qwen35_vl_provider.Qwen35VLModelProvider attribute) (bridge.models.qwen_vl.qwen35_vl_provider.Qwen35VLMoEModelProvider attribute) lazy_init (bridge.training.config.DistributedInitConfig property) LENIENT (bridge.utils.instantiate_utils.InstantiationMode attribute) linear_attention_freq (bridge.models.qwen_vl.qwen35_vl_provider.Qwen35VLModelProvider attribute) (bridge.models.qwen_vl.qwen35_vl_provider.Qwen35VLMoEModelProvider attribute) linear_conv_kernel_dim (bridge.models.qwen_vl.qwen35_vl_provider.Qwen35VLModelProvider attribute) (bridge.models.qwen_vl.qwen35_vl_provider.Qwen35VLMoEModelProvider attribute) linear_fc1 (bridge.models.qwen_vl.modelling_qwen3_vl.utils.PatchMergerSubmodules attribute) linear_fc2 (bridge.models.qwen_vl.modelling_qwen3_vl.utils.PatchMergerSubmodules attribute) linear_in_task (bridge.models.conversion.peft_bridge.AdapterWeightConversionTask attribute) linear_in_weight (bridge.models.conversion.peft_bridge.AdapterWeight attribute) linear_key_head_dim (bridge.models.qwen_vl.qwen35_vl_provider.Qwen35VLModelProvider attribute) (bridge.models.qwen_vl.qwen35_vl_provider.Qwen35VLMoEModelProvider attribute) linear_kv (bridge.diffusion.models.common.dit_attention.DiTCrossAttentionSubmodules attribute) linear_num_key_heads (bridge.models.qwen_vl.qwen35_vl_provider.Qwen35VLModelProvider attribute) (bridge.models.qwen_vl.qwen35_vl_provider.Qwen35VLMoEModelProvider attribute) linear_num_value_heads (bridge.models.qwen_vl.qwen35_vl_provider.Qwen35VLModelProvider attribute) (bridge.models.qwen_vl.qwen35_vl_provider.Qwen35VLMoEModelProvider attribute) linear_out_task (bridge.models.conversion.peft_bridge.AdapterWeightConversionTask attribute) linear_out_weight (bridge.models.conversion.peft_bridge.AdapterWeight attribute) linear_proj (bridge.diffusion.models.common.dit_attention.DiTCrossAttentionSubmodules attribute) (bridge.diffusion.models.flux.flux_attention.JointSelfAttentionSubmodules attribute) linear_q (bridge.diffusion.models.common.dit_attention.DiTCrossAttentionSubmodules attribute) linear_qkv (bridge.diffusion.models.flux.flux_attention.JointSelfAttentionSubmodules attribute) linear_value_head_dim (bridge.models.qwen_vl.qwen35_vl_provider.Qwen35VLModelProvider attribute) (bridge.models.qwen_vl.qwen35_vl_provider.Qwen35VLMoEModelProvider attribute) LinearAdapter (class in bridge.peft.lora_layers) LinearInterpolationSchedule (class in bridge.diffusion.common.flow_matching.flow_matching_pipeline) list_callbacks() (bridge.training.callbacks.CallbackManager method) list_supported_models() (bridge.models.conversion.auto_bridge.AutoBridge class method) llama2_7b_pretrain_config() (in module bridge.recipes.llama.llama2) llama31_405b_peft_config() (in module bridge.recipes.llama.llama3) llama31_405b_pretrain_config() (in module bridge.recipes.llama.llama3) llama31_405b_sft_config() (in module bridge.recipes.llama.llama3) llama31_70b_peft_config() (in module bridge.recipes.llama.llama3) llama31_70b_pretrain_config() (in module bridge.recipes.llama.llama3) llama31_70b_sft_config() (in module bridge.recipes.llama.llama3) llama31_8b_peft_config() (in module bridge.recipes.llama.llama3) llama31_8b_pretrain_config() (in module bridge.recipes.llama.llama3) llama31_8b_sft_config() (in module bridge.recipes.llama.llama3) llama32_1b_peft_config() (in module bridge.recipes.llama.llama3) llama32_1b_pretrain_config() (in module bridge.recipes.llama.llama3) llama32_1b_sft_config() (in module bridge.recipes.llama.llama3) llama32_3b_peft_config() (in module bridge.recipes.llama.llama3) llama32_3b_pretrain_config() (in module bridge.recipes.llama.llama3) llama32_3b_sft_config() (in module bridge.recipes.llama.llama3) llama3_70b_16k_pretrain_config() (in module bridge.recipes.llama.llama3) llama3_70b_64k_pretrain_config() (in module bridge.recipes.llama.llama3) llama3_70b_peft_config() (in module bridge.recipes.llama.llama3) llama3_70b_pretrain_config() (in module bridge.recipes.llama.llama3) llama3_70b_sft_config() (in module bridge.recipes.llama.llama3) llama3_8b_128k_pretrain_config() (in module bridge.recipes.llama.llama3) llama3_8b_16k_pretrain_config() (in module bridge.recipes.llama.llama3) llama3_8b_64k_pretrain_config() (in module bridge.recipes.llama.llama3) llama3_8b_low_precision_pretrain_config() (in module bridge.recipes.llama.llama3) llama3_8b_peft_config() (in module bridge.recipes.llama.llama3) llama3_8b_pretrain_config() (in module bridge.recipes.llama.llama3) llama3_8b_sft_config() (in module bridge.recipes.llama.llama3) Llama4SelfAttention (class in bridge.models.llama.llama4_utils) LLAMA_TOKENS (in module bridge.data.vlm_datasets.token_utils) LlamaBridge (class in bridge.models.llama.llama_bridge) LlamaNemotronBridge (class in bridge.models.llama_nemotron.llama_nemotron_bridge) LlamaNemotronHeterogeneousProvider (class in bridge.models.llama_nemotron.llama_nemotron_provider) LLAVA_TOKENS (in module bridge.data.vlm_datasets.token_utils) LlavaMimoProvider (class in bridge.models.mimo.llava_provider) LLM_FINETUNE_PRESETS (in module bridge.recipes.utils.dataset_utils) load() (bridge.training.checkpointing.CheckpointManager method) (bridge.training.checkpointing.DefaultCheckpointManager method) load_checkpoint() (in module bridge.training.checkpointing) load_config_only (bridge.diffusion.models.flux.flow_matching.flux_inference_pipeline.T5Config attribute) load_dist_ckpt (bridge.diffusion.models.flux.flux_provider.FluxProvider attribute) load_file() (bridge.data.datasets.utils._TextMemMapDataset method) load_hf_weights() (bridge.models.conversion.auto_bridge.AutoBridge method) load_megatron_model() (bridge.models.conversion.auto_bridge.AutoBridge method) (in module bridge.training.model_load_save) load_model_config() (in module bridge.training.model_load_save) load_modelopt_state() (in module bridge.training.post_training.checkpointing) load_state_dict() (bridge.diffusion.data.common.diffusion_energon_datamodule.DiffusionDataModule method) (bridge.training.state.TrainState method) load_tensors() (bridge.models.hf_pretrained.state.DictStateSource method) (bridge.models.hf_pretrained.state.SafeTensorsStateSource method) (bridge.models.hf_pretrained.state.StateSource method) load_text_encoders() (bridge.diffusion.models.flux.flow_matching.flux_inference_pipeline.FluxInferencePipeline method) load_tokenizer() (in module bridge.training.model_load_save) load_vae() (bridge.diffusion.models.flux.flow_matching.flux_inference_pipeline.FluxInferencePipeline method) load_weights_hf_to_megatron() (bridge.models.conversion.model_bridge.MegatronModelBridge method) LOCAL (bridge.training.checkpointing.CheckpointType attribute) local_layer_spec() (in module bridge.models.gpt.gpt_builder) (in module bridge.models.gpt_provider) (in module bridge.models.t5_provider) local_pg_collection (bridge.training.setup_mimo.MimoSetupOutput attribute) log (in module bridge.diffusion.models.common.dit_embeddings) log_dir (bridge.training.config.TensorInspectConfig attribute) LOG_FUSION_DISABLE (in module bridge.utils.fusions) log_non_default_values() (bridge.training.config.ConfigContainer method) log_single_rank() (in module bridge.training.utils.log_utils) log_task_config (bridge.recipes.run_plugins.WandbPlugin attribute) logger (bridge.training.config.ConfigContainer attribute) (in module bridge.data.builders.finetuning_dataset) (in module bridge.data.builders.hf_dataset) (in module bridge.data.datasets.fim_dataset) (in module bridge.data.datasets.packed_parquet) (in module bridge.data.datasets.packed_sequence) (in module bridge.data.datasets.packing_utils) (in module bridge.data.datasets.sft) (in module bridge.data.datasets.utils) (in module bridge.data.energon.base_energon_datamodule) (in module bridge.diffusion.common.flow_matching.flow_matching_pipeline) (in module bridge.diffusion.data.flux.flux_energon_datamodule) (in module bridge.diffusion.data.wan.wan_energon_datamodule) (in module bridge.diffusion.models.flux.flux_provider) (in module bridge.diffusion.models.flux.flux_step) (in module bridge.diffusion.models.wan.wan_provider) (in module bridge.diffusion.models.wan.wan_step) (in module bridge.models.bailing.bailing_moe2_bridge) (in module bridge.models.bailing.modeling_bailing_moe_v2) (in module bridge.models.common.unimodal) (in module bridge.models.conversion.auto_bridge) (in module bridge.models.conversion.model_bridge) (in module bridge.models.conversion.param_mapping) (in module bridge.models.distillation_provider) (in module bridge.models.glm.glm45_bridge) (in module bridge.models.gpt.gpt_builder) (in module bridge.models.gpt_provider) (in module bridge.models.hf_pretrained.base) (in module bridge.models.hf_pretrained.state) (in module bridge.models.hf_pretrained.utils) (in module bridge.models.kimi_vl.modeling_kimi_k25_vl) (in module bridge.models.llama.llama_bridge) (in module bridge.models.llama_nemotron.llama_nemotron_provider) (in module bridge.models.mamba.mamba_builder) (in module bridge.models.mamba.mamba_provider) (in module bridge.models.ministral3.ministral3_provider) (in module bridge.models.mistral.mistral_provider) (in module bridge.models.nemotronh.nemotron_h_bridge) (in module bridge.models.nemotronh.nemotron_h_provider) (in module bridge.models.olmoe.olmoe_bridge) (in module bridge.models.qwen3_asr.hf_qwen3_asr.configuration_qwen3_asr) (in module bridge.models.qwen_vl.qwen35_vl_bridge) (in module bridge.models.qwen_vl.qwen3_vl_step) (in module bridge.models.sarvam.sarvam_provider) (in module bridge.models.t5_provider) (in module bridge.peft.base) (in module bridge.peft.canonical_lora) (in module bridge.peft.dora) (in module bridge.peft.lora) (in module bridge.recipes.run_plugins) (in module bridge.recipes.utils.dataset_utils) (in module bridge.training.audio_lm_step) (in module bridge.training.callbacks) (in module bridge.training.checkpointing) (in module bridge.training.flex_dispatcher_backend) (in module bridge.training.gpt_step) (in module bridge.training.inprocess_restart) (in module bridge.training.llava_step) (in module bridge.training.mimo_parallel_utils) (in module bridge.training.mimo_step) (in module bridge.training.model_load_save) (in module bridge.training.pretrain_mimo) (in module bridge.training.setup_mimo) (in module bridge.training.train_mimo) (in module bridge.training.utils.checkpoint_utils) (in module bridge.training.utils.config_utils) (in module bridge.training.utils.log_utils) (in module bridge.training.utils.moe_token_drop) (in module bridge.training.utils.omegaconf_utils) (in module bridge.training.vlm_step) (in module bridge.utils.activation_map) (in module bridge.utils.decorators) (in module bridge.utils.fusions) (in module bridge.utils.import_utils) logger_name (bridge.training.config.NVRxStragglerDetectionConfig attribute) LoggerConfig (class in bridge.training.config) logging_level (bridge.training.config.LoggerConfig attribute) logical_and_across_model_parallel_group() (in module bridge.training.utils.train_utils) logit_softcapping() (in module bridge.models.gemma.gemma2_provider) logits (bridge.models.bailing.modeling_bailing_moe_v2.MoEV2CausalLMOutputWithPast attribute) LoRA (class in bridge.peft.lora) lora_A_init_method (bridge.peft.canonical_lora.CanonicalLoRA attribute) (bridge.peft.dora.DoRA attribute) (bridge.peft.lora.LoRA attribute) lora_B_init_method (bridge.peft.canonical_lora.CanonicalLoRA attribute) (bridge.peft.dora.DoRA attribute) (bridge.peft.lora.LoRA attribute) lora_dtype (bridge.peft.lora.LoRA attribute) LoRALinear (class in bridge.peft.lora_layers) LoRALinearSplitFC1UpGate (class in bridge.peft.canonical_lora) LoRALinearSplitQKV (class in bridge.peft.canonical_lora) LoRAMerge (class in bridge.peft.lora) LoRATopKRouter (class in bridge.peft.lora_layers) loss (bridge.models.bailing.modeling_bailing_moe_v2.MoEV2CausalLMOutputWithPast attribute) loss_dict (bridge.training.callbacks.CallbackContext attribute) loss_func() (in module bridge.training.mimo_step) loss_func_kd() (in module bridge.training.post_training.distillation) loss_mask (bridge.data.energon.hf_encoder_task_encoder.HFEncoderTaskBatch attribute) (bridge.data.energon.hf_encoder_task_encoder.HFEncoderTaskSample attribute) (bridge.diffusion.data.common.diffusion_sample.DiffusionSample attribute) loss_scale (bridge.training.mixed_precision.MixedPrecisionConfig attribute) loss_scale_window (bridge.training.mixed_precision.MixedPrecisionConfig attribute) LossFunction (in module bridge.training.forward_step_func_types) LossFunctionReturn (in module bridge.training.forward_step_func_types) LossReduced (in module bridge.training.forward_step_func_types) lr (bridge.recipes.qwen_vl.qwen3_vl.Qwen3VLCommonKwargs attribute) lr_decay_iters (bridge.recipes.qwen_vl.qwen3_vl.Qwen3VLCommonKwargs attribute) lr_warmup_iters (bridge.recipes.qwen_vl.qwen3_vl.Qwen3VLCommonKwargs attribute) M main_input_name (bridge.models.qwen3_asr.hf_qwen3_asr.modeling_qwen3_asr.Qwen3ASRAudioEncoder attribute) MAJOR (in module bridge.package_info) make_cord_v2_dataset() (in module bridge.data.vlm_datasets.hf_dataset_makers) make_cv17_dataset() (in module bridge.data.vlm_datasets.hf_dataset_makers) make_data_iterator_list() (in module bridge.data.iterator_utils) make_default_audio_dataset() (in module bridge.data.vlm_datasets.hf_dataset_makers) make_llava_video_178k_dataset() (in module bridge.data.vlm_datasets.hf_dataset_makers) make_medpix_dataset() (in module bridge.data.vlm_datasets.hf_dataset_makers) make_raven_dataset() (in module bridge.data.vlm_datasets.hf_dataset_makers) make_rdr_dataset() (in module bridge.data.vlm_datasets.hf_dataset_makers) make_vocab_size_divisible_by (bridge.diffusion.models.flux.flux_provider.FluxProvider attribute) (bridge.diffusion.models.wan.wan_provider.WanModelProvider attribute) (bridge.models.gpt.gpt_builder.GPTModelConfig attribute) (bridge.models.gpt_provider.GPTModelProvider attribute) (bridge.models.mamba.mamba_builder.MambaModelConfig attribute) (bridge.models.mamba.mamba_provider.MambaModelProvider attribute) (bridge.models.nemotron_vl.nemotron_vl_provider.NemotronNano12Bv2VLModelProvider attribute) (bridge.models.nemotronh.nemotron_h_provider.NemotronHModelProvider attribute) (bridge.models.sarvam.sarvam_provider.SarvamMLAModelProvider attribute) (bridge.models.sarvam.sarvam_provider.SarvamMoEModelProvider attribute) (bridge.models.t5_provider.T5ModelProvider attribute) make_vocab_size_divisible_by() (bridge.models.conversion.model_bridge.MegatronModelBridge method) maker_kwargs (bridge.data.vlm_datasets.hf_provider.HFDatasetConversationProvider attribute) maker_name (bridge.data.vlm_datasets.hf_provider.HFDatasetConversationProvider attribute) mamba_head_dim (bridge.models.nemotron_vl.nemotron_vl_provider.NemotronNano12Bv2VLModelProvider attribute) (bridge.models.nemotronh.nemotron_h_provider.Nemotron3NanoProvider attribute) (bridge.models.nemotronh.nemotron_h_provider.NemotronHModelProvider attribute) (bridge.models.nemotronh.nemotron_h_provider.NemotronNanoModelProvider12Bv2 attribute) (bridge.models.nemotronh.nemotron_h_provider.NemotronNanoModelProvider9Bv2 attribute) mamba_num_groups (bridge.models.mamba.mamba_provider.MambaModelProvider attribute) (bridge.models.nemotron_vl.nemotron_vl_provider.NemotronNano12Bv2VLModelProvider attribute) (bridge.models.nemotronh.nemotron_h_provider.NemotronHModelProvider attribute) mamba_num_heads (bridge.models.nemotron_vl.nemotron_vl_provider.NemotronNano12Bv2VLModelProvider attribute) (bridge.models.nemotronh.nemotron_h_provider.Nemotron3NanoProvider attribute) (bridge.models.nemotronh.nemotron_h_provider.NemotronHModelProvider47B attribute) (bridge.models.nemotronh.nemotron_h_provider.NemotronHModelProvider4B attribute) (bridge.models.nemotronh.nemotron_h_provider.NemotronHModelProvider56B attribute) (bridge.models.nemotronh.nemotron_h_provider.NemotronHModelProvider8B attribute) (bridge.models.nemotronh.nemotron_h_provider.NemotronNanoModelProvider12Bv2 attribute) (bridge.models.nemotronh.nemotron_h_provider.NemotronNanoModelProvider9Bv2 attribute) mamba_stack_spec (bridge.models.mamba.mamba_builder.MambaModelConfig attribute) (bridge.models.mamba.mamba_provider.MambaModelProvider attribute) mamba_state_dim (bridge.models.nemotron_vl.nemotron_vl_provider.NemotronNano12Bv2VLModelProvider attribute) (bridge.models.nemotronh.nemotron_h_provider.Nemotron3NanoProvider attribute) (bridge.models.nemotronh.nemotron_h_provider.NemotronHModelProvider47B attribute) (bridge.models.nemotronh.nemotron_h_provider.NemotronHModelProvider4B attribute) (bridge.models.nemotronh.nemotron_h_provider.NemotronHModelProvider56B attribute) (bridge.models.nemotronh.nemotron_h_provider.NemotronHModelProvider8B attribute) (bridge.models.nemotronh.nemotron_h_provider.NemotronNanoModelProvider12Bv2 attribute) (bridge.models.nemotronh.nemotron_h_provider.NemotronNanoModelProvider9Bv2 attribute) MambaConv1dMapping (class in bridge.models.conversion.param_mapping) MambaInProjMapping (class in bridge.models.conversion.param_mapping) MambaModelBuilder (class in bridge.models.mamba.mamba_builder) MambaModelConfig (class in bridge.models.mamba.mamba_builder) MambaModelProvider (class in bridge.models.mamba.mamba_provider) manual_gc_interval (bridge.recipes.run_plugins.PerfEnvPlugin attribute) (bridge.recipes.run_plugins.PerfEnvPluginScriptArgs attribute) map() (in module bridge.peft.walk_utils) mapping (bridge.models.conversion.model_bridge.WeightConversionTask attribute) mapping_registry() (bridge.diffusion.conversion.flux.flux_bridge.FluxBridge method) (bridge.diffusion.conversion.wan.wan_bridge.WanBridge method) (bridge.models.bailing.bailing_moe2_bridge.BailingMoeV2Bridge method) (bridge.models.conversion.model_bridge.MegatronModelBridge method) (bridge.models.deepseek.deepseek_v2_bridge.DeepSeekV2Bridge method) (bridge.models.deepseek.deepseek_v3_bridge.DeepSeekV3Bridge method) (bridge.models.gemma.gemma2_bridge.Gemma2Bridge method) (bridge.models.gemma.gemma3_bridge.Gemma3ModelBridge method) (bridge.models.gemma.gemma_bridge.GemmaBridge method) (bridge.models.gemma_vl.gemma3_vl_bridge.Gemma3VLBridge method) (bridge.models.glm.glm45_bridge.GLM45Bridge method) (bridge.models.glm_vl.glm_45v_bridge.GLM45VBridge method) (bridge.models.gpt_oss.gpt_oss_bridge.GPTOSSBridge method) (bridge.models.kimi.kimi_bridge.KimiK2Bridge method) (bridge.models.kimi_vl.kimi_k25_vl_bridge.KimiK25VLBridge method) (bridge.models.llama.llama_bridge.LlamaBridge method) (bridge.models.llama_nemotron.llama_nemotron_bridge.LlamaNemotronBridge method) (bridge.models.mimo.mimo_bridge.MimoBridge method) (bridge.models.minimax_m2.minimax_m2_bridge.MiniMaxM2Bridge method) (bridge.models.ministral3.ministral3_bridge.Ministral3Bridge method) (bridge.models.mistral.mistral_bridge.MistralBridge method) (bridge.models.nemotron.nemotron_bridge.NemotronBridge method) (bridge.models.nemotron_vl.nemotron_vl_bridge.NemotronVLBridge method) (bridge.models.nemotronh.nemotron_h_bridge.NemotronHBridge method) (bridge.models.olmoe.olmoe_bridge.OlMoEBridge method) (bridge.models.qwen.qwen2_bridge.Qwen2Bridge method) (bridge.models.qwen.qwen3_bridge.Qwen3Bridge method) (bridge.models.qwen.qwen3_moe_bridge.Qwen3MoEBridge method) (bridge.models.qwen.qwen3_next_bridge.Qwen3NextBridge method) (bridge.models.qwen3_asr.qwen3_asr_bridge.Qwen3ASRBridge method) (bridge.models.qwen_audio.qwen2_audio_bridge.Qwen2AudioBridge method) (bridge.models.qwen_omni.qwen25_omni_bridge.Qwen25OmniBridge method) (bridge.models.qwen_vl.qwen25_vl_bridge.Qwen25VLBridge method) (bridge.models.qwen_vl.qwen35_vl_bridge.Qwen35VLBridge method) (bridge.models.qwen_vl.qwen35_vl_bridge.Qwen35VLMoEBridge method) (bridge.models.qwen_vl.qwen3_vl_bridge.Qwen3VLBridge method) (bridge.models.qwen_vl.qwen3_vl_bridge.Qwen3VLMoEBridge method) (bridge.models.sarvam.sarvam_mla_bridge.SarvamMLABridge method) (bridge.models.sarvam.sarvam_moe_bridge.SarvamMoEBridge method) MappingT (in module bridge.models.conversion.model_bridge) masked_next_token_loss() (in module bridge.training.losses) masked_softmax_fusion (bridge.models.gpt_provider.GPTModelProvider attribute) (bridge.models.llama_nemotron.llama_nemotron_provider.LlamaNemotronHeterogeneousProvider attribute) (bridge.models.nemotron_vl.nemotron_vl_provider.NemotronNano12Bv2VLModelProvider attribute) (bridge.models.nemotronh.nemotron_h_provider.NemotronHModelProvider attribute) (bridge.models.qwen3_asr.qwen3_asr_provider.Qwen3ASRModelProvider attribute) (bridge.models.qwen_omni.qwen25_omni_provider.Qwen25OmniModelProvider attribute) (bridge.models.qwen_vl.qwen3_vl_provider.Qwen3VLMoEModelProvider attribute) (bridge.models.t5_provider.T5ModelProvider attribute) match() (bridge.peft.module_matcher.ModuleMatcher method) materialize_adapter_weights() (bridge.models.conversion.peft_bridge.MegatronPeftBridge method) MAX_AREA_CONFIGS (in module bridge.diffusion.models.wan.inference) max_iterations (bridge.training.config.InProcessRestartConfig attribute) max_length (bridge.diffusion.models.flux.flow_matching.flux_inference_pipeline.ClipConfig attribute) (bridge.diffusion.models.flux.flow_matching.flux_inference_pipeline.T5Config attribute) (bridge.models.hf_pretrained.causal_lm.EncodeKwargs attribute) (bridge.models.hf_pretrained.causal_lm.GenerateKwargs attribute) max_new_tokens (bridge.models.hf_pretrained.causal_lm.GenerateKwargs attribute) max_position_embeddings (bridge.models.t5_provider.T5ModelProvider attribute) max_rank_faults (bridge.training.config.InProcessRestartConfig attribute) max_train_samples (bridge.training.config.FinetuningDatasetConfig attribute) max_vision_cuda_graph_seq_length (bridge.models.qwen_vl.modelling_qwen3_vl.transformer_config.Qwen3VLTransformerConfig attribute) (bridge.models.qwen_vl.qwen3_vl_provider.Qwen3VLModelProvider attribute) (bridge.models.qwen_vl.qwen3_vl_provider.Qwen3VLMoEModelProvider attribute) maybe_check_weight_hash_across_dp_replicas() (in module bridge.training.train) maybe_dequantize() (bridge.models.conversion.param_mapping.MegatronParamMapping method) maybe_enable_recompute_inputs_grad() (in module bridge.peft.recompute) maybe_finalize_async_save() (in module bridge.training.checkpointing) maybe_inject_state() (in module bridge.training.utils.train_utils) maybe_log_and_save_config() (in module bridge.training.setup) maybe_modify_converted_hf_weight() (bridge.diffusion.conversion.flux.flux_bridge.FluxBridge method) (bridge.models.conversion.model_bridge.MegatronModelBridge method) (bridge.models.deepseek.deepseek_v3_bridge.DeepSeekV3Bridge method) (bridge.models.kimi_vl.kimi_k25_vl_bridge.KimiK25VLBridge method) (bridge.models.mimo.mimo_bridge.MimoBridge method) maybe_modify_loaded_hf_weight() (bridge.diffusion.conversion.flux.flux_bridge.FluxBridge method) (bridge.models.conversion.model_bridge.MegatronModelBridge method) (bridge.models.gpt_oss.gpt_oss_bridge.GPTOSSBridge method) (bridge.models.kimi_vl.kimi_k25_vl_bridge.KimiK25VLBridge method) (bridge.models.mimo.mimo_bridge.MimoBridge method) (bridge.models.minimax_m2.minimax_m2_bridge.MiniMaxM2Bridge method) (bridge.models.ministral3.ministral3_bridge.Ministral3Bridge method) maybe_path_or_url_to_data_urls() (in module bridge.models.nemotron_vl.nemotron_vl_utils) maybe_report_stragglers() (in module bridge.training.train) maybe_run_manual_gc() (in module bridge.training.train) maybe_save_dataloader_state() (in module bridge.training.checkpointing) maybe_setup_simulated_fault() (in module bridge.training.fault_tolerance) maybe_synchronize_training_step() (in module bridge.training.train) maybe_wrap_for_inprocess_restart() (in module bridge.training.inprocess_restart) media_placeholder_token_id (bridge.models.kimi_vl.kimi_k25_vl_provider.KimiK25VLModelProvider attribute) megatron_cpu_init_context() (in module bridge.training.model_load_save) megatron_module (bridge.models.conversion.model_bridge.WeightConversionTask attribute) megatron_to_hf() (bridge.models.conversion.model_bridge._HFNameSuffixMapping method) (bridge.models.conversion.param_mapping.AutoMapping method) (bridge.models.conversion.param_mapping.ChunkedMapping method) (bridge.models.conversion.param_mapping.ColumnParallelMapping method) (bridge.models.conversion.param_mapping.ConcatenatedQKVMapping method) (bridge.models.conversion.param_mapping.DirectMapping method) (bridge.models.conversion.param_mapping.FusedGatedExpertMapping method) (bridge.models.conversion.param_mapping.GatedMLPMapping method) (bridge.models.conversion.param_mapping.GDNLinearMapping method) (bridge.models.conversion.param_mapping.GDNLinearMappingSeparate method) (bridge.models.conversion.param_mapping.KVMapping method) (bridge.models.conversion.param_mapping.MambaInProjMapping method) (bridge.models.conversion.param_mapping.MegatronParamMapping method) (bridge.models.conversion.param_mapping.QKVMapping method) (bridge.models.conversion.param_mapping.ReplicatedMapping method) (bridge.models.conversion.param_mapping.RMSNorm2ZeroCenteredRMSNormMapping method) (bridge.models.conversion.param_mapping.RowParallelMapping method) (bridge.models.conversion.quant_mapping.AmaxFanoutMapping method) (bridge.models.gpt_oss.gpt_oss_bridge.GPTOSSMLPDownProjMapping method) (bridge.models.gpt_oss.gpt_oss_bridge.GPTOSSMLPGateUpProjMapping method) (bridge.models.minimax_m2.minimax_m2_bridge._FullDimQKNormMapping method) (bridge.models.nemotronh.nemotron_h_bridge._MTPFlatteningMapping method) (bridge.models.nemotronh.nemotron_h_bridge._MTPFlatteningQKVMapping method) megatron_to_hf_activation() (bridge.models.conversion.model_bridge.MegatronModelBridge class method) megatron_to_hf_config() (bridge.models.conversion.model_bridge.MegatronModelBridge class method) (bridge.models.deepseek.deepseek_v2_bridge.DeepSeekV2Bridge class method) (bridge.models.deepseek.deepseek_v3_bridge.DeepSeekV3Bridge class method) (bridge.models.gemma.gemma2_bridge.Gemma2Bridge class method) (bridge.models.gemma.gemma3_bridge.Gemma3ModelBridge class method) (bridge.models.llama.llama_bridge.LlamaBridge class method) (bridge.models.llama_nemotron.llama_nemotron_bridge.LlamaNemotronBridge class method) (bridge.models.nemotronh.nemotron_h_bridge.NemotronHBridge class method) (bridge.models.qwen.qwen3_moe_bridge.Qwen3MoEBridge class method) megatron_to_hf_lookup() (bridge.models.conversion.mapping_registry.MegatronMappingRegistry method) MEGATRON_TO_HF_LORA_SUFFIX (in module bridge.models.conversion.peft_bridge) MEGATRON_TOKENIZERS (in module bridge.training.tokenizers.tokenizer) MegatronFluxAdapter (class in bridge.diffusion.models.flux.flow_matching.flux_adapter) MegatronMappingRegistry (class in bridge.models.conversion.mapping_registry) MegatronModel (in module bridge.models.conversion.model_bridge) (in module bridge.models.conversion.peft_bridge) MegatronModelBridge (class in bridge.models.conversion.model_bridge) MegatronModelT (in module bridge.models.conversion.auto_bridge) MegatronParamMapping (class in bridge.models.conversion.param_mapping) MegatronPeftBridge (class in bridge.models.conversion.peft_bridge) MegatronPretrainingBatchSampler (class in bridge.data.samplers) MegatronPretrainingRandomSampler (class in bridge.data.samplers) MegatronPretrainingSampler (class in bridge.data.samplers) MegatronWeightTuple (class in bridge.models.conversion.model_bridge) memmap_workers (bridge.training.config.FinetuningDatasetConfig attribute) MEMORY_KEYS (in module bridge.training.utils.train_utils) memory_snapshot_path (bridge.recipes.run_plugins.PyTorchProfilerPlugin attribute) (bridge.recipes.run_plugins.PyTorchProfilerPluginScriptArgs attribute) merge() (bridge.peft.lora.LoRAMerge method) merge_file (bridge.training.tokenizers.config.TokenizerConfig attribute) merge_gdn_linear_weights() (in module bridge.models.conversion.param_mapping) merge_kv_biases() (in module bridge.models.conversion.param_mapping) merge_kv_weights() (in module bridge.models.conversion.param_mapping) merge_qkv_biases() (in module bridge.models.conversion.param_mapping) merge_qkv_weights() (in module bridge.models.conversion.param_mapping) meta_model (bridge.models.model_provider.ModelProviderMixin property) metadata_path (bridge.training.tokenizers.config.TokenizerConfig attribute) method (bridge.training.comm_overlap.BulkOverlapCfg attribute) (bridge.training.comm_overlap.PipelineOverlapCfg attribute) (bridge.training.comm_overlap.RingExchangeOverlapCfg attribute) micro_batch_size (bridge.data.energon.energon_provider.EnergonProvider attribute) (bridge.diffusion.data.common.diffusion_energon_datamodule.DiffusionDataModuleConfig attribute) (bridge.diffusion.data.flux.flux_energon_datamodule.FluxDataModuleConfig attribute) (bridge.diffusion.data.flux.flux_energon_datamodule.FluxDatasetConfig attribute) (bridge.diffusion.data.flux.flux_mock_datamodule.FluxMockDataModuleConfig attribute) (bridge.diffusion.data.wan.wan_energon_datamodule.WanDataModuleConfig attribute) (bridge.diffusion.data.wan.wan_energon_datamodule.WanDatasetConfig attribute) (bridge.diffusion.data.wan.wan_mock_datamodule.WanMockDataModuleConfig attribute) (bridge.recipes.qwen_vl.qwen3_vl.Qwen3VLCommonKwargs attribute) mimo_collate_fn() (in module bridge.data.mimo.collate) mimo_infra (bridge.training.setup_mimo.MimoSetupOutput attribute) mimo_parallelism_config (bridge.models.mimo.mimo_provider.MimoModelProvider attribute) mimo_runtime_config_update() (in module bridge.training.config) MimoBridge (class in bridge.models.mimo.mimo_bridge) MimoDataset (class in bridge.data.mimo.dataset) MimoDatasetProvider (class in bridge.data.mimo.base_provider) MimoModelInfra (class in bridge.models.mimo.mimo_provider) MimoModelProvider (class in bridge.models.mimo.mimo_provider) MimoParallelismConfig (class in bridge.models.mimo.mimo_config) MimoSetupOutput (class in bridge.training.setup_mimo) min_length (bridge.models.hf_pretrained.causal_lm.GenerateKwargs attribute) min_loss_scale (bridge.training.mixed_precision.MixedPrecisionConfig attribute) min_lr (bridge.recipes.qwen_vl.qwen3_vl.Qwen3VLCommonKwargs attribute) minimax_m2_layer_spec() (in module bridge.models.minimax_m2.minimax_m2_provider) MiniMaxM2Bridge (class in bridge.models.minimax_m2.minimax_m2_bridge) ministral3_14b_peft_config() (in module bridge.recipes.ministral3.ministral3) ministral3_14b_sft_config() (in module bridge.recipes.ministral3.ministral3) ministral3_3b_peft_config() (in module bridge.recipes.ministral3.ministral3) ministral3_3b_sft_config() (in module bridge.recipes.ministral3.ministral3) ministral3_8b_peft_config() (in module bridge.recipes.ministral3.ministral3) ministral3_8b_sft_config() (in module bridge.recipes.ministral3.ministral3) ministral3_collate_fn() (in module bridge.data.vlm_datasets.collate) Ministral3Bridge (class in bridge.models.ministral3.ministral3_bridge) Ministral3Model (class in bridge.models.ministral3.modeling_ministral3) Ministral3ModelProvider (class in bridge.models.ministral3.ministral3_provider) Ministral3ModelProvider14B (class in bridge.models.ministral3.ministral3_provider) Ministral3ModelProvider3B (class in bridge.models.ministral3.ministral3_provider) Ministral3ModelProvider8B (class in bridge.models.ministral3.ministral3_provider) ministral_layer_spec() (in module bridge.models.ministral3.ministral3_provider) MinistralTEDotProductAttention (class in bridge.models.ministral3.ministral3_provider) MINOR (in module bridge.package_info) MISSING_NEMO_EXPORT_DEPLOY_MSG (in module bridge.utils.import_utils) MISSING_NEMO_RUN_MSG (in module bridge.utils.import_utils) MISSING_NVINSPECT_MSG (in module bridge.training.tensor_inspect) MISSING_NVRX_MSG (in module bridge.utils.import_utils) MISSING_QWEN_VL_UTILS_MSG (in module bridge.data.vlm_datasets.collate) MistralBridge (class in bridge.models.mistral.mistral_bridge) MistralModelProvider (class in bridge.models.mistral.mistral_provider) MistralSmall3ModelProvider24B (class in bridge.models.mistral.mistral_provider) mixed_precision (bridge.training.config.ConfigContainer attribute) MIXED_PRECISION_RECIPES (in module bridge.training.mixed_precision) mixed_precision_wrapper (bridge.models.model_provider.GetModelKwargs attribute) MixedPrecisionConfig (class in bridge.training.mixed_precision) MLA_ROPE_SCALING_MAPPING (bridge.models.conversion.model_bridge.MegatronModelBridge attribute) mla_transformer_config (bridge.models.conversion.auto_bridge.AutoBridge property) MLAModelProvider (class in bridge.models.mla_provider) MLATransformerConfig (class in bridge.models.transformer_config) mlflow_experiment (bridge.training.config.LoggerConfig attribute) mlflow_logger (bridge.training.state.GlobalState property) mlflow_run_name (bridge.training.config.LoggerConfig attribute) mlflow_tags (bridge.training.config.LoggerConfig attribute) mlflow_tracking_uri (bridge.training.config.LoggerConfig attribute) mlp_only_layers (bridge.models.qwen_vl.qwen3_vl_provider.Qwen3VLMoEModelProvider attribute) MLPEmbedder (class in bridge.diffusion.models.flux.layers) mm_token_type_ids (bridge.training.utils.visual_inputs.GenericVisualInputs attribute) mm_tokens_per_image (bridge.models.gemma_vl.gemma3_vl_provider.Gemma3VLModelProvider attribute) MMDiTLayer (class in bridge.diffusion.models.flux.flux_layer_spec) mock (bridge.recipes.qwen_vl.qwen3_vl.Qwen3VLCommonKwargs attribute) mock_batch() (in module bridge.diffusion.data.wan.wan_mock_datamodule) MockGPTDatasetConfig (class in bridge.training.config) MockMimoProvider (class in bridge.data.mimo.mock_provider) MockQwen25VLDataset (class in bridge.recipes.qwen_vl.qwen25_vl_dataset) MockQwen25VLDatasetProvider (class in bridge.recipes.qwen_vl.qwen25_vl_dataset) MockVLMConversationProvider (class in bridge.data.vlm_datasets.mock_provider) modality_columns (bridge.data.mimo.hf_provider.HFMimoDatasetProvider attribute) modality_configs (bridge.data.mimo.mock_provider.MockMimoProvider attribute) modality_submodules_spec (bridge.models.mimo.mimo_provider.MimoModelProvider attribute) model (bridge.models.hf_pretrained.base.PreTrainedBase property) (bridge.models.hf_pretrained.causal_lm.PreTrainedCausalLM property) (bridge.models.hf_pretrained.vlm.PreTrainedVLM property) (bridge.training.callbacks.CallbackContext attribute) (bridge.training.checkpointing.CheckpointLoadContext attribute) (bridge.training.checkpointing.CheckpointSaveContext attribute) (bridge.training.config.ConfigContainer attribute) (bridge.training.config.OptimizerConfigOverrideProviderContext attribute) (bridge.training.setup.SetupOutput attribute) (bridge.training.setup_mimo.MimoSetupOutput attribute) model_channels (bridge.diffusion.models.flux.flux_provider.FluxProvider attribute) model_input_names (bridge.models.qwen3_asr.hf_qwen3_asr.processing_qwen3_asr.Qwen3ASRProcessor property) model_name_or_path (bridge.diffusion.conversion.flux.flux_hf_pretrained.PreTrainedFlux property) (bridge.diffusion.conversion.wan.wan_hf_pretrained.PreTrainedWAN property) (bridge.models.hf_pretrained.base.PreTrainedBase attribute) (bridge.models.hf_pretrained.causal_lm.PreTrainedCausalLM property) (bridge.models.hf_pretrained.vlm.PreTrainedVLM property) model_type (bridge.models.bailing.configuration_bailing_moe_v2.BailingMoeV2Config attribute) MODEL_TYPE (bridge.models.conversion.model_bridge.MegatronModelBridge attribute) model_type (bridge.models.model_provider.GetModelKwargs attribute) (bridge.models.qwen3_asr.hf_qwen3_asr.configuration_qwen3_asr.Qwen3ASRAudioEncoderConfig attribute) (bridge.models.qwen3_asr.hf_qwen3_asr.configuration_qwen3_asr.Qwen3ASRConfig attribute) (bridge.models.qwen3_asr.hf_qwen3_asr.configuration_qwen3_asr.Qwen3ASRTextConfig attribute) (bridge.models.qwen3_asr.hf_qwen3_asr.configuration_qwen3_asr.Qwen3ASRThinkerConfig attribute) ModelAdapter (class in bridge.diffusion.common.flow_matching.adapters.base) ModelBuilder (class in bridge.models.common.base) ModelConfig (class in bridge.models.common.base) modelopt_mamba_stack_spec() (in module bridge.models.mamba.mamba_builder) (in module bridge.models.mamba.mamba_provider) modelopt_transformer_layer_spec() (in module bridge.models.gpt.gpt_builder) (in module bridge.models.gpt_provider) ModelOptDistillConfig (class in bridge.training.post_training.distillation) ModelParallelKwargs (class in bridge.models.model_provider) ModelProviderMixin (class in bridge.models.model_provider) ModelProviderTarget (in module bridge.models.conversion.model_bridge) ModelT (in module bridge.models.common.base) (in module bridge.models.model_provider) ModelType (in module bridge.peft.base) modulate() (bridge.diffusion.models.flux.flux_layer_spec.AdaLN method) (bridge.diffusion.models.wan.wan_layer_spec.WanAdaLN method) modulated_layernorm() (bridge.diffusion.models.flux.flux_layer_spec.AdaLN method) module bridge bridge.data bridge.data.builders bridge.data.builders.finetuning_dataset bridge.data.builders.hf_dataset bridge.data.datasets bridge.data.datasets.fim_dataset bridge.data.datasets.packed_parquet bridge.data.datasets.packed_sequence bridge.data.datasets.packing_utils bridge.data.datasets.sft bridge.data.datasets.utils bridge.data.energon bridge.data.energon.base_energon_datamodule bridge.data.energon.energon_provider bridge.data.energon.hf_encoder_task_encoder bridge.data.energon.task_encoder_utils bridge.data.finetuning bridge.data.hf_processors bridge.data.hf_processors.gsm8k bridge.data.hf_processors.openmathinstruct2 bridge.data.hf_processors.squad bridge.data.iterator_utils bridge.data.loaders bridge.data.mimo bridge.data.mimo.base_provider bridge.data.mimo.collate bridge.data.mimo.dataset bridge.data.mimo.dp_utils bridge.data.mimo.hf_provider bridge.data.mimo.loaders bridge.data.mimo.mock_provider bridge.data.samplers bridge.data.utils bridge.data.vlm_datasets bridge.data.vlm_datasets.collate bridge.data.vlm_datasets.conversation_dataset bridge.data.vlm_datasets.hf_dataset_makers bridge.data.vlm_datasets.hf_provider bridge.data.vlm_datasets.mock_provider bridge.data.vlm_datasets.preloaded_provider bridge.data.vlm_datasets.token_utils bridge.diffusion bridge.diffusion.base bridge.diffusion.common bridge.diffusion.common.flow_matching bridge.diffusion.common.flow_matching.adapters bridge.diffusion.common.flow_matching.adapters.base bridge.diffusion.common.flow_matching.adapters.simple bridge.diffusion.common.flow_matching.flow_matching_pipeline bridge.diffusion.conversion bridge.diffusion.conversion.flux bridge.diffusion.conversion.flux.flux_bridge bridge.diffusion.conversion.flux.flux_hf_pretrained bridge.diffusion.conversion.wan bridge.diffusion.conversion.wan.wan_bridge bridge.diffusion.conversion.wan.wan_hf_pretrained bridge.diffusion.data bridge.diffusion.data.common bridge.diffusion.data.common.diffusion_energon_datamodule bridge.diffusion.data.common.diffusion_sample bridge.diffusion.data.common.diffusion_task_encoder_with_sp bridge.diffusion.data.common.sequence_packing_utils bridge.diffusion.data.flux bridge.diffusion.data.flux.flux_energon_datamodule bridge.diffusion.data.flux.flux_mock_datamodule bridge.diffusion.data.flux.flux_taskencoder bridge.diffusion.data.wan bridge.diffusion.data.wan.wan_energon_datamodule bridge.diffusion.data.wan.wan_mock_datamodule bridge.diffusion.data.wan.wan_taskencoder bridge.diffusion.models bridge.diffusion.models.common bridge.diffusion.models.common.dit_attention bridge.diffusion.models.common.dit_embeddings bridge.diffusion.models.common.normalization bridge.diffusion.models.flux bridge.diffusion.models.flux.flow_matching bridge.diffusion.models.flux.flow_matching.flux_adapter bridge.diffusion.models.flux.flow_matching.flux_inference_pipeline bridge.diffusion.models.flux.flux_attention bridge.diffusion.models.flux.flux_layer_spec bridge.diffusion.models.flux.flux_model bridge.diffusion.models.flux.flux_provider bridge.diffusion.models.flux.flux_step bridge.diffusion.models.flux.layers bridge.diffusion.models.wan bridge.diffusion.models.wan.flow_matching bridge.diffusion.models.wan.flow_matching.flow_inference_pipeline bridge.diffusion.models.wan.flow_matching.flow_matching_pipeline_wan bridge.diffusion.models.wan.inference bridge.diffusion.models.wan.inference.utils bridge.diffusion.models.wan.rope_utils bridge.diffusion.models.wan.utils bridge.diffusion.models.wan.wan_layer_spec bridge.diffusion.models.wan.wan_model bridge.diffusion.models.wan.wan_provider bridge.diffusion.models.wan.wan_step bridge.diffusion.recipes bridge.diffusion.recipes.flux bridge.diffusion.recipes.flux.flux bridge.diffusion.recipes.wan bridge.diffusion.recipes.wan.wan bridge.models bridge.models.bailing bridge.models.bailing.bailing_moe2_bridge bridge.models.bailing.configuration_bailing_moe_v2 bridge.models.bailing.modeling_bailing_moe_v2 bridge.models.common bridge.models.common.base bridge.models.common.unimodal bridge.models.config bridge.models.conversion bridge.models.conversion.auto_bridge bridge.models.conversion.mapping_registry bridge.models.conversion.model_bridge bridge.models.conversion.param_mapping bridge.models.conversion.peft_bridge bridge.models.conversion.quant_mapping bridge.models.conversion.transformers_compat bridge.models.conversion.utils bridge.models.decorators bridge.models.decorators.dispatch bridge.models.decorators.torchrun bridge.models.deepseek bridge.models.deepseek.common bridge.models.deepseek.deepseek_v2_bridge bridge.models.deepseek.deepseek_v3_bridge bridge.models.distillation_provider bridge.models.gemma bridge.models.gemma.gemma2_bridge bridge.models.gemma.gemma2_provider bridge.models.gemma.gemma3_bridge bridge.models.gemma.gemma3_provider bridge.models.gemma.gemma_bridge bridge.models.gemma.gemma_provider bridge.models.gemma.modules bridge.models.gemma_vl bridge.models.gemma_vl.gemma3_vl_bridge bridge.models.gemma_vl.gemma3_vl_provider bridge.models.gemma_vl.modeling_gemma3_vl bridge.models.glm bridge.models.glm.glm45_bridge bridge.models.glm.glm_moe_mappings bridge.models.glm_vl bridge.models.glm_vl.glm_45v_bridge bridge.models.glm_vl.glm_45v_provider bridge.models.glm_vl.modeling_glm_45v bridge.models.gpt bridge.models.gpt.gpt_builder bridge.models.gpt_full_te_layer_autocast_spec bridge.models.gpt_oss bridge.models.gpt_oss.gpt_oss_bridge bridge.models.gpt_provider bridge.models.hf_pretrained bridge.models.hf_pretrained.base bridge.models.hf_pretrained.causal_lm bridge.models.hf_pretrained.safe_config_loader bridge.models.hf_pretrained.state bridge.models.hf_pretrained.utils bridge.models.hf_pretrained.vlm bridge.models.kimi bridge.models.kimi.kimi_bridge bridge.models.kimi_vl bridge.models.kimi_vl.kimi_k25_vl_bridge bridge.models.kimi_vl.kimi_k25_vl_provider bridge.models.kimi_vl.modeling_kimi_k25_vl bridge.models.kimi_vl.utils bridge.models.llama bridge.models.llama.llama4_utils bridge.models.llama.llama_bridge bridge.models.llama_nemotron bridge.models.llama_nemotron.llama_nemotron_bridge bridge.models.llama_nemotron.llama_nemotron_provider bridge.models.mamba bridge.models.mamba.mamba_builder bridge.models.mamba.mamba_provider bridge.models.mimo bridge.models.mimo.llava_provider bridge.models.mimo.mimo_bridge bridge.models.mimo.mimo_builder bridge.models.mimo.mimo_config bridge.models.mimo.mimo_ddp bridge.models.mimo.mimo_provider bridge.models.minimax_m2 bridge.models.minimax_m2.minimax_m2_bridge bridge.models.minimax_m2.minimax_m2_provider bridge.models.ministral3 bridge.models.ministral3.ministral3_bridge bridge.models.ministral3.ministral3_provider bridge.models.ministral3.modeling_ministral3 bridge.models.mistral bridge.models.mistral.mistral_bridge bridge.models.mistral.mistral_provider bridge.models.mla_provider bridge.models.model_provider bridge.models.nemotron bridge.models.nemotron.nemotron_bridge bridge.models.nemotron_vl bridge.models.nemotron_vl.modeling_nemotron_vl bridge.models.nemotron_vl.nemotron_vl_bridge bridge.models.nemotron_vl.nemotron_vl_provider bridge.models.nemotron_vl.nemotron_vl_utils bridge.models.nemotronh bridge.models.nemotronh.nemotron_h_bridge bridge.models.nemotronh.nemotron_h_provider bridge.models.olmoe bridge.models.olmoe.olmoe_bridge bridge.models.olmoe.olmoe_provider bridge.models.qwen bridge.models.qwen.qwen2_bridge bridge.models.qwen.qwen3_bridge bridge.models.qwen.qwen3_moe_bridge bridge.models.qwen.qwen3_next_bridge bridge.models.qwen3_asr bridge.models.qwen3_asr.hf_qwen3_asr bridge.models.qwen3_asr.hf_qwen3_asr.configuration_qwen3_asr bridge.models.qwen3_asr.hf_qwen3_asr.modeling_qwen3_asr bridge.models.qwen3_asr.hf_qwen3_asr.processing_qwen3_asr bridge.models.qwen3_asr.modeling_qwen3_asr bridge.models.qwen3_asr.modeling_qwen3_asr.model bridge.models.qwen3_asr.modeling_qwen3_asr.rope bridge.models.qwen3_asr.modeling_qwen3_asr.thinker_model bridge.models.qwen3_asr.modeling_qwen3_asr.transformer_config bridge.models.qwen3_asr.qwen3_asr_bridge bridge.models.qwen3_asr.qwen3_asr_provider bridge.models.qwen_audio bridge.models.qwen_audio.modeling_qwen2_audio bridge.models.qwen_audio.qwen2_audio_bridge bridge.models.qwen_audio.qwen2_audio_provider bridge.models.qwen_omni bridge.models.qwen_omni.modeling_qwen25_omni bridge.models.qwen_omni.modeling_qwen25_omni.model bridge.models.qwen_omni.modeling_qwen25_omni.rope bridge.models.qwen_omni.modeling_qwen25_omni.thinker_model bridge.models.qwen_omni.modeling_qwen25_omni.transformer_config bridge.models.qwen_omni.qwen25_omni_bridge bridge.models.qwen_omni.qwen25_omni_provider bridge.models.qwen_vl bridge.models.qwen_vl.modeling_qwen25_vl bridge.models.qwen_vl.modelling_qwen3_vl bridge.models.qwen_vl.modelling_qwen3_vl.attention bridge.models.qwen_vl.modelling_qwen3_vl.model bridge.models.qwen_vl.modelling_qwen3_vl.rope bridge.models.qwen_vl.modelling_qwen3_vl.text_model bridge.models.qwen_vl.modelling_qwen3_vl.transformer_block bridge.models.qwen_vl.modelling_qwen3_vl.transformer_config bridge.models.qwen_vl.modelling_qwen3_vl.utils bridge.models.qwen_vl.modelling_qwen3_vl.vision_model bridge.models.qwen_vl.qwen25_vl_bridge bridge.models.qwen_vl.qwen25_vl_provider bridge.models.qwen_vl.qwen35_vl_bridge bridge.models.qwen_vl.qwen35_vl_provider bridge.models.qwen_vl.qwen3_vl_bridge bridge.models.qwen_vl.qwen3_vl_provider bridge.models.qwen_vl.qwen3_vl_step bridge.models.sarvam bridge.models.sarvam.common bridge.models.sarvam.sarvam_mla_bridge bridge.models.sarvam.sarvam_moe_bridge bridge.models.sarvam.sarvam_provider bridge.models.t5_provider bridge.models.transformer_config bridge.package_info bridge.peft bridge.peft.adapter_wrapper bridge.peft.base bridge.peft.canonical_lora bridge.peft.dora bridge.peft.dora_layers bridge.peft.lora bridge.peft.lora_layers bridge.peft.module_matcher bridge.peft.recompute bridge.peft.utils bridge.peft.walk_utils bridge.recipes bridge.recipes.common bridge.recipes.deepseek bridge.recipes.deepseek.deepseek_v2 bridge.recipes.deepseek.deepseek_v3 bridge.recipes.flux bridge.recipes.gemma bridge.recipes.gemma.gemma2 bridge.recipes.gemma.gemma3 bridge.recipes.gemma3_vl bridge.recipes.gemma3_vl.gemma3_vl bridge.recipes.glm bridge.recipes.glm.glm45 bridge.recipes.glm_vl bridge.recipes.glm_vl.glm_45v bridge.recipes.gpt bridge.recipes.gpt.gpt3_175b bridge.recipes.gpt.vanilla_gpt bridge.recipes.gpt_oss bridge.recipes.gpt_oss.gpt_oss bridge.recipes.kimi_vl bridge.recipes.kimi_vl.kimi_k25_vl bridge.recipes.llama bridge.recipes.llama.llama2 bridge.recipes.llama.llama3 bridge.recipes.ministral3 bridge.recipes.ministral3.ministral3 bridge.recipes.moonlight bridge.recipes.moonlight.moonlight_16b bridge.recipes.nemotron_vl bridge.recipes.nemotron_vl.nemotron_nano_v2_vl bridge.recipes.nemotronh bridge.recipes.nemotronh.nemotron_3_nano bridge.recipes.nemotronh.nemotron_3_super bridge.recipes.nemotronh.nemotron_nano_v2 bridge.recipes.nemotronh.nemotronh bridge.recipes.olmoe bridge.recipes.olmoe.olmoe_7b bridge.recipes.qwen bridge.recipes.qwen.qwen2 bridge.recipes.qwen.qwen3 bridge.recipes.qwen.qwen3_moe bridge.recipes.qwen.qwen3_next bridge.recipes.qwen2_audio bridge.recipes.qwen2_audio.qwen2_audio bridge.recipes.qwen_vl bridge.recipes.qwen_vl.qwen25_vl bridge.recipes.qwen_vl.qwen25_vl_dataset bridge.recipes.qwen_vl.qwen35_vl bridge.recipes.qwen_vl.qwen3_vl bridge.recipes.run_plugins bridge.recipes.utils bridge.recipes.utils.dataset_utils bridge.recipes.utils.finetune_utils bridge.recipes.utils.optimizer_utils bridge.recipes.utils.tokenizer_utils bridge.recipes.wan bridge.training bridge.training.audio_lm_step bridge.training.callbacks bridge.training.checkpointing bridge.training.comm_overlap bridge.training.config bridge.training.distill bridge.training.eval bridge.training.fault_tolerance bridge.training.finetune bridge.training.flex_dispatcher_backend bridge.training.forward_step_func_types bridge.training.gpt_step bridge.training.initialize bridge.training.inprocess_restart bridge.training.llava_step bridge.training.losses bridge.training.mimo_parallel_utils bridge.training.mimo_step bridge.training.mixed_precision bridge.training.mlm_compat bridge.training.mlm_compat.activations bridge.training.mlm_compat.arguments bridge.training.mlm_compat.model bridge.training.model_load_save bridge.training.nvrx_straggler bridge.training.optim bridge.training.post_training bridge.training.post_training.checkpointing bridge.training.post_training.distillation bridge.training.pretrain bridge.training.pretrain_mimo bridge.training.profiling bridge.training.setup bridge.training.setup_mimo bridge.training.state bridge.training.tensor_inspect bridge.training.tokenizers bridge.training.tokenizers.config bridge.training.tokenizers.tokenizer bridge.training.train bridge.training.train_mimo bridge.training.utils bridge.training.utils.batch_utils bridge.training.utils.checkpoint_utils bridge.training.utils.comet_utils bridge.training.utils.config_utils bridge.training.utils.flop_utils bridge.training.utils.log_utils bridge.training.utils.mlflow_utils bridge.training.utils.moe_token_drop bridge.training.utils.omegaconf_utils bridge.training.utils.packed_seq_utils bridge.training.utils.padding_utils bridge.training.utils.pg_utils bridge.training.utils.sig_utils bridge.training.utils.theoretical_memory_utils bridge.training.utils.train_utils bridge.training.utils.visual_inputs bridge.training.utils.wandb_utils bridge.training.vlm_step bridge.utils bridge.utils.activation_map bridge.utils.common_utils bridge.utils.decorators bridge.utils.fusions bridge.utils.import_utils bridge.utils.instantiate_utils bridge.utils.safe_pickle bridge.utils.slurm_utils bridge.utils.vocab_utils bridge.utils.yaml_utils module_filter() (in module bridge.training.utils.log_utils) module_name (bridge.training.checkpointing.CheckpointLoadContext attribute) (bridge.training.checkpointing.CheckpointSaveContext attribute) module_names (bridge.models.mimo.mimo_config.MimoParallelismConfig property) module_output_ndim (bridge.models.mimo.mimo_provider.MimoModelInfra attribute) (bridge.models.mimo.mimo_provider.MimoModelProvider attribute) module_parallelisms (bridge.models.mimo.mimo_config.MimoParallelismConfig attribute) module_to_grid_map (bridge.models.mimo.mimo_provider.MimoModelInfra attribute) module_to_grid_tuple (bridge.training.setup_mimo.MimoSetupOutput attribute) ModuleDict (class in bridge.peft.canonical_lora) ModuleFunc (in module bridge.peft.walk_utils) ModuleMatcher (class in bridge.peft.module_matcher) ModuleParallelismConfig (class in bridge.models.mimo.mimo_config) ModulePredicate (in module bridge.peft.walk_utils) moe_aux_loss_coeff (bridge.models.glm_vl.glm_45v_provider.GLM45VModelProvider attribute) (bridge.models.nemotron_vl.nemotron_vl_provider.NemotronNano12Bv2VLModelProvider attribute) (bridge.models.nemotronh.nemotron_h_provider.NemotronHModelProvider attribute) (bridge.models.olmoe.olmoe_provider.OlMoEModelProvider attribute) (bridge.models.qwen_vl.qwen35_vl_provider.Qwen35VLMoEModelProvider attribute) (bridge.models.sarvam.sarvam_provider.SarvamMLAModelProvider attribute) (bridge.models.sarvam.sarvam_provider.SarvamMoEModelProvider attribute) moe_ffn_hidden_size (bridge.models.glm_vl.glm_45v_provider.GLM45VModelProvider attribute) (bridge.models.nemotronh.nemotron_h_provider.Nemotron3NanoProvider attribute) (bridge.models.olmoe.olmoe_provider.OlMoEModelProvider attribute) (bridge.models.sarvam.sarvam_provider.SarvamMLAModelProvider attribute) (bridge.models.sarvam.sarvam_provider.SarvamMoEModelProvider attribute) moe_flex_dispatcher_backend (bridge.recipes.qwen_vl.qwen3_vl.Qwen3VLCommonKwargs attribute) (bridge.recipes.run_plugins.PerfEnvPlugin attribute) moe_grouped_gemm (bridge.models.glm_vl.glm_45v_provider.GLM45VModelProvider attribute) (bridge.models.gpt_provider.GPTModelProvider attribute) (bridge.models.nemotron_vl.nemotron_vl_provider.NemotronNano12Bv2VLModelProvider attribute) (bridge.models.nemotronh.nemotron_h_provider.NemotronHModelProvider attribute) (bridge.models.olmoe.olmoe_provider.OlMoEModelProvider attribute) (bridge.models.qwen_vl.qwen35_vl_provider.Qwen35VLMoEModelProvider attribute) (bridge.models.sarvam.sarvam_provider.SarvamMLAModelProvider attribute) (bridge.models.sarvam.sarvam_provider.SarvamMoEModelProvider attribute) moe_infer() (bridge.models.bailing.modeling_bailing_moe_v2.BailingMoeV2SparseMoeBlock method) moe_latent_size (bridge.models.nemotronh.nemotron_h_provider.NemotronHModelProvider attribute) moe_layer_freq (bridge.models.glm_vl.glm_45v_provider.GLM45VModelProvider attribute) (bridge.models.sarvam.sarvam_provider.SarvamMLAModelProvider attribute) (bridge.models.sarvam.sarvam_provider.SarvamMoEModelProvider attribute) moe_permute_fusion (bridge.models.glm_vl.glm_45v_provider.GLM45VModelProvider attribute) (bridge.models.nemotron_vl.nemotron_vl_provider.NemotronNano12Bv2VLModelProvider attribute) (bridge.models.nemotronh.nemotron_h_provider.NemotronHModelProvider attribute) (bridge.models.olmoe.olmoe_provider.OlMoEModelProvider attribute) (bridge.models.qwen_vl.qwen35_vl_provider.Qwen35VLMoEModelProvider attribute) (bridge.models.qwen_vl.qwen3_vl_provider.Qwen3VLMoEModelProvider attribute) (bridge.models.sarvam.sarvam_provider.SarvamMLAModelProvider attribute) (bridge.models.sarvam.sarvam_provider.SarvamMoEModelProvider attribute) moe_router_bias_update_rate (bridge.models.glm_vl.glm_45v_provider.GLM45VModelProvider attribute) (bridge.models.qwen_vl.qwen3_vl_provider.Qwen3VLMoEModelProvider attribute) (bridge.models.sarvam.sarvam_provider.SarvamMLAModelProvider attribute) (bridge.models.sarvam.sarvam_provider.SarvamMoEModelProvider attribute) moe_router_dtype (bridge.models.glm_vl.glm_45v_provider.GLM45VModelProvider attribute) (bridge.models.nemotron_vl.nemotron_vl_provider.NemotronNano12Bv2VLModelProvider attribute) (bridge.models.nemotronh.nemotron_h_provider.NemotronHModelProvider attribute) (bridge.models.olmoe.olmoe_provider.OlMoEModelProvider attribute) (bridge.models.qwen_vl.qwen35_vl_provider.Qwen35VLMoEModelProvider attribute) (bridge.models.qwen_vl.qwen3_vl_provider.Qwen3VLMoEModelProvider attribute) (bridge.models.sarvam.sarvam_provider.SarvamMLAModelProvider attribute) (bridge.models.sarvam.sarvam_provider.SarvamMoEModelProvider attribute) moe_router_enable_expert_bias (bridge.models.glm_vl.glm_45v_provider.GLM45VModelProvider attribute) (bridge.models.nemotron_vl.nemotron_vl_provider.NemotronNano12Bv2VLModelProvider attribute) (bridge.models.nemotronh.nemotron_h_provider.NemotronHModelProvider attribute) (bridge.models.sarvam.sarvam_provider.SarvamMLAModelProvider attribute) (bridge.models.sarvam.sarvam_provider.SarvamMoEModelProvider attribute) moe_router_group_topk (bridge.models.nemotronh.nemotron_h_provider.Nemotron3NanoProvider attribute) moe_router_load_balancing_type (bridge.models.glm_vl.glm_45v_provider.GLM45VModelProvider attribute) (bridge.models.nemotron_vl.nemotron_vl_provider.NemotronNano12Bv2VLModelProvider attribute) (bridge.models.nemotronh.nemotron_h_provider.NemotronHModelProvider attribute) (bridge.models.olmoe.olmoe_provider.OlMoEModelProvider attribute) (bridge.models.qwen_vl.qwen35_vl_provider.Qwen35VLMoEModelProvider attribute) moe_router_num_groups (bridge.models.nemotronh.nemotron_h_provider.Nemotron3NanoProvider attribute) moe_router_pre_softmax (bridge.models.glm_vl.glm_45v_provider.GLM45VModelProvider attribute) (bridge.models.olmoe.olmoe_provider.OlMoEModelProvider attribute) (bridge.models.qwen_vl.qwen35_vl_provider.Qwen35VLMoEModelProvider attribute) (bridge.models.qwen_vl.qwen3_vl_provider.Qwen3VLMoEModelProvider attribute) (bridge.models.sarvam.sarvam_provider.SarvamMLAModelProvider attribute) (bridge.models.sarvam.sarvam_provider.SarvamMoEModelProvider attribute) moe_router_score_function (bridge.models.glm_vl.glm_45v_provider.GLM45VModelProvider attribute) (bridge.models.nemotron_vl.nemotron_vl_provider.NemotronNano12Bv2VLModelProvider attribute) (bridge.models.nemotronh.nemotron_h_provider.NemotronHModelProvider attribute) (bridge.models.olmoe.olmoe_provider.OlMoEModelProvider attribute) (bridge.models.qwen_vl.qwen3_vl_provider.Qwen3VLMoEModelProvider attribute) (bridge.models.sarvam.sarvam_provider.SarvamMLAModelProvider attribute) (bridge.models.sarvam.sarvam_provider.SarvamMoEModelProvider attribute) moe_router_topk (bridge.models.glm_vl.glm_45v_provider.GLM45VModelProvider attribute) (bridge.models.nemotronh.nemotron_h_provider.Nemotron3NanoProvider attribute) (bridge.models.olmoe.olmoe_provider.OlMoEModelProvider attribute) (bridge.models.qwen_vl.qwen35_vl_provider.Qwen35VLMoEModelProvider attribute) (bridge.models.sarvam.sarvam_provider.SarvamMLAModelProvider attribute) (bridge.models.sarvam.sarvam_provider.SarvamMoEModelProvider attribute) moe_router_topk_scaling_factor (bridge.models.glm_vl.glm_45v_provider.GLM45VModelProvider attribute) (bridge.models.nemotronh.nemotron_h_provider.Nemotron3NanoProvider attribute) (bridge.models.sarvam.sarvam_provider.SarvamMLAModelProvider attribute) (bridge.models.sarvam.sarvam_provider.SarvamMoEModelProvider attribute) moe_shared_expert_gate (bridge.models.qwen_vl.qwen35_vl_provider.Qwen35VLMoEModelProvider attribute) moe_shared_expert_intermediate_size (bridge.models.glm_vl.glm_45v_provider.GLM45VModelProvider attribute) (bridge.models.nemotronh.nemotron_h_provider.Nemotron3NanoProvider attribute) (bridge.models.sarvam.sarvam_provider.SarvamMLAModelProvider attribute) (bridge.models.sarvam.sarvam_provider.SarvamMoEModelProvider attribute) moe_shared_expert_overlap (bridge.models.glm_vl.glm_45v_provider.GLM45VModelProvider attribute) (bridge.models.nemotron_vl.nemotron_vl_provider.NemotronNano12Bv2VLModelProvider attribute) (bridge.models.nemotronh.nemotron_h_provider.NemotronHModelProvider attribute) (bridge.models.sarvam.sarvam_provider.SarvamMLAModelProvider attribute) (bridge.models.sarvam.sarvam_provider.SarvamMoEModelProvider attribute) moe_token_dispatcher_type (bridge.models.glm_vl.glm_45v_provider.GLM45VModelProvider attribute) (bridge.models.nemotron_vl.nemotron_vl_provider.NemotronNano12Bv2VLModelProvider attribute) (bridge.models.nemotronh.nemotron_h_provider.NemotronHModelProvider attribute) (bridge.models.olmoe.olmoe_provider.OlMoEModelProvider attribute) (bridge.models.qwen_vl.qwen35_vl_provider.Qwen35VLMoEModelProvider attribute) (bridge.models.qwen_vl.qwen3_vl_provider.Qwen3VLMoEModelProvider attribute) (bridge.models.sarvam.sarvam_provider.SarvamMLAModelProvider attribute) (bridge.models.sarvam.sarvam_provider.SarvamMoEModelProvider attribute) MoEV2CausalLMOutputWithPast (class in bridge.models.bailing.modeling_bailing_moe_v2) MoeV2ModelOutputWithPast (class in bridge.models.bailing.modeling_bailing_moe_v2) monitor_process_interval (bridge.training.config.InProcessRestartConfig attribute) monitor_process_logdir (bridge.training.config.InProcessRestartConfig attribute) monitor_thread_interval (bridge.training.config.InProcessRestartConfig attribute) moonlight_16b_peft_config() (in module bridge.recipes.moonlight.moonlight_16b) moonlight_16b_pretrain_config() (in module bridge.recipes.moonlight.moonlight_16b) moonlight_16b_sft_config() (in module bridge.recipes.moonlight.moonlight_16b) mrope_section (bridge.models.glm_vl.glm_45v_provider.GLM45VModelProvider attribute) (bridge.models.qwen3_asr.modeling_qwen3_asr.transformer_config.Qwen3ASRTransformerConfig attribute) (bridge.models.qwen3_asr.qwen3_asr_provider.Qwen3ASRModelProvider attribute) (bridge.models.qwen_omni.modeling_qwen25_omni.transformer_config.Qwen25OmniTransformerConfig attribute) (bridge.models.qwen_omni.qwen25_omni_provider.Qwen25OmniModelProvider attribute) (bridge.models.qwen_vl.modelling_qwen3_vl.transformer_config.Qwen3VLTransformerConfig attribute) (bridge.models.qwen_vl.qwen25_vl_provider.Qwen25VLModelProvider attribute) (bridge.models.qwen_vl.qwen35_vl_provider.Qwen35VLModelProvider attribute) (bridge.models.qwen_vl.qwen35_vl_provider.Qwen35VLMoEModelProvider attribute) (bridge.models.qwen_vl.qwen3_vl_provider.Qwen3VLModelProvider attribute) (bridge.models.qwen_vl.qwen3_vl_provider.Qwen3VLMoEModelProvider attribute) mscale (bridge.models.sarvam.sarvam_provider.SarvamMLAModelProvider attribute) mscale_all_dim (bridge.models.sarvam.sarvam_provider.SarvamMLAModelProvider attribute) mtp_block_spec() (in module bridge.models.gpt.gpt_builder) (in module bridge.models.gpt_provider) mtp_enabled (bridge.models.gpt_provider.GPTModelProvider attribute) mtp_hybrid_override_pattern (bridge.models.mamba.mamba_provider.MambaModelProvider attribute) mtp_logits (bridge.models.bailing.modeling_bailing_moe_v2.MoEV2CausalLMOutputWithPast attribute) mtp_loss (bridge.models.bailing.modeling_bailing_moe_v2.MoEV2CausalLMOutputWithPast attribute) mtp_loss_scaling_factor (bridge.models.glm_vl.glm_45v_provider.GLM45VModelProvider attribute) mtp_num_layers (bridge.models.glm_vl.glm_45v_provider.GLM45VModelProvider attribute) (bridge.models.mamba.mamba_provider.MambaModelProvider attribute) (bridge.models.qwen_vl.qwen35_vl_provider.Qwen35VLModelProvider attribute) (bridge.models.qwen_vl.qwen35_vl_provider.Qwen35VLMoEModelProvider attribute) multi_latent_attention (bridge.models.sarvam.sarvam_provider.SarvamMLAModelProvider attribute) multimodule_communicator (bridge.training.setup_mimo.MimoSetupOutput attribute) multimodule_no_sync() (in module bridge.training.mimo_parallel_utils) multimodule_pg_collection (bridge.training.setup_mimo.MimoSetupOutput attribute) N name (bridge.recipes.run_plugins.CometPlugin attribute) (bridge.recipes.run_plugins.CometPluginScriptArgs attribute) (bridge.recipes.run_plugins.WandbPlugin attribute) (bridge.recipes.run_plugins.WandbPluginScriptArgs attribute) NAME (bridge.utils.instantiate_utils._Keys attribute) nanov2_bf16_with_fp8_current_scaling_mixed() (in module bridge.training.mixed_precision) nccl_pp_comm_chunksize (bridge.recipes.run_plugins.PerfEnvPlugin attribute) needs_global_state_injection() (in module bridge.training.utils.train_utils) NEMO_CACHE_HOME (in module bridge.data.datasets.sft) NEMO_DATASETS_CACHE (in module bridge.data.datasets.sft) NEMO_MODELS_CACHE (in module bridge.data.datasets.sft) Nemotron3NanoProvider (class in bridge.models.nemotronh.nemotron_h_provider) nemotron_3_nano_peft_config() (in module bridge.recipes.nemotronh.nemotron_3_nano) nemotron_3_nano_pretrain_config() (in module bridge.recipes.nemotronh.nemotron_3_nano) nemotron_3_nano_sft_config() (in module bridge.recipes.nemotronh.nemotron_3_nano) nemotron_3_super_bf16_with_nvfp4_mixed() (in module bridge.training.mixed_precision) NEMOTRON_3_SUPER_HF_MODEL_ID (in module bridge.recipes.nemotronh.nemotron_3_super) nemotron_3_super_peft_config() (in module bridge.recipes.nemotronh.nemotron_3_super) nemotron_3_super_pretrain_config() (in module bridge.recipes.nemotronh.nemotron_3_super) nemotron_3_super_sft_config() (in module bridge.recipes.nemotronh.nemotron_3_super) nemotron_h_bf16_with_fp8_current_scaling_mixed() (in module bridge.training.mixed_precision) nemotron_nano_12b_v2_peft_config() (in module bridge.recipes.nemotronh.nemotron_nano_v2) nemotron_nano_12b_v2_pretrain_config() (in module bridge.recipes.nemotronh.nemotron_nano_v2) nemotron_nano_12b_v2_sft_config() (in module bridge.recipes.nemotronh.nemotron_nano_v2) nemotron_nano_9b_v2_peft_config() (in module bridge.recipes.nemotronh.nemotron_nano_v2) nemotron_nano_9b_v2_pretrain_config() (in module bridge.recipes.nemotronh.nemotron_nano_v2) nemotron_nano_9b_v2_sft_config() (in module bridge.recipes.nemotronh.nemotron_nano_v2) nemotron_nano_v2_vl_12b_peft_config() (in module bridge.recipes.nemotron_vl.nemotron_nano_v2_vl) nemotron_nano_v2_vl_12b_sft_config() (in module bridge.recipes.nemotron_vl.nemotron_nano_v2_vl) nemotron_nano_v2_vl_collate_fn() (in module bridge.data.vlm_datasets.collate) NemotronBridge (class in bridge.models.nemotron.nemotron_bridge) nemotronh_47b_peft_config() (in module bridge.recipes.nemotronh.nemotronh) nemotronh_47b_pretrain_config() (in module bridge.recipes.nemotronh.nemotronh) nemotronh_47b_sft_config() (in module bridge.recipes.nemotronh.nemotronh) nemotronh_4b_peft_config() (in module bridge.recipes.nemotronh.nemotronh) nemotronh_4b_pretrain_config() (in module bridge.recipes.nemotronh.nemotronh) nemotronh_4b_sft_config() (in module bridge.recipes.nemotronh.nemotronh) nemotronh_56b_peft_config() (in module bridge.recipes.nemotronh.nemotronh) nemotronh_56b_pretrain_config() (in module bridge.recipes.nemotronh.nemotronh) nemotronh_56b_sft_config() (in module bridge.recipes.nemotronh.nemotronh) nemotronh_8b_peft_config() (in module bridge.recipes.nemotronh.nemotronh) nemotronh_8b_pretrain_config() (in module bridge.recipes.nemotronh.nemotronh) nemotronh_8b_sft_config() (in module bridge.recipes.nemotronh.nemotronh) NemotronHBridge (class in bridge.models.nemotronh.nemotron_h_bridge) NemotronHModel47BProvider (class in bridge.models.nemotronh.nemotron_h_provider) NemotronHModel4BProvider (class in bridge.models.nemotronh.nemotron_h_provider) NemotronHModel56BProvider (class in bridge.models.nemotronh.nemotron_h_provider) NemotronHModel8BProvider (class in bridge.models.nemotronh.nemotron_h_provider) NemotronHModelProvider (class in bridge.models.nemotronh.nemotron_h_provider) NemotronHModelProvider47B (class in bridge.models.nemotronh.nemotron_h_provider) NemotronHModelProvider4B (class in bridge.models.nemotronh.nemotron_h_provider) NemotronHModelProvider56B (class in bridge.models.nemotronh.nemotron_h_provider) NemotronHModelProvider8B (class in bridge.models.nemotronh.nemotron_h_provider) NemotronNano12Bv2Provider (class in bridge.models.nemotronh.nemotron_h_provider) NemotronNano12Bv2VLModelProvider (class in bridge.models.nemotron_vl.nemotron_vl_provider) NemotronNano9Bv2Provider (class in bridge.models.nemotronh.nemotron_h_provider) NemotronNanoModelProvider12Bv2 (class in bridge.models.nemotronh.nemotron_h_provider) NemotronNanoModelProvider9Bv2 (class in bridge.models.nemotronh.nemotron_h_provider) NemotronVLBridge (class in bridge.models.nemotron_vl.nemotron_vl_bridge) NemotronVLModel (class in bridge.models.nemotron_vl.modeling_nemotron_vl) noisy_latents (bridge.diffusion.common.flow_matching.adapters.base.FlowMatchingContext attribute) non_persistent_ckpt (bridge.training.checkpointing.CheckpointSaveContext attribute) norm1 (bridge.diffusion.models.wan.wan_layer_spec.WanWithAdaLNSubmodules attribute) norm2 (bridge.diffusion.models.wan.wan_layer_spec.WanWithAdaLNSubmodules attribute) norm3 (bridge.diffusion.models.wan.wan_layer_spec.WanWithAdaLNSubmodules attribute) normalization (bridge.diffusion.models.wan.wan_provider.WanModelProvider attribute) (bridge.models.gemma.gemma2_provider.Gemma2ModelProvider attribute) (bridge.models.gemma.gemma3_provider.Gemma3ModelProvider attribute) (bridge.models.gemma.gemma_provider.GemmaModelProvider attribute) (bridge.models.gemma_vl.modeling_gemma3_vl.Gemma3VLMultimodalProjectorConfig attribute) (bridge.models.glm_vl.glm_45v_provider.GLM45VModelProvider attribute) (bridge.models.gpt_provider.GPTModelProvider attribute) (bridge.models.llama_nemotron.llama_nemotron_provider.LlamaNemotronHeterogeneousProvider attribute) (bridge.models.mamba.mamba_provider.MambaModelProvider attribute) (bridge.models.ministral3.ministral3_provider.Ministral3ModelProvider attribute) (bridge.models.mistral.mistral_provider.MistralModelProvider attribute) (bridge.models.olmoe.olmoe_provider.OlMoEModelProvider attribute) (bridge.models.qwen3_asr.qwen3_asr_provider.Qwen3ASRModelProvider attribute) (bridge.models.qwen_omni.qwen25_omni_provider.Qwen25OmniModelProvider attribute) (bridge.models.qwen_vl.qwen35_vl_provider.Qwen35VLModelProvider attribute) (bridge.models.qwen_vl.qwen35_vl_provider.Qwen35VLMoEModelProvider attribute) (bridge.models.sarvam.sarvam_provider.SarvamMLAModelProvider attribute) (bridge.models.sarvam.sarvam_provider.SarvamMoEModelProvider attribute) normalize_callbacks() (in module bridge.training.callbacks) normalize_modulate() (bridge.diffusion.models.wan.wan_layer_spec.WanAdaLN method) normalize_moe_lora (bridge.peft.canonical_lora.CanonicalLoRA attribute) (bridge.peft.lora.LoRA attribute) normalized_for_model() (bridge.training.utils.visual_inputs.GenericVisualInputs method) (bridge.training.utils.visual_inputs.Qwen2_5_VLVisualInputs method) (bridge.training.utils.visual_inputs.Qwen2AudioInputs method) nsys_gpu_metrics (bridge.recipes.run_plugins.NsysPlugin attribute) nsys_trace (bridge.recipes.run_plugins.NsysPlugin attribute) NsysPlugin (class in bridge.recipes.run_plugins) NsysPluginScriptArgs (class in bridge.recipes.run_plugins) null_decorator() (in module bridge.utils.import_utils) num_attention_heads (bridge.diffusion.models.flux.flux_provider.FluxProvider attribute) (bridge.diffusion.models.wan.wan_provider.WanModelProvider attribute) (bridge.diffusion.models.wan.wan_provider.WanModelProvider14B attribute) (bridge.diffusion.models.wan.wan_provider.WanModelProvider1_3B attribute) (bridge.models.gemma.gemma2_provider.Gemma2ModelProvider27B attribute) (bridge.models.gemma.gemma2_provider.Gemma2ModelProvider2B attribute) (bridge.models.gemma.gemma2_provider.Gemma2ModelProvider9B attribute) (bridge.models.gemma.gemma3_provider.Gemma3ModelProvider12B attribute) (bridge.models.gemma.gemma3_provider.Gemma3ModelProvider1B attribute) (bridge.models.gemma.gemma3_provider.Gemma3ModelProvider27B attribute) (bridge.models.gemma.gemma3_provider.Gemma3ModelProvider4B attribute) (bridge.models.gemma.gemma_provider.GemmaModelProvider2B attribute) (bridge.models.gemma.gemma_provider.GemmaModelProvider7B attribute) (bridge.models.gemma_vl.modeling_gemma3_vl.Gemma3VLMultimodalProjectorConfig attribute) (bridge.models.glm_vl.glm_45v_provider.GLM45VModelProvider attribute) (bridge.models.gpt_provider.GPTProvider175B attribute) (bridge.models.mamba.mamba_provider.MambaModelProvider attribute) (bridge.models.ministral3.ministral3_provider.Ministral3ModelProvider attribute) (bridge.models.mistral.mistral_provider.MistralModelProvider attribute) (bridge.models.mistral.mistral_provider.MistralSmall3ModelProvider24B attribute) (bridge.models.nemotron_vl.nemotron_vl_provider.NemotronNano12Bv2VLModelProvider attribute) (bridge.models.nemotronh.nemotron_h_provider.Nemotron3NanoProvider attribute) (bridge.models.nemotronh.nemotron_h_provider.NemotronHModelProvider47B attribute) (bridge.models.nemotronh.nemotron_h_provider.NemotronHModelProvider4B attribute) (bridge.models.nemotronh.nemotron_h_provider.NemotronHModelProvider56B attribute) (bridge.models.nemotronh.nemotron_h_provider.NemotronHModelProvider8B attribute) (bridge.models.nemotronh.nemotron_h_provider.NemotronNanoModelProvider12Bv2 attribute) (bridge.models.nemotronh.nemotron_h_provider.NemotronNanoModelProvider9Bv2 attribute) (bridge.models.olmoe.olmoe_provider.OlMoEModelProvider attribute) (bridge.models.sarvam.sarvam_provider.SarvamMLAModelProvider attribute) (bridge.models.sarvam.sarvam_provider.SarvamMoEModelProvider attribute) num_beams (bridge.models.hf_pretrained.causal_lm.GenerateKwargs attribute) NUM_BYTES_IN_MEGABYTE (in module bridge.training.utils.theoretical_memory_utils) num_floating_point_operations() (in module bridge.training.utils.flop_utils) num_floating_point_operations_so_far (bridge.training.checkpointing.CheckpointSaveContext attribute) num_frames (bridge.diffusion.data.common.diffusion_sample.DiffusionSample attribute) num_gpu_perf_scores_to_print (bridge.training.config.NVRxStragglerDetectionConfig attribute) num_gpus (bridge.recipes.run_plugins.PerfEnvPlugin attribute) num_images (bridge.data.vlm_datasets.mock_provider.MockVLMConversationProvider attribute) (bridge.recipes.qwen_vl.qwen25_vl_dataset.MockQwen25VLDatasetProvider attribute) num_in_job_restarts (bridge.recipes.run_plugins.FaultTolerancePlugin attribute) num_job_retries_on_failure (bridge.recipes.run_plugins.FaultTolerancePlugin attribute) num_joint_layers (bridge.diffusion.models.flux.flux_provider.FluxProvider attribute) num_layers (bridge.diffusion.models.flux.flux_provider.FluxProvider attribute) (bridge.diffusion.models.wan.wan_provider.WanModelProvider attribute) (bridge.diffusion.models.wan.wan_provider.WanModelProvider14B attribute) (bridge.diffusion.models.wan.wan_provider.WanModelProvider1_3B attribute) (bridge.models.gemma.gemma2_provider.Gemma2ModelProvider27B attribute) (bridge.models.gemma.gemma2_provider.Gemma2ModelProvider2B attribute) (bridge.models.gemma.gemma2_provider.Gemma2ModelProvider9B attribute) (bridge.models.gemma.gemma3_provider.Gemma3ModelProvider12B attribute) (bridge.models.gemma.gemma3_provider.Gemma3ModelProvider1B attribute) (bridge.models.gemma.gemma3_provider.Gemma3ModelProvider27B attribute) (bridge.models.gemma.gemma3_provider.Gemma3ModelProvider4B attribute) (bridge.models.gemma.gemma_provider.GemmaModelProvider2B attribute) (bridge.models.gemma.gemma_provider.GemmaModelProvider7B attribute) (bridge.models.gemma_vl.modeling_gemma3_vl.Gemma3VLMultimodalProjectorConfig attribute) (bridge.models.glm_vl.glm_45v_provider.GLM45VModelProvider attribute) (bridge.models.gpt_provider.GPTProvider175B attribute) (bridge.models.mamba.mamba_provider.MambaModelProvider attribute) (bridge.models.ministral3.ministral3_provider.Ministral3ModelProvider14B attribute) (bridge.models.ministral3.ministral3_provider.Ministral3ModelProvider3B attribute) (bridge.models.ministral3.ministral3_provider.Ministral3ModelProvider8B attribute) (bridge.models.mistral.mistral_provider.MistralModelProvider attribute) (bridge.models.mistral.mistral_provider.MistralSmall3ModelProvider24B attribute) (bridge.models.olmoe.olmoe_provider.OlMoEModelProvider attribute) (bridge.models.sarvam.sarvam_provider.SarvamMLAModelProvider attribute) (bridge.models.sarvam.sarvam_provider.SarvamMoEModelProvider attribute) num_layers_at_end_in_bf16 (bridge.training.mixed_precision.MixedPrecisionConfig attribute) num_layers_at_start_in_bf16 (bridge.training.mixed_precision.MixedPrecisionConfig attribute) num_layers_in_first_pipeline_stage (bridge.models.model_provider.ModelParallelKwargs attribute) num_layers_in_last_pipeline_stage (bridge.models.model_provider.ModelParallelKwargs attribute) num_moe_experts (bridge.models.glm_vl.glm_45v_provider.GLM45VModelProvider attribute) (bridge.models.gpt_provider.GPTModelProvider attribute) (bridge.models.nemotronh.nemotron_h_provider.Nemotron3NanoProvider attribute) (bridge.models.olmoe.olmoe_provider.OlMoEModelProvider attribute) (bridge.models.qwen_vl.qwen35_vl_provider.Qwen35VLMoEModelProvider attribute) (bridge.models.sarvam.sarvam_provider.SarvamMLAModelProvider attribute) (bridge.models.sarvam.sarvam_provider.SarvamMoEModelProvider attribute) (bridge.models.t5_provider.T5ModelProvider attribute) num_parameters (bridge.models.hf_pretrained.causal_lm.PreTrainedCausalLM property) num_parameters() (bridge.models.hf_pretrained.vlm.PreTrainedVLM method) num_position_embeddings (bridge.models.qwen_vl.modelling_qwen3_vl.transformer_config.Qwen3VLTransformerConfig attribute) (bridge.models.qwen_vl.qwen3_vl_provider.Qwen3VLModelProvider attribute) num_query_groups (bridge.models.gemma.gemma2_provider.Gemma2ModelProvider27B attribute) (bridge.models.gemma.gemma2_provider.Gemma2ModelProvider2B attribute) (bridge.models.gemma.gemma2_provider.Gemma2ModelProvider9B attribute) (bridge.models.gemma.gemma3_provider.Gemma3ModelProvider12B attribute) (bridge.models.gemma.gemma3_provider.Gemma3ModelProvider1B attribute) (bridge.models.gemma.gemma3_provider.Gemma3ModelProvider27B attribute) (bridge.models.gemma.gemma3_provider.Gemma3ModelProvider4B attribute) (bridge.models.gemma.gemma_provider.GemmaModelProvider2B attribute) (bridge.models.gemma.gemma_provider.GemmaModelProvider7B attribute) (bridge.models.glm_vl.glm_45v_provider.GLM45VModelProvider attribute) (bridge.models.llama_nemotron.llama_nemotron_provider.LlamaNemotronHeterogeneousProvider attribute) (bridge.models.ministral3.ministral3_provider.Ministral3ModelProvider attribute) (bridge.models.mistral.mistral_provider.MistralModelProvider attribute) (bridge.models.nemotron_vl.nemotron_vl_provider.NemotronNano12Bv2VLModelProvider attribute) (bridge.models.nemotronh.nemotron_h_provider.Nemotron3NanoProvider attribute) (bridge.models.nemotronh.nemotron_h_provider.NemotronHModelProvider attribute) (bridge.models.olmoe.olmoe_provider.OlMoEModelProvider attribute) (bridge.models.qwen_vl.qwen35_vl_provider.Qwen35VLModelProvider attribute) (bridge.models.qwen_vl.qwen35_vl_provider.Qwen35VLMoEModelProvider attribute) (bridge.models.sarvam.sarvam_provider.SarvamMoEModelProvider attribute) num_return_sequences (bridge.models.hf_pretrained.causal_lm.GenerateKwargs attribute) num_single_layers (bridge.diffusion.models.flux.flux_provider.FluxProvider attribute) num_sm (bridge.training.comm_overlap.BulkOverlapCfg attribute) (bridge.training.comm_overlap.PipelineOverlapCfg attribute) (bridge.training.comm_overlap.RingExchangeOverlapCfg attribute) num_splits (bridge.training.comm_overlap.PipelineOverlapCfg attribute) num_tokenizer_workers (bridge.data.datasets.packed_sequence.PackedSequenceSpecs attribute) num_train_samples (bridge.diffusion.data.flux.flux_mock_datamodule.FluxMockDataModuleConfig attribute) num_workers (bridge.data.energon.energon_provider.EnergonProvider attribute) (bridge.diffusion.data.common.diffusion_energon_datamodule.DiffusionDataModuleConfig attribute) (bridge.diffusion.data.flux.flux_energon_datamodule.FluxDataModuleConfig attribute) (bridge.diffusion.data.flux.flux_energon_datamodule.FluxDatasetConfig attribute) (bridge.diffusion.data.flux.flux_mock_datamodule.FluxMockDataModuleConfig attribute) (bridge.diffusion.data.wan.wan_energon_datamodule.WanDataModuleConfig attribute) (bridge.diffusion.data.wan.wan_energon_datamodule.WanDatasetConfig attribute) (bridge.diffusion.data.wan.wan_mock_datamodule.WanMockDataModuleConfig attribute) (bridge.training.config.DataloaderConfig attribute) number_packed_samples (bridge.diffusion.data.wan.wan_energon_datamodule.WanDatasetConfig attribute) (bridge.diffusion.data.wan.wan_mock_datamodule.WanMockDataModuleConfig attribute) numpy_to_pil() (bridge.diffusion.models.flux.flow_matching.flux_inference_pipeline.FluxInferencePipeline static method) nvrx_straggler (bridge.training.config.ConfigContainer attribute) nvrx_straggler_manager (bridge.training.state.GlobalState property) NVRxStragglerDetectionConfig (class in bridge.training.config) NVRxStragglerDetectionManager (class in bridge.training.nvrx_straggler) O olmoe_7b_peft_config() (in module bridge.recipes.olmoe.olmoe_7b) olmoe_7b_pretrain_config() (in module bridge.recipes.olmoe.olmoe_7b) olmoe_7b_sft_config() (in module bridge.recipes.olmoe.olmoe_7b) olmoe_layer_spec() (in module bridge.models.olmoe.olmoe_provider) OlMoEBridge (class in bridge.models.olmoe.olmoe_bridge) OlMoEModelProvider (class in bridge.models.olmoe.olmoe_provider) OLMoESelfAttention (class in bridge.models.olmoe.olmoe_provider) on_checkpoint_loaded() (in module bridge.training.fault_tolerance) on_checkpoint_save() (bridge.training.callbacks.Callback method) on_checkpointing_end() (in module bridge.training.fault_tolerance) on_checkpointing_start() (in module bridge.training.fault_tolerance) on_data_init_start() (bridge.training.callbacks.Callback method) on_eval_end() (bridge.training.callbacks.Callback method) on_eval_start() (bridge.training.callbacks.Callback method) on_eval_step_end() (bridge.training.callbacks.Callback method) (in module bridge.training.fault_tolerance) on_eval_step_start() (bridge.training.callbacks.Callback method) (in module bridge.training.fault_tolerance) on_load_checkpoint_success() (in module bridge.training.utils.comet_utils) (in module bridge.training.utils.mlflow_utils) (in module bridge.training.utils.wandb_utils) on_save_checkpoint_success() (in module bridge.training.utils.comet_utils) (in module bridge.training.utils.mlflow_utils) (in module bridge.training.utils.wandb_utils) on_test_end() (bridge.training.callbacks.Callback method) on_test_start() (bridge.training.callbacks.Callback method) on_test_step_end() (bridge.training.callbacks.Callback method) on_test_step_start() (bridge.training.callbacks.Callback method) on_train_end() (bridge.training.callbacks.Callback method) on_train_start() (bridge.training.callbacks.Callback method) on_train_step_end() (bridge.training.callbacks.Callback method) on_train_step_start() (bridge.training.callbacks.Callback method) on_training_step_end() (in module bridge.training.fault_tolerance) on_training_step_start() (in module bridge.training.fault_tolerance) opt_param_scheduler (bridge.training.checkpointing.CheckpointLoadContext attribute) (bridge.training.checkpointing.CheckpointSaveContext attribute) optimizer (bridge.training.callbacks.CallbackContext attribute) (bridge.training.checkpointing.CheckpointLoadContext attribute) (bridge.training.checkpointing.CheckpointSaveContext attribute) (bridge.training.config.ConfigContainer attribute) (bridge.training.setup.SetupOutput attribute) (bridge.training.setup_mimo.MimoSetupOutput attribute) optimizer_config (bridge.training.config.OptimizerConfigOverrideProviderContext attribute) optimizer_config_override_provider (bridge.training.config.ConfigContainer attribute) OptimizerConfig (class in bridge.training.config) OptimizerConfigOverrideProvider (class in bridge.training.config) OptimizerConfigOverrideProviderContext (class in bridge.training.config) OPTIONAL_ARTIFACTS (bridge.models.hf_pretrained.base.PreTrainedBase attribute) (bridge.models.hf_pretrained.causal_lm.PreTrainedCausalLM attribute) (bridge.models.hf_pretrained.vlm.PreTrainedVLM attribute) order (bridge.diffusion.models.flux.flow_matching.flux_inference_pipeline.FlowMatchEulerDiscreteScheduler attribute) original_answers (bridge.data.builders.hf_dataset.ProcessExampleOutput attribute) original_max_position_embeddings (bridge.models.sarvam.sarvam_provider.SarvamMLAModelProvider attribute) out_channels (bridge.diffusion.models.wan.wan_provider.WanModelProvider attribute) out_features (bridge.peft.utils.AdapterAttributes attribute) out_hidden_size (bridge.models.qwen_vl.modelling_qwen3_vl.transformer_config.Qwen3VLTransformerConfig attribute) (bridge.models.qwen_vl.qwen3_vl_provider.Qwen3VLModelProvider attribute) output (bridge.data.builders.hf_dataset.ProcessExampleOutput attribute) output_attentions (bridge.models.hf_pretrained.causal_lm.GenerateKwargs attribute) output_scores (bridge.models.hf_pretrained.causal_lm.GenerateKwargs attribute) overlap_grad_reduce (bridge.training.comm_overlap._CommOverlapConfig attribute) (bridge.training.comm_overlap.CommOverlapConfig attribute) overlap_moe_expert_parallel_comm (bridge.training.comm_overlap._CommOverlapConfig attribute) (bridge.training.comm_overlap.CommOverlapConfig attribute) overlap_p2p_comm (bridge.training.comm_overlap._CommOverlapConfig attribute) (bridge.training.comm_overlap.CommOverlapConfig attribute) overlap_param_gather (bridge.training.comm_overlap._CommOverlapConfig attribute) (bridge.training.comm_overlap.CommOverlapConfig attribute) overlap_param_gather_with_optimizer_step (bridge.models.model_provider.GetModelKwargs attribute) (bridge.training.comm_overlap._CommOverlapConfig attribute) (bridge.training.comm_overlap.CommOverlapConfig attribute) OverridesError P pack_batch_sequences() (in module bridge.training.vlm_step) pack_dist_train_vision_module_output() (in module bridge.models.qwen_vl.modelling_qwen3_vl.utils) pack_metadata (bridge.data.builders.finetuning_dataset.FinetuningDatasetBuilder property) pack_or_pad_batch_sequences() (in module bridge.models.qwen_vl.qwen3_vl_step) pack_selected_samples() (bridge.diffusion.data.common.diffusion_task_encoder_with_sp.DiffusionTaskEncoderWithSequencePacking method) pack_sequences_in_batch (bridge.data.energon.energon_provider.EnergonProvider attribute) (bridge.data.vlm_datasets.hf_provider.HFDatasetConversationProvider attribute) (bridge.data.vlm_datasets.mock_provider.MockVLMConversationProvider attribute) (bridge.data.vlm_datasets.preloaded_provider.PreloadedVLMConversationProvider attribute) packed_metadata_path (bridge.data.datasets.packed_sequence.PackedSequenceSpecs attribute) packed_sequence_size (bridge.data.datasets.packed_sequence.PackedSequenceSpecs attribute) packed_sequence_specs (bridge.training.config.FinetuningDatasetConfig attribute) packed_train_data_path (bridge.data.datasets.packed_sequence.PackedSequenceSpecs attribute) packed_val_data_path (bridge.data.datasets.packed_sequence.PackedSequenceSpecs attribute) PackedSequenceSpecs (class in bridge.data.datasets.packed_sequence) PACKING_ALGOS (in module bridge.data.datasets.packing_utils) packing_buffer_size (bridge.diffusion.data.common.diffusion_energon_datamodule.DiffusionDataModuleConfig attribute) (bridge.diffusion.data.flux.flux_energon_datamodule.FluxDataModuleConfig attribute) (bridge.diffusion.data.flux.flux_energon_datamodule.FluxDatasetConfig attribute) (bridge.diffusion.data.flux.flux_mock_datamodule.FluxMockDataModuleConfig attribute) (bridge.diffusion.data.wan.wan_energon_datamodule.WanDataModuleConfig attribute) (bridge.diffusion.data.wan.wan_energon_datamodule.WanDatasetConfig attribute) (bridge.diffusion.data.wan.wan_mock_datamodule.WanMockDataModuleConfig attribute) pad_cu_seqlens (bridge.data.datasets.packed_sequence.PackedSequenceSpecs attribute) pad_or_truncate_2d_to_len() (in module bridge.training.utils.padding_utils) pad_or_truncate_attn_to_len() (in module bridge.training.utils.padding_utils) pad_or_truncate_pos_to_len() (in module bridge.training.utils.padding_utils) pad_seq_to_mult (bridge.data.datasets.packed_sequence.PackedSequenceSpecs attribute) pad_seq_to_mult() (in module bridge.peft.utils) pad_to_max_length (bridge.data.vlm_datasets.mock_provider.MockVLMConversationProvider attribute) (bridge.recipes.qwen_vl.qwen25_vl_dataset.MockQwen25VLDatasetProvider attribute) pad_token_id (bridge.models.hf_pretrained.causal_lm.GenerateKwargs attribute) (bridge.models.kimi_vl.kimi_k25_vl_provider.KimiK25VLModelProvider attribute) (bridge.models.qwen_audio.qwen2_audio_provider.Qwen2AudioModelProvider attribute) PAD_TOKENS (in module bridge.data.vlm_datasets.token_utils) padded_and_mask_function() (bridge.models.qwen3_asr.hf_qwen3_asr.modeling_qwen3_asr.Qwen3ASRAudioEncoder method) padding (bridge.models.hf_pretrained.causal_lm.EncodeKwargs attribute) padding_mask (bridge.diffusion.data.common.diffusion_sample.DiffusionSample attribute) parallel_output (bridge.diffusion.models.wan.wan_provider.WanModelProvider attribute) (bridge.models.gpt.gpt_builder.GPTModelConfig attribute) (bridge.models.gpt_provider.GPTModelProvider attribute) (bridge.models.mamba.mamba_builder.MambaModelConfig attribute) (bridge.models.mamba.mamba_provider.MambaModelProvider attribute) (bridge.models.t5_provider.T5ModelProvider attribute) ParallelLinearAdapter (class in bridge.peft.utils) ParallelLinearDoRAAdapter (class in bridge.peft.dora_layers) ParallelTimestepEmbedding (class in bridge.diffusion.models.common.dit_embeddings) param_is_not_shared() (in module bridge.training.utils.train_utils) param_name (bridge.models.conversion.model_bridge.HFWeightTuple attribute) (bridge.models.conversion.model_bridge.MegatronWeightTuple attribute) (bridge.models.conversion.model_bridge.WeightConversionTask attribute) param_name_patterns_for_fp32_local_accumulation (bridge.training.config.DistributedDataParallelConfig attribute) param_weight (bridge.models.conversion.model_bridge.WeightConversionTask attribute) params_dtype (bridge.diffusion.models.wan.wan_provider.WanModelProvider attribute) (bridge.models.gemma.gemma3_provider.Gemma3ModelProvider attribute) (bridge.models.gemma.gemma_provider.GemmaModelProvider attribute) (bridge.models.glm_vl.glm_45v_provider.GLM45VModelProvider attribute) (bridge.models.llama_nemotron.llama_nemotron_provider.LlamaNemotronHeterogeneousProvider attribute) (bridge.models.mamba.mamba_provider.MambaModelProvider attribute) (bridge.models.ministral3.ministral3_provider.Ministral3ModelProvider attribute) (bridge.models.mistral.mistral_provider.MistralModelProvider attribute) (bridge.models.mistral.mistral_provider.MistralSmall3ModelProvider24B attribute) (bridge.models.olmoe.olmoe_provider.OlMoEModelProvider attribute) (bridge.training.mixed_precision.MixedPrecisionConfig attribute) params_to_save (bridge.peft.base.PEFT attribute) parse_hydra_overrides() (in module bridge.training.utils.omegaconf_utils) PARTIAL (bridge.utils.instantiate_utils._Keys attribute) participating_modules (bridge.models.mimo.mimo_provider.MimoModelInfra attribute) past_key_values (bridge.models.bailing.modeling_bailing_moe_v2.MoEV2CausalLMOutputWithPast attribute) PATCH (in module bridge.package_info) patch_dim (bridge.models.gemma_vl.modeling_gemma3_vl.Gemma3VLMultimodalProjectorConfig attribute) patch_linear_module() (in module bridge.peft.lora_layers) patch_norm (bridge.models.qwen_vl.modelling_qwen3_vl.utils.PatchMergerSubmodules attribute) patch_size (bridge.diffusion.models.flux.flux_provider.FluxProvider attribute) (bridge.models.qwen_omni.modeling_qwen25_omni.transformer_config.Qwen25OmniTransformerConfig attribute) (bridge.models.qwen_omni.qwen25_omni_provider.Qwen25OmniModelProvider attribute) (bridge.models.qwen_vl.modelling_qwen3_vl.transformer_config.Qwen3VLTransformerConfig attribute) (bridge.models.qwen_vl.qwen35_vl_provider.Qwen35VLModelProvider attribute) (bridge.models.qwen_vl.qwen35_vl_provider.Qwen35VLMoEModelProvider attribute) (bridge.models.qwen_vl.qwen3_vl_provider.Qwen3VLModelProvider attribute) (bridge.models.qwen_vl.qwen3_vl_provider.Qwen3VLMoEModelProvider attribute) patch_spatial (bridge.diffusion.data.wan.wan_energon_datamodule.WanDatasetConfig attribute) (bridge.diffusion.data.wan.wan_mock_datamodule.WanMockDataModuleConfig attribute) (bridge.diffusion.models.wan.wan_provider.WanModelProvider attribute) patch_temporal (bridge.diffusion.data.wan.wan_energon_datamodule.WanDatasetConfig attribute) (bridge.diffusion.data.wan.wan_mock_datamodule.WanMockDataModuleConfig attribute) (bridge.diffusion.models.wan.wan_provider.WanModelProvider attribute) patchify() (in module bridge.diffusion.models.wan.utils) PatchMergerSubmodules (class in bridge.models.qwen_vl.modelling_qwen3_vl.utils) path (bridge.data.energon.energon_provider.EnergonProvider attribute) (bridge.diffusion.data.common.diffusion_energon_datamodule.DiffusionDataModuleConfig attribute) (bridge.diffusion.data.flux.flux_energon_datamodule.FluxDataModuleConfig attribute) (bridge.diffusion.data.flux.flux_energon_datamodule.FluxDatasetConfig attribute) (bridge.diffusion.data.flux.flux_mock_datamodule.FluxMockDataModuleConfig attribute) (bridge.diffusion.data.wan.wan_energon_datamodule.WanDataModuleConfig attribute) (bridge.diffusion.data.wan.wan_energon_datamodule.WanDatasetConfig attribute) (bridge.diffusion.data.wan.wan_mock_datamodule.WanMockDataModuleConfig attribute) (bridge.models.hf_pretrained.state.SafeTensorsStateSource property) peft (bridge.training.config.ConfigContainer attribute) PEFT (class in bridge.peft.base) PEFT_RECOMPUTE_PATCHED (in module bridge.peft.recompute) PerfEnvPlugin (class in bridge.recipes.run_plugins) PerfEnvPluginScriptArgs (class in bridge.recipes.run_plugins) persist_layer_norm (bridge.models.glm_vl.glm_45v_provider.GLM45VModelProvider attribute) (bridge.models.llama_nemotron.llama_nemotron_provider.LlamaNemotronHeterogeneousProvider attribute) (bridge.models.nemotron_vl.nemotron_vl_provider.NemotronNano12Bv2VLModelProvider attribute) (bridge.models.nemotronh.nemotron_h_provider.NemotronHModelProvider attribute) (bridge.models.olmoe.olmoe_provider.OlMoEModelProvider attribute) (bridge.models.qwen3_asr.qwen3_asr_provider.Qwen3ASRModelProvider attribute) (bridge.models.qwen_omni.qwen25_omni_provider.Qwen25OmniModelProvider attribute) (bridge.models.qwen_vl.qwen3_vl_provider.Qwen3VLMoEModelProvider attribute) (bridge.models.sarvam.sarvam_provider.SarvamMLAModelProvider attribute) (bridge.models.sarvam.sarvam_provider.SarvamMoEModelProvider attribute) (bridge.models.t5_provider.T5ModelProvider attribute) persistent_buffers() (in module bridge.models.conversion.utils) persistent_workers (bridge.training.config.DataloaderConfig attribute) pg_collection (bridge.training.checkpointing.CheckpointLoadContext attribute) (bridge.training.checkpointing.CheckpointSaveContext attribute) (bridge.training.config.DatasetBuildContext attribute) (bridge.training.setup.SetupOutput attribute) pg_collections (bridge.models.mimo.mimo_provider.MimoModelInfra attribute) phi4_mm_collate_fn() (in module bridge.data.vlm_datasets.collate) pil_image_from_base64() (in module bridge.models.nemotron_vl.nemotron_vl_utils) pin_memory (bridge.training.config.DataloaderConfig attribute) pipeline_dtype (bridge.models.model_provider.ModelParallelKwargs attribute) (bridge.training.mixed_precision.MixedPrecisionConfig attribute) pipeline_model_parallel_size (bridge.models.mimo.mimo_config.ModuleParallelismConfig attribute) (bridge.models.model_provider.ModelParallelKwargs attribute) (bridge.recipes.qwen_vl.qwen3_vl.Qwen3VLCommonKwargs attribute) PipelineOverlapCfg (class in bridge.training.comm_overlap) pixel_values (bridge.training.utils.visual_inputs.GenericVisualInputs attribute) (bridge.training.utils.visual_inputs.Qwen2_5_VLVisualInputs attribute) pixel_values_videos (bridge.training.utils.visual_inputs.GenericVisualInputs attribute) pooled_prompt_dim (bridge.diffusion.data.flux.flux_energon_datamodule.FluxDatasetConfig attribute) (bridge.diffusion.data.flux.flux_mock_datamodule.FluxMockDataModuleConfig attribute) populate_embedding_and_position_groups() (in module bridge.models.mimo.mimo_builder) pos_ids (bridge.diffusion.data.common.diffusion_sample.DiffusionSample attribute) position_embedding_type (bridge.models.gemma.gemma2_provider.Gemma2ModelProvider attribute) (bridge.models.gemma.gemma3_provider.Gemma3ModelProvider attribute) (bridge.models.gemma.gemma_provider.GemmaModelProvider attribute) (bridge.models.glm_vl.glm_45v_provider.GLM45VModelProvider attribute) (bridge.models.gpt.gpt_builder.GPTModelConfig attribute) (bridge.models.gpt_provider.GPTModelProvider attribute) (bridge.models.llama_nemotron.llama_nemotron_provider.LlamaNemotronHeterogeneousProvider attribute) (bridge.models.mamba.mamba_builder.MambaModelConfig attribute) (bridge.models.mamba.mamba_provider.MambaModelProvider attribute) (bridge.models.ministral3.ministral3_provider.Ministral3ModelProvider attribute) (bridge.models.mistral.mistral_provider.MistralModelProvider attribute) (bridge.models.olmoe.olmoe_provider.OlMoEModelProvider attribute) (bridge.models.qwen3_asr.qwen3_asr_provider.Qwen3ASRModelProvider attribute) (bridge.models.qwen_audio.qwen2_audio_provider.Qwen2AudioModelProvider attribute) (bridge.models.qwen_omni.qwen25_omni_provider.Qwen25OmniModelProvider attribute) (bridge.models.qwen_vl.qwen25_vl_provider.Qwen25VLModelProvider attribute) (bridge.models.qwen_vl.qwen35_vl_provider.Qwen35VLModelProvider attribute) (bridge.models.qwen_vl.qwen35_vl_provider.Qwen35VLMoEModelProvider attribute) (bridge.models.qwen_vl.qwen3_vl_provider.Qwen3VLModelProvider attribute) (bridge.models.qwen_vl.qwen3_vl_provider.Qwen3VLMoEModelProvider attribute) (bridge.models.sarvam.sarvam_provider.SarvamMLAModelProvider attribute) (bridge.models.sarvam.sarvam_provider.SarvamMoEModelProvider attribute) (bridge.models.t5_provider.T5ModelProvider attribute) position_id_per_seconds (bridge.models.qwen_omni.modeling_qwen25_omni.transformer_config.Qwen25OmniTransformerConfig attribute) (bridge.models.qwen_omni.qwen25_omni_provider.Qwen25OmniModelProvider attribute) position_ids (bridge.data.energon.hf_encoder_task_encoder.HFEncoderTaskBatch attribute) post_process_prediction() (bridge.diffusion.common.flow_matching.adapters.base.ModelAdapter method) post_wrap_hook (bridge.models.model_provider.GetModelKwargs attribute) (bridge.models.model_provider.ModelProviderMixin property) post_wrap_hooks (bridge.models.common.base.ModelConfig attribute) pp_rank (bridge.models.conversion.model_bridge.WeightConversionTask attribute) (bridge.models.conversion.param_mapping.MegatronParamMapping property) pp_size (bridge.models.conversion.param_mapping.MegatronParamMapping property) (bridge.recipes.run_plugins.PerfEnvPlugin attribute) PRE_RELEASE (in module bridge.package_info) pre_wrap_hook (bridge.models.model_provider.GetModelKwargs attribute) (bridge.models.model_provider.ModelProviderMixin property) pre_wrap_hooks (bridge.models.common.base.ModelConfig attribute) precision_config (bridge.recipes.qwen_vl.qwen3_vl.Qwen3VLCommonKwargs attribute) preempt_time (bridge.recipes.run_plugins.PreemptionPlugin attribute) PreemptionPlugin (class in bridge.recipes.run_plugins) PreemptionPluginScriptArgs (class in bridge.recipes.run_plugins) PREFIX_STR (in module bridge.data.datasets.sft) (in module bridge.data.datasets.utils) PreloadedVLMConversationProvider (class in bridge.data.vlm_datasets.preloaded_provider) prepare_data() (bridge.data.builders.finetuning_dataset.FinetuningDatasetBuilder method) (bridge.data.builders.hf_dataset.HFDatasetBuilder method) prepare_finetuning_batch() (in module bridge.data.finetuning) prepare_forward_step_func() (in module bridge.training.utils.train_utils) prepare_inputs() (bridge.diffusion.common.flow_matching.adapters.base.ModelAdapter method) (bridge.diffusion.common.flow_matching.adapters.simple.SimpleAdapter method) (bridge.diffusion.models.flux.flow_matching.flux_adapter.MegatronFluxAdapter method) (bridge.diffusion.models.wan.flow_matching.flow_matching_pipeline_wan.WanAdapter method) prepare_inputs_for_generation() (bridge.models.qwen3_asr.hf_qwen3_asr.modeling_qwen3_asr.Qwen3ASRThinkerForConditionalGeneration method) prepare_latents() (bridge.diffusion.models.flux.flow_matching.flux_inference_pipeline.FluxInferencePipeline method) prepare_packed_data() (bridge.data.builders.finetuning_dataset.FinetuningDatasetBuilder method) prepare_packed_sequence_data() (in module bridge.data.datasets.packed_sequence) preprocess_and_split_data() (in module bridge.data.builders.hf_dataset) preprocess_fn (bridge.data.mimo.hf_provider.HFMimoDatasetProvider attribute) preprocess_fsdp_dtensor_state_dict() (in module bridge.training.checkpointing) preprocess_packed_seqs() (in module bridge.models.qwen_vl.modelling_qwen3_vl.utils) pretrain() (in module bridge.training.pretrain) pretrain_mimo() (in module bridge.training.pretrain_mimo) pretrain_train_valid_test_datasets_provider() (in module bridge.data.utils) pretrained_checkpoint (bridge.training.config.CheckpointConfig attribute) pretrained_model_name (bridge.models.qwen_omni.qwen25_omni_provider.Qwen25OmniModelProvider attribute) (bridge.models.qwen_vl.qwen3_vl_provider.Qwen3VLMoEModelProvider attribute) PreTrainedBase (class in bridge.models.hf_pretrained.base) PreTrainedCausalLM (class in bridge.models.hf_pretrained.causal_lm) PreTrainedFlux (class in bridge.diffusion.conversion.flux.flux_hf_pretrained) PreTrainedVLM (class in bridge.models.hf_pretrained.vlm) PreTrainedWAN (class in bridge.diffusion.conversion.wan.wan_hf_pretrained) print_rank_0() (in module bridge.utils.common_utils) print_rank_last() (in module bridge.utils.common_utils) print_yaml() (bridge.training.utils.config_utils._ConfigContainerBase method) process_config_with_overrides() (in module bridge.training.utils.omegaconf_utils) process_example_fn (bridge.data.builders.hf_dataset.HFDatasetConfig attribute) process_gsm8k_example() (in module bridge.data.hf_processors.gsm8k) process_images_and_text() (bridge.models.hf_pretrained.vlm.PreTrainedVLM method) process_openmathinstruct2_example() (in module bridge.data.hf_processors.openmathinstruct2) process_squad_example() (in module bridge.data.hf_processors.squad) process_text_batch() (in module bridge.data.vlm_datasets.token_utils) ProcessExampleFn (class in bridge.data.builders.hf_dataset) ProcessExampleOutput (class in bridge.data.builders.hf_dataset) processor (bridge.models.hf_pretrained.causal_lm.PreTrainedCausalLM property) (bridge.models.hf_pretrained.vlm.PreTrainedVLM property) processor_paths (bridge.data.mimo.hf_provider.HFMimoDatasetProvider attribute) (bridge.data.mimo.mock_provider.MockMimoProvider attribute) profile_ranks (bridge.recipes.run_plugins.NsysPlugin attribute) (bridge.recipes.run_plugins.NsysPluginScriptArgs attribute) (bridge.recipes.run_plugins.PyTorchProfilerPlugin attribute) (bridge.recipes.run_plugins.PyTorchProfilerPluginScriptArgs attribute) profile_step_end (bridge.recipes.run_plugins.NsysPlugin attribute) (bridge.recipes.run_plugins.NsysPluginScriptArgs attribute) (bridge.recipes.run_plugins.PyTorchProfilerPlugin attribute) (bridge.recipes.run_plugins.PyTorchProfilerPluginScriptArgs attribute) profile_step_start (bridge.recipes.run_plugins.NsysPlugin attribute) (bridge.recipes.run_plugins.NsysPluginScriptArgs attribute) (bridge.recipes.run_plugins.PyTorchProfilerPlugin attribute) (bridge.recipes.run_plugins.PyTorchProfilerPluginScriptArgs attribute) profiling (bridge.training.config.ConfigContainer attribute) profiling_interval (bridge.training.config.NVRxStragglerDetectionConfig attribute) ProfilingConfig (class in bridge.training.config) progress_watchdog_interval (bridge.training.config.InProcessRestartConfig attribute) proj_dgrad (bridge.training.comm_overlap.TransformerLayerTPOverlapCfg attribute) proj_fprop (bridge.training.comm_overlap.TransformerLayerTPOverlapCfg attribute) project (bridge.recipes.run_plugins.CometPlugin attribute) (bridge.recipes.run_plugins.CometPluginScriptArgs attribute) (bridge.recipes.run_plugins.WandbPlugin attribute) (bridge.recipes.run_plugins.WandbPluginScriptArgs attribute) prompt (bridge.data.vlm_datasets.mock_provider.MockVLMConversationProvider attribute) (bridge.recipes.qwen_vl.qwen25_vl_dataset.MockQwen25VLDatasetProvider attribute) prompt_seq_len (bridge.diffusion.data.flux.flux_energon_datamodule.FluxDatasetConfig attribute) (bridge.diffusion.data.flux.flux_mock_datamodule.FluxMockDataModuleConfig attribute) provide() (bridge.diffusion.models.flux.flux_provider.FluxProvider method) (bridge.diffusion.models.wan.wan_provider.WanModelProvider method) (bridge.models.distillation_provider.DistillationProvider method) (bridge.models.gemma.gemma2_provider.Gemma2ModelProvider method) (bridge.models.gemma.gemma3_provider.Gemma3ModelProvider method) (bridge.models.gemma.gemma_provider.GemmaModelProvider method) (bridge.models.gemma_vl.gemma3_vl_provider.Gemma3VLModelProvider method) (bridge.models.glm_vl.glm_45v_provider.GLM45VModelProvider method) (bridge.models.gpt_provider.GPTModelProvider method) (bridge.models.kimi_vl.kimi_k25_vl_provider.KimiK25VLModelProvider method) (bridge.models.mamba.mamba_provider.MambaModelProvider method) (bridge.models.mimo.mimo_provider.MimoModelProvider method) (bridge.models.ministral3.ministral3_provider.Ministral3ModelProvider method) (bridge.models.model_provider.ModelProviderMixin method) (bridge.models.nemotron_vl.nemotron_vl_provider.NemotronNano12Bv2VLModelProvider method) (bridge.models.qwen3_asr.qwen3_asr_provider.Qwen3ASRModelProvider method) (bridge.models.qwen_audio.qwen2_audio_provider.Qwen2AudioModelProvider method) (bridge.models.qwen_omni.qwen25_omni_provider.Qwen25OmniModelProvider method) (bridge.models.qwen_vl.qwen25_vl_provider.Qwen25VLModelProvider method) (bridge.models.qwen_vl.qwen35_vl_provider.Qwen35VLModelProvider method) (bridge.models.qwen_vl.qwen35_vl_provider.Qwen35VLMoEModelProvider method) (bridge.models.qwen_vl.qwen3_vl_provider.Qwen3VLModelProvider method) (bridge.models.qwen_vl.qwen3_vl_provider.Qwen3VLMoEModelProvider method) (bridge.models.t5_provider.T5ModelProvider method) provide_distributed_model() (bridge.models.mimo.mimo_provider.MimoModelProvider method) (bridge.models.model_provider.ModelProviderMixin method) provide_language_model() (bridge.models.gemma_vl.gemma3_vl_provider.Gemma3VLModelProvider method) (bridge.models.glm_vl.glm_45v_provider.GLM45VModelProvider method) (bridge.models.kimi_vl.kimi_k25_vl_provider.KimiK25VLModelProvider method) (bridge.models.ministral3.ministral3_provider.Ministral3ModelProvider method) (bridge.models.nemotron_vl.nemotron_vl_provider.NemotronNano12Bv2VLModelProvider method) (bridge.models.qwen3_asr.qwen3_asr_provider.Qwen3ASRModelProvider method) (bridge.models.qwen_audio.qwen2_audio_provider.Qwen2AudioModelProvider method) (bridge.models.qwen_omni.qwen25_omni_provider.Qwen25OmniModelProvider method) (bridge.models.qwen_vl.qwen25_vl_provider.Qwen25VLModelProvider method) (bridge.models.qwen_vl.qwen35_vl_provider.Qwen35VLModelProvider method) (bridge.models.qwen_vl.qwen35_vl_provider.Qwen35VLMoEModelProvider method) (bridge.models.qwen_vl.qwen3_vl_provider.Qwen3VLModelProvider method) (bridge.models.qwen_vl.qwen3_vl_provider.Qwen3VLMoEModelProvider method) provider_bridge() (bridge.diffusion.conversion.flux.flux_bridge.FluxBridge method) (bridge.diffusion.conversion.wan.wan_bridge.WanBridge method) (bridge.models.bailing.bailing_moe2_bridge.BailingMoeV2Bridge method) (bridge.models.conversion.model_bridge.MegatronModelBridge method) (bridge.models.deepseek.deepseek_v2_bridge.DeepSeekV2Bridge method) (bridge.models.deepseek.deepseek_v3_bridge.DeepSeekV3Bridge method) (bridge.models.gemma.gemma2_bridge.Gemma2Bridge method) (bridge.models.gemma.gemma3_bridge.Gemma3ModelBridge method) (bridge.models.gemma.gemma_bridge.GemmaBridge method) (bridge.models.gemma_vl.gemma3_vl_bridge.Gemma3VLBridge method) (bridge.models.glm.glm45_bridge.GLM45Bridge method) (bridge.models.glm_vl.glm_45v_bridge.GLM45VBridge method) (bridge.models.gpt_oss.gpt_oss_bridge.GPTOSSBridge method) (bridge.models.kimi.kimi_bridge.KimiK2Bridge method) (bridge.models.kimi_vl.kimi_k25_vl_bridge.KimiK25VLBridge method) (bridge.models.llama.llama_bridge.LlamaBridge method) (bridge.models.llama_nemotron.llama_nemotron_bridge.LlamaNemotronBridge method) (bridge.models.mimo.mimo_bridge.MimoBridge method) (bridge.models.minimax_m2.minimax_m2_bridge.MiniMaxM2Bridge method) (bridge.models.ministral3.ministral3_bridge.Ministral3Bridge method) (bridge.models.mistral.mistral_bridge.MistralBridge method) (bridge.models.nemotron.nemotron_bridge.NemotronBridge method) (bridge.models.nemotron_vl.nemotron_vl_bridge.NemotronVLBridge method) (bridge.models.nemotronh.nemotron_h_bridge.NemotronHBridge method) (bridge.models.olmoe.olmoe_bridge.OlMoEBridge method) (bridge.models.qwen.qwen2_bridge.Qwen2Bridge method) (bridge.models.qwen.qwen3_bridge.Qwen3Bridge method) (bridge.models.qwen.qwen3_moe_bridge.Qwen3MoEBridge method) (bridge.models.qwen.qwen3_next_bridge.Qwen3NextBridge method) (bridge.models.qwen3_asr.qwen3_asr_bridge.Qwen3ASRBridge method) (bridge.models.qwen_audio.qwen2_audio_bridge.Qwen2AudioBridge method) (bridge.models.qwen_omni.qwen25_omni_bridge.Qwen25OmniBridge method) (bridge.models.qwen_vl.qwen25_vl_bridge.Qwen25VLBridge method) (bridge.models.qwen_vl.qwen35_vl_bridge.Qwen35VLBridge method) (bridge.models.qwen_vl.qwen35_vl_bridge.Qwen35VLMoEBridge method) (bridge.models.qwen_vl.qwen3_vl_bridge.Qwen3VLBridge method) (bridge.models.qwen_vl.qwen3_vl_bridge.Qwen3VLMoEBridge method) (bridge.models.sarvam.sarvam_mla_bridge.SarvamMLABridge method) (bridge.models.sarvam.sarvam_moe_bridge.SarvamMoEBridge method) PROVIDER_CLASS (bridge.models.conversion.model_bridge.MegatronModelBridge attribute) push_to_hub() (bridge.models.conversion.auto_bridge.AutoBridge method) PyTorchProfilerPlugin (class in bridge.recipes.run_plugins) PyTorchProfilerPluginScriptArgs (class in bridge.recipes.run_plugins) Q q_layernorm (bridge.diffusion.models.common.dit_attention.DiTCrossAttentionSubmodules attribute) (bridge.diffusion.models.flux.flux_attention.JointSelfAttentionSubmodules attribute) q_lora_rank (bridge.models.sarvam.sarvam_provider.SarvamMLAModelProvider attribute) qk_head_dim (bridge.models.sarvam.sarvam_provider.SarvamMLAModelProvider attribute) qk_layernorm (bridge.models.glm_vl.glm_45v_provider.GLM45VModelProvider attribute) (bridge.models.gpt_provider.GPTModelProvider attribute) (bridge.models.olmoe.olmoe_provider.OlMoEModelProvider attribute) (bridge.models.qwen3_asr.modeling_qwen3_asr.transformer_config.Qwen3ASRTransformerConfig attribute) (bridge.models.qwen3_asr.qwen3_asr_provider.Qwen3ASRModelProvider attribute) (bridge.models.qwen_omni.modeling_qwen25_omni.transformer_config.Qwen25OmniTransformerConfig attribute) (bridge.models.qwen_omni.qwen25_omni_provider.Qwen25OmniModelProvider attribute) (bridge.models.qwen_vl.qwen35_vl_provider.Qwen35VLModelProvider attribute) (bridge.models.qwen_vl.qwen35_vl_provider.Qwen35VLMoEModelProvider attribute) (bridge.models.qwen_vl.qwen3_vl_provider.Qwen3VLModelProvider attribute) (bridge.models.qwen_vl.qwen3_vl_provider.Qwen3VLMoEModelProvider attribute) (bridge.models.sarvam.sarvam_provider.SarvamMLAModelProvider attribute) (bridge.models.sarvam.sarvam_provider.SarvamMoEModelProvider attribute) qk_pos_emb_head_dim (bridge.models.sarvam.sarvam_provider.SarvamMLAModelProvider attribute) qkv_dgrad (bridge.training.comm_overlap.TransformerLayerTPOverlapCfg attribute) qkv_format (bridge.diffusion.models.wan.wan_provider.WanModelProvider attribute) qkv_fprop (bridge.training.comm_overlap.TransformerLayerTPOverlapCfg attribute) qkv_wgrad (bridge.training.comm_overlap.TransformerLayerTPOverlapCfg attribute) QKVMapping (class in bridge.models.conversion.param_mapping) quantize_to_int4() (in module bridge.models.kimi_vl.utils) query_first_fit() (bridge.data.datasets.packing_utils._SegmentTree method) query_pre_attn_scalar (bridge.models.gemma.gemma2_provider.Gemma2ModelProvider attribute) (bridge.models.gemma.gemma2_provider.Gemma2ModelProvider27B attribute) (bridge.models.gemma.gemma2_provider.Gemma2ModelProvider2B attribute) (bridge.models.gemma.gemma2_provider.Gemma2ModelProvider9B attribute) qwen25_14b_peft_config() (in module bridge.recipes.qwen.qwen2) qwen25_14b_pretrain_config() (in module bridge.recipes.qwen.qwen2) qwen25_14b_sft_config() (in module bridge.recipes.qwen.qwen2) qwen25_1p5b_peft_config() (in module bridge.recipes.qwen.qwen2) qwen25_1p5b_pretrain_config() (in module bridge.recipes.qwen.qwen2) qwen25_1p5b_sft_config() (in module bridge.recipes.qwen.qwen2) qwen25_32b_peft_config() (in module bridge.recipes.qwen.qwen2) qwen25_32b_pretrain_config() (in module bridge.recipes.qwen.qwen2) qwen25_32b_sft_config() (in module bridge.recipes.qwen.qwen2) qwen25_500m_peft_config() (in module bridge.recipes.qwen.qwen2) qwen25_500m_pretrain_config() (in module bridge.recipes.qwen.qwen2) qwen25_500m_sft_config() (in module bridge.recipes.qwen.qwen2) qwen25_72b_peft_config() (in module bridge.recipes.qwen.qwen2) qwen25_72b_pretrain_config() (in module bridge.recipes.qwen.qwen2) qwen25_72b_sft_config() (in module bridge.recipes.qwen.qwen2) qwen25_7b_peft_config() (in module bridge.recipes.qwen.qwen2) qwen25_7b_pretrain_config() (in module bridge.recipes.qwen.qwen2) qwen25_7b_sft_config() (in module bridge.recipes.qwen.qwen2) qwen25_vl_32b_peft_config() (in module bridge.recipes.qwen_vl.qwen25_vl) qwen25_vl_32b_sft_config() (in module bridge.recipes.qwen_vl.qwen25_vl) qwen25_vl_3b_peft_config() (in module bridge.recipes.qwen_vl.qwen25_vl) qwen25_vl_3b_sft_config() (in module bridge.recipes.qwen_vl.qwen25_vl) qwen25_vl_72b_peft_config() (in module bridge.recipes.qwen_vl.qwen25_vl) qwen25_vl_72b_sft_config() (in module bridge.recipes.qwen_vl.qwen25_vl) qwen25_vl_7b_peft_config() (in module bridge.recipes.qwen_vl.qwen25_vl) qwen25_vl_7b_sft_config() (in module bridge.recipes.qwen_vl.qwen25_vl) Qwen25OmniBridge (class in bridge.models.qwen_omni.qwen25_omni_bridge) Qwen25OmniModel (class in bridge.models.qwen_omni.modeling_qwen25_omni.model) Qwen25OmniModelProvider (class in bridge.models.qwen_omni.qwen25_omni_provider) Qwen25OmniThinkerModel (class in bridge.models.qwen_omni.modeling_qwen25_omni.thinker_model) Qwen25OmniTransformerConfig (class in bridge.models.qwen_omni.modeling_qwen25_omni.transformer_config) Qwen25VLBridge (class in bridge.models.qwen_vl.qwen25_vl_bridge) Qwen25VLModel (class in bridge.models.qwen_vl.modeling_qwen25_vl) Qwen25VLModelProvider (class in bridge.models.qwen_vl.qwen25_vl_provider) qwen2_1p5b_peft_config() (in module bridge.recipes.qwen.qwen2) qwen2_1p5b_pretrain_config() (in module bridge.recipes.qwen.qwen2) qwen2_1p5b_sft_config() (in module bridge.recipes.qwen.qwen2) qwen2_500m_peft_config() (in module bridge.recipes.qwen.qwen2) qwen2_500m_pretrain_config() (in module bridge.recipes.qwen.qwen2) qwen2_500m_sft_config() (in module bridge.recipes.qwen.qwen2) qwen2_5_collate_fn() (in module bridge.data.vlm_datasets.collate) Qwen2_5_VLVisualInputs (class in bridge.training.utils.visual_inputs) qwen2_72b_peft_config() (in module bridge.recipes.qwen.qwen2) qwen2_72b_pretrain_config() (in module bridge.recipes.qwen.qwen2) qwen2_72b_sft_config() (in module bridge.recipes.qwen.qwen2) qwen2_7b_peft_config() (in module bridge.recipes.qwen.qwen2) qwen2_7b_pretrain_config() (in module bridge.recipes.qwen.qwen2) qwen2_7b_sft_config() (in module bridge.recipes.qwen.qwen2) qwen2_audio_7b_finetune_config() (in module bridge.recipes.qwen2_audio.qwen2_audio) qwen2_audio_collate_fn() (in module bridge.data.vlm_datasets.collate) Qwen2AudioBridge (class in bridge.models.qwen_audio.qwen2_audio_bridge) Qwen2AudioInputs (class in bridge.training.utils.visual_inputs) Qwen2AudioModel (class in bridge.models.qwen_audio.modeling_qwen2_audio) Qwen2AudioModelProvider (class in bridge.models.qwen_audio.qwen2_audio_provider) Qwen2Bridge (class in bridge.models.qwen.qwen2_bridge) qwen2vl_pad_and_split() (in module bridge.models.qwen_vl.modelling_qwen3_vl.utils) qwen35_vl_122b_a10b_peft_config() (in module bridge.recipes.qwen_vl.qwen35_vl) qwen35_vl_122b_a10b_pretrain_mock_config() (in module bridge.recipes.qwen_vl.qwen35_vl) qwen35_vl_122b_a10b_sft_config() (in module bridge.recipes.qwen_vl.qwen35_vl) qwen35_vl_27b_peft_config() (in module bridge.recipes.qwen_vl.qwen35_vl) qwen35_vl_27b_sft_config() (in module bridge.recipes.qwen_vl.qwen35_vl) qwen35_vl_2b_peft_config() (in module bridge.recipes.qwen_vl.qwen35_vl) qwen35_vl_2b_sft_config() (in module bridge.recipes.qwen_vl.qwen35_vl) qwen35_vl_35b_a3b_fsdp_sft_config() (in module bridge.recipes.qwen_vl.qwen35_vl) qwen35_vl_35b_a3b_peft_config() (in module bridge.recipes.qwen_vl.qwen35_vl) qwen35_vl_35b_a3b_pretrain_mock_config() (in module bridge.recipes.qwen_vl.qwen35_vl) qwen35_vl_35b_a3b_sft_config() (in module bridge.recipes.qwen_vl.qwen35_vl) qwen35_vl_397b_a17b_peft_config() (in module bridge.recipes.qwen_vl.qwen35_vl) qwen35_vl_397b_a17b_pretrain_mock_config() (in module bridge.recipes.qwen_vl.qwen35_vl) qwen35_vl_397b_a17b_sft_config() (in module bridge.recipes.qwen_vl.qwen35_vl) qwen35_vl_4b_peft_config() (in module bridge.recipes.qwen_vl.qwen35_vl) qwen35_vl_4b_sft_config() (in module bridge.recipes.qwen_vl.qwen35_vl) qwen35_vl_800m_peft_config() (in module bridge.recipes.qwen_vl.qwen35_vl) qwen35_vl_800m_sft_config() (in module bridge.recipes.qwen_vl.qwen35_vl) qwen35_vl_9b_peft_config() (in module bridge.recipes.qwen_vl.qwen35_vl) qwen35_vl_9b_pretrain_mock_config() (in module bridge.recipes.qwen_vl.qwen35_vl) qwen35_vl_9b_sft_config() (in module bridge.recipes.qwen_vl.qwen35_vl) Qwen35VLBridge (class in bridge.models.qwen_vl.qwen35_vl_bridge) Qwen35VLModelProvider (class in bridge.models.qwen_vl.qwen35_vl_provider) Qwen35VLMoEBridge (class in bridge.models.qwen_vl.qwen35_vl_bridge) Qwen35VLMoEModelProvider (class in bridge.models.qwen_vl.qwen35_vl_provider) qwen3_14b_peft_config() (in module bridge.recipes.qwen.qwen3) qwen3_14b_pretrain_config() (in module bridge.recipes.qwen.qwen3) qwen3_14b_sft_config() (in module bridge.recipes.qwen.qwen3) qwen3_1p7b_peft_config() (in module bridge.recipes.qwen.qwen3) qwen3_1p7b_pretrain_config() (in module bridge.recipes.qwen.qwen3) qwen3_1p7b_sft_config() (in module bridge.recipes.qwen.qwen3) qwen3_235b_a22b_peft_config() (in module bridge.recipes.qwen.qwen3_moe) qwen3_235b_a22b_pretrain_config() (in module bridge.recipes.qwen.qwen3_moe) qwen3_235b_a22b_sft_config() (in module bridge.recipes.qwen.qwen3_moe) qwen3_30b_a3b_peft_config() (in module bridge.recipes.qwen.qwen3_moe) qwen3_30b_a3b_pretrain_config() (in module bridge.recipes.qwen.qwen3_moe) qwen3_30b_a3b_sft_config() (in module bridge.recipes.qwen.qwen3_moe) qwen3_32b_peft_config() (in module bridge.recipes.qwen.qwen3) qwen3_32b_pretrain_config() (in module bridge.recipes.qwen.qwen3) qwen3_32b_sft_config() (in module bridge.recipes.qwen.qwen3) qwen3_4b_peft_config() (in module bridge.recipes.qwen.qwen3) qwen3_4b_pretrain_config() (in module bridge.recipes.qwen.qwen3) qwen3_4b_sft_config() (in module bridge.recipes.qwen.qwen3) qwen3_600m_peft_config() (in module bridge.recipes.qwen.qwen3) qwen3_600m_pretrain_config() (in module bridge.recipes.qwen.qwen3) qwen3_600m_sft_128k_config() (in module bridge.recipes.qwen.qwen3) qwen3_600m_sft_config() (in module bridge.recipes.qwen.qwen3) qwen3_8b_peft_config() (in module bridge.recipes.qwen.qwen3) qwen3_8b_pretrain_config() (in module bridge.recipes.qwen.qwen3) qwen3_8b_sft_config() (in module bridge.recipes.qwen.qwen3) qwen3_next_80b_a3b_peft_config() (in module bridge.recipes.qwen.qwen3_next) qwen3_next_80b_a3b_pretrain_config() (in module bridge.recipes.qwen.qwen3_next) qwen3_next_80b_a3b_sft_config() (in module bridge.recipes.qwen.qwen3_next) qwen3_vl_235b_a22b_peft_config() (in module bridge.recipes.qwen_vl.qwen3_vl) qwen3_vl_235b_a22b_pretrain_mock_config() (in module bridge.recipes.qwen_vl.qwen3_vl) qwen3_vl_235b_a22b_sft_config() (in module bridge.recipes.qwen_vl.qwen3_vl) qwen3_vl_30b_a3b_peft_config() (in module bridge.recipes.qwen_vl.qwen3_vl) qwen3_vl_30b_a3b_pretrain_mock_config() (in module bridge.recipes.qwen_vl.qwen3_vl) qwen3_vl_30b_a3b_sft_config() (in module bridge.recipes.qwen_vl.qwen3_vl) qwen3_vl_8b_peft_config() (in module bridge.recipes.qwen_vl.qwen3_vl) qwen3_vl_8b_peft_energon_config() (in module bridge.recipes.qwen_vl.qwen3_vl) qwen3_vl_8b_pretrain_mock_config() (in module bridge.recipes.qwen_vl.qwen3_vl) qwen3_vl_8b_sft_config() (in module bridge.recipes.qwen_vl.qwen3_vl) Qwen3ASRAudioAttention (class in bridge.models.qwen3_asr.hf_qwen3_asr.modeling_qwen3_asr) Qwen3ASRAudioEncoder (class in bridge.models.qwen3_asr.hf_qwen3_asr.modeling_qwen3_asr) Qwen3ASRAudioEncoderConfig (class in bridge.models.qwen3_asr.hf_qwen3_asr.configuration_qwen3_asr) Qwen3ASRAudioEncoderLayer (class in bridge.models.qwen3_asr.hf_qwen3_asr.modeling_qwen3_asr) Qwen3ASRBridge (class in bridge.models.qwen3_asr.qwen3_asr_bridge) Qwen3ASRConfig (class in bridge.models.qwen3_asr.hf_qwen3_asr.configuration_qwen3_asr) Qwen3ASRForConditionalGeneration (class in bridge.models.qwen3_asr.hf_qwen3_asr.modeling_qwen3_asr) Qwen3ASRModel (class in bridge.models.qwen3_asr.modeling_qwen3_asr.model) Qwen3ASRModelProvider (class in bridge.models.qwen3_asr.qwen3_asr_provider) Qwen3ASRPreTrainedModel (class in bridge.models.qwen3_asr.hf_qwen3_asr.modeling_qwen3_asr) Qwen3ASRPreTrainedModelForConditionalGeneration (class in bridge.models.qwen3_asr.hf_qwen3_asr.modeling_qwen3_asr) Qwen3ASRProcessor (class in bridge.models.qwen3_asr.hf_qwen3_asr.processing_qwen3_asr) Qwen3ASRProcessorKwargs (class in bridge.models.qwen3_asr.hf_qwen3_asr.processing_qwen3_asr) Qwen3ASRTextAttention (class in bridge.models.qwen3_asr.hf_qwen3_asr.modeling_qwen3_asr) Qwen3ASRTextConfig (class in bridge.models.qwen3_asr.hf_qwen3_asr.configuration_qwen3_asr) Qwen3ASRTextMLP (class in bridge.models.qwen3_asr.hf_qwen3_asr.modeling_qwen3_asr) Qwen3ASRTextRMSNorm (class in bridge.models.qwen3_asr.hf_qwen3_asr.modeling_qwen3_asr) Qwen3ASRThinkerCausalLMOutputWithPast (class in bridge.models.qwen3_asr.hf_qwen3_asr.modeling_qwen3_asr) Qwen3ASRThinkerConfig (class in bridge.models.qwen3_asr.hf_qwen3_asr.configuration_qwen3_asr) Qwen3ASRThinkerForConditionalGeneration (class in bridge.models.qwen3_asr.hf_qwen3_asr.modeling_qwen3_asr) Qwen3ASRThinkerModel (class in bridge.models.qwen3_asr.modeling_qwen3_asr.thinker_model) Qwen3ASRThinkerTextAttention (class in bridge.models.qwen3_asr.hf_qwen3_asr.modeling_qwen3_asr) Qwen3ASRThinkerTextDecoderLayer (class in bridge.models.qwen3_asr.hf_qwen3_asr.modeling_qwen3_asr) Qwen3ASRThinkerTextMLP (class in bridge.models.qwen3_asr.hf_qwen3_asr.modeling_qwen3_asr) Qwen3ASRThinkerTextModel (class in bridge.models.qwen3_asr.hf_qwen3_asr.modeling_qwen3_asr) Qwen3ASRThinkerTextPreTrainedModel (class in bridge.models.qwen3_asr.hf_qwen3_asr.modeling_qwen3_asr) Qwen3ASRThinkerTextRMSNorm (class in bridge.models.qwen3_asr.hf_qwen3_asr.modeling_qwen3_asr) Qwen3ASRThinkerTextRotaryEmbedding (class in bridge.models.qwen3_asr.hf_qwen3_asr.modeling_qwen3_asr) Qwen3ASRTransformerConfig (class in bridge.models.qwen3_asr.modeling_qwen3_asr.transformer_config) Qwen3Bridge (class in bridge.models.qwen.qwen3_bridge) Qwen3MoEBridge (class in bridge.models.qwen.qwen3_moe_bridge) Qwen3NextBridge (class in bridge.models.qwen.qwen3_next_bridge) qwen3vl_cp_split() (in module bridge.models.qwen_vl.modelling_qwen3_vl.utils) Qwen3VLBridge (class in bridge.models.qwen_vl.qwen3_vl_bridge) Qwen3VLCommonKwargs (class in bridge.recipes.qwen_vl.qwen3_vl) Qwen3VLGPTModel (class in bridge.models.qwen_vl.modelling_qwen3_vl.text_model) Qwen3VLModel (class in bridge.models.qwen_vl.modelling_qwen3_vl.model) Qwen3VLModelProvider (class in bridge.models.qwen_vl.qwen3_vl_provider) Qwen3VLMoEBridge (class in bridge.models.qwen_vl.qwen3_vl_bridge) Qwen3VLMoEModelProvider (class in bridge.models.qwen_vl.qwen3_vl_provider) Qwen3VLMultimodalRotaryEmbedding (class in bridge.models.qwen_vl.modelling_qwen3_vl.rope) Qwen3VLSelfAttention (class in bridge.models.qwen_vl.modelling_qwen3_vl.attention) Qwen3VLTransformerBlock (class in bridge.models.qwen_vl.modelling_qwen3_vl.transformer_block) Qwen3VLTransformerConfig (class in bridge.models.qwen_vl.modelling_qwen3_vl.transformer_config) Qwen3VLVisionModel (class in bridge.models.qwen_vl.modelling_qwen3_vl.vision_model) Qwen3VLVisionPatchEmbed (class in bridge.models.qwen_vl.modelling_qwen3_vl.utils) Qwen3VLVisionPatchMerger (class in bridge.models.qwen_vl.modelling_qwen3_vl.utils) Qwen3VLVisionRotaryEmbedding (class in bridge.models.qwen_vl.modelling_qwen3_vl.utils) Qwen3VLVisionTransformerBlock (class in bridge.models.qwen_vl.modelling_qwen3_vl.transformer_block) QWEN_TOKENS (in module bridge.data.vlm_datasets.token_utils) R R (in module bridge.utils.decorators) rand_name() (in module bridge.diffusion.models.wan.inference.utils) random_seed (bridge.data.mimo.mock_provider.MockMimoProvider attribute) (bridge.data.vlm_datasets.mock_provider.MockVLMConversationProvider attribute) (bridge.recipes.qwen_vl.qwen25_vl_dataset.MockQwen25VLDatasetProvider attribute) RandomSeedDataset (class in bridge.data.samplers) rank_0_prepare_data() (in module bridge.data.datasets.utils) rank_heartbeat_timeout (bridge.recipes.run_plugins.FaultTolerancePlugin attribute) rank_offset (bridge.models.mimo.mimo_config.ModuleParallelismConfig attribute) read_metadata() (in module bridge.training.checkpointing) read_run_config() (in module bridge.training.utils.checkpoint_utils) read_train_state() (in module bridge.training.utils.checkpoint_utils) recompute_granularity (bridge.models.sarvam.sarvam_provider.SarvamMLAModelProvider attribute) (bridge.models.sarvam.sarvam_provider.SarvamMoEModelProvider attribute) recompute_modules (bridge.models.sarvam.sarvam_provider.SarvamMLAModelProvider attribute) (bridge.models.sarvam.sarvam_provider.SarvamMoEModelProvider attribute) recompute_num_layers (bridge.models.t5_provider.T5ModelProvider attribute) record_memory_history (bridge.recipes.run_plugins.PyTorchProfilerPlugin attribute) (bridge.recipes.run_plugins.PyTorchProfilerPluginScriptArgs attribute) record_shapes (bridge.recipes.run_plugins.NsysPlugin attribute) (bridge.recipes.run_plugins.NsysPluginScriptArgs attribute) (bridge.recipes.run_plugins.PyTorchProfilerPlugin attribute) (bridge.recipes.run_plugins.PyTorchProfilerPluginScriptArgs attribute) reduce_max_stat_across_model_parallel_group() (in module bridge.training.utils.train_utils) regex() (bridge.models.hf_pretrained.state.StateDict method) register() (bridge.training.callbacks.CallbackManager method) (in module bridge.training.mixed_precision) register_bridge() (bridge.models.conversion.model_bridge.MegatronModelBridge class method) register_bridge_implementation() (in module bridge.models.conversion.model_bridge) register_module_type() (bridge.models.conversion.param_mapping.AutoMapping class method) register_post_wrap_hook() (bridge.models.model_provider.ModelProviderMixin method) register_pre_wrap_hook() (bridge.models.model_provider.ModelProviderMixin method) relative_attention_max_distance (bridge.models.t5_provider.T5ModelProvider attribute) relative_attention_num_buckets (bridge.models.t5_provider.T5ModelProvider attribute) release() (bridge.training.utils.sig_utils.DistributedSignalHandler method) remove_non_pickleables() (in module bridge.models.conversion.utils) reorganize_inputs() (in module bridge.models.qwen_vl.modelling_qwen3_vl.utils) repeat_kv() (in module bridge.models.bailing.modeling_bailing_moe_v2) (in module bridge.models.qwen3_asr.hf_qwen3_asr.modeling_qwen3_asr) repetition_penalty (bridge.models.hf_pretrained.causal_lm.GenerateKwargs attribute) replace_multimodal_special_tokens() (bridge.models.qwen3_asr.hf_qwen3_asr.processing_qwen3_asr.Qwen3ASRProcessor method) ReplicatedMapping (class in bridge.models.conversion.param_mapping) report_l2_norm_grad() (in module bridge.training.utils.train_utils) report_memory() (in module bridge.training.utils.train_utils) report_runtime() (in module bridge.training.utils.train_utils) report_theoretical_memory() (in module bridge.training.utils.theoretical_memory_utils) report_throughput() (in module bridge.training.utils.train_utils) report_time_interval (bridge.training.config.NVRxStragglerDetectionConfig attribute) REQUIRED_COLUMNS (in module bridge.data.datasets.packed_parquet) rerun_mode (bridge.training.config.RerunStateMachineConfig attribute) rerun_state_machine (bridge.training.config.ConfigContainer attribute) RerunStateMachineConfig (class in bridge.training.config) reset_for_restart() (bridge.training.state.GlobalState method) reset_parameters() (bridge.models.bailing.modeling_bailing_moe_v2.BailingMoeV2Gate method) resolve() (bridge.models.conversion.model_bridge._HFNameSuffixMapping method) (bridge.models.conversion.param_mapping.AutoMapping method) (bridge.models.conversion.param_mapping.ConcatenatedQKVMapping method) (bridge.models.conversion.param_mapping.FusedExpertMapping method) (bridge.models.conversion.param_mapping.FusedGatedExpertMapping method) (bridge.models.conversion.param_mapping.GatedMLPMapping method) (bridge.models.conversion.param_mapping.GDNLinearMapping method) (bridge.models.conversion.param_mapping.GDNLinearMappingSeparate method) (bridge.models.conversion.param_mapping.KVMapping method) (bridge.models.conversion.param_mapping.MegatronParamMapping method) (bridge.models.conversion.param_mapping.QKVMapping method) (bridge.models.conversion.quant_mapping.AmaxFanoutMapping method) (bridge.models.nemotronh.nemotron_h_bridge._MTPFlatteningMapping method) (bridge.models.nemotronh.nemotron_h_bridge._MTPFlatteningQKVMapping method) resolve_packed_parquet_paths() (in module bridge.data.datasets.packed_parquet) resolve_path() (in module bridge.utils.common_utils) resolve_slurm_local_rank() (in module bridge.utils.slurm_utils) resolve_slurm_master_addr() (in module bridge.utils.slurm_utils) resolve_slurm_master_port() (in module bridge.utils.slurm_utils) resolve_slurm_rank() (in module bridge.utils.slurm_utils) resolve_slurm_world_size() (in module bridge.utils.slurm_utils) restore_modelopt_state (bridge.models.common.base.ModelConfig attribute) (bridge.models.gpt_provider.GPTModelProvider attribute) (bridge.models.mamba.mamba_provider.MambaModelProvider attribute) return_attention_mask (bridge.models.hf_pretrained.causal_lm.EncodeKwargs attribute) return_dict (bridge.models.gemma_vl.gemma3_vl_provider.Gemma3VLModelProvider attribute) (bridge.models.glm_vl.glm_45v_provider.GLM45VModelProvider attribute) (bridge.models.qwen_vl.qwen25_vl_provider.Qwen25VLModelProvider attribute) return_dict_in_generate (bridge.models.hf_pretrained.causal_lm.GenerateKwargs attribute) return_tensors (bridge.models.hf_pretrained.causal_lm.EncodeKwargs attribute) return_token_type_ids (bridge.models.hf_pretrained.causal_lm.EncodeKwargs attribute) reuse_grad_buf_for_mxfp8_param_ag (bridge.training.mixed_precision.MixedPrecisionConfig attribute) rewrite (bridge.data.builders.hf_dataset.HFDatasetConfig attribute) RingExchangeOverlapCfg (class in bridge.training.comm_overlap) RMSNorm (class in bridge.diffusion.models.common.normalization) RMSNorm2ZeroCenteredRMSNormMapping (class in bridge.models.conversion.param_mapping) rng (bridge.training.config.ConfigContainer attribute) roll_tensor() (in module bridge.models.bailing.modeling_bailing_moe_v2) rope() (in module bridge.diffusion.models.flux.layers) rope_deltas (bridge.models.qwen3_asr.hf_qwen3_asr.modeling_qwen3_asr.Qwen3ASRThinkerCausalLMOutputWithPast attribute) rope_local_base_freq_from_hf() (in module bridge.models.conversion.transformers_compat) rope_params() (bridge.diffusion.models.wan.rope_utils.Wan3DRopeEmbeddings method) rope_scaling (bridge.models.gpt.gpt_builder.GPTModelConfig attribute) (bridge.models.gpt_provider.GPTModelProvider attribute) rope_scaling_factor (bridge.models.gemma.gemma3_provider.Gemma3ModelProvider attribute) (bridge.models.gemma.gemma3_provider.Gemma3ModelProvider12B attribute) (bridge.models.gemma.gemma3_provider.Gemma3ModelProvider1B attribute) (bridge.models.gemma.gemma3_provider.Gemma3ModelProvider27B attribute) (bridge.models.gemma.gemma3_provider.Gemma3ModelProvider4B attribute) (bridge.models.gpt.gpt_builder.GPTModelConfig attribute) (bridge.models.gpt_provider.GPTModelProvider attribute) rope_scaling_factor_from_hf() (in module bridge.models.conversion.transformers_compat) rope_theta_from_hf() (in module bridge.models.conversion.transformers_compat) rope_type (bridge.models.sarvam.sarvam_provider.SarvamMLAModelProvider attribute) rot_pos_emb() (bridge.models.qwen_vl.modelling_qwen3_vl.vision_model.Qwen3VLVisionModel method) rotary_base (bridge.models.gemma.gemma2_provider.Gemma2ModelProvider attribute) (bridge.models.gemma.gemma3_provider.Gemma3ModelProvider attribute) (bridge.models.glm_vl.glm_45v_provider.GLM45VModelProvider attribute) (bridge.models.gpt.gpt_builder.GPTModelConfig attribute) (bridge.models.gpt_provider.GPTModelProvider attribute) (bridge.models.mamba.mamba_builder.MambaModelConfig attribute) (bridge.models.mamba.mamba_provider.MambaModelProvider attribute) (bridge.models.ministral3.ministral3_provider.Ministral3ModelProvider attribute) (bridge.models.ministral3.ministral3_provider.Ministral3ModelProvider14B attribute) (bridge.models.mistral.mistral_provider.MistralModelProvider attribute) (bridge.models.mistral.mistral_provider.MistralSmall3ModelProvider24B attribute) (bridge.models.olmoe.olmoe_provider.OlMoEModelProvider attribute) (bridge.models.qwen3_asr.modeling_qwen3_asr.transformer_config.Qwen3ASRTransformerConfig attribute) (bridge.models.qwen3_asr.qwen3_asr_provider.Qwen3ASRModelProvider attribute) (bridge.models.qwen_omni.modeling_qwen25_omni.transformer_config.Qwen25OmniTransformerConfig attribute) (bridge.models.qwen_omni.qwen25_omni_provider.Qwen25OmniModelProvider attribute) (bridge.models.qwen_vl.modelling_qwen3_vl.transformer_config.Qwen3VLTransformerConfig attribute) (bridge.models.qwen_vl.qwen35_vl_provider.Qwen35VLModelProvider attribute) (bridge.models.qwen_vl.qwen35_vl_provider.Qwen35VLMoEModelProvider attribute) (bridge.models.qwen_vl.qwen3_vl_provider.Qwen3VLModelProvider attribute) (bridge.models.qwen_vl.qwen3_vl_provider.Qwen3VLMoEModelProvider attribute) (bridge.models.sarvam.sarvam_provider.SarvamMLAModelProvider attribute) (bridge.models.sarvam.sarvam_provider.SarvamMoEModelProvider attribute) rotary_interleaved (bridge.diffusion.models.flux.flux_provider.FluxProvider attribute) (bridge.diffusion.models.wan.wan_provider.WanModelProvider attribute) rotary_percent (bridge.models.glm_vl.glm_45v_provider.GLM45VModelProvider attribute) (bridge.models.gpt.gpt_builder.GPTModelConfig attribute) (bridge.models.gpt_provider.GPTModelProvider attribute) (bridge.models.llama_nemotron.llama_nemotron_provider.LlamaNemotronHeterogeneousProvider attribute) (bridge.models.mamba.mamba_builder.MambaModelConfig attribute) (bridge.models.mamba.mamba_provider.MambaModelProvider attribute) (bridge.models.mistral.mistral_provider.MistralSmall3ModelProvider24B attribute) (bridge.models.qwen3_asr.modeling_qwen3_asr.transformer_config.Qwen3ASRTransformerConfig attribute) (bridge.models.qwen_omni.modeling_qwen25_omni.transformer_config.Qwen25OmniTransformerConfig attribute) (bridge.models.qwen_vl.modelling_qwen3_vl.transformer_config.Qwen3VLTransformerConfig attribute) (bridge.models.qwen_vl.qwen35_vl_provider.Qwen35VLModelProvider attribute) (bridge.models.qwen_vl.qwen35_vl_provider.Qwen35VLMoEModelProvider attribute) (bridge.models.qwen_vl.qwen3_vl_provider.Qwen3VLModelProvider attribute) (bridge.models.t5_provider.T5ModelProvider attribute) rotary_scaling_factor (bridge.models.gpt_provider.GPTModelProvider attribute) (bridge.models.sarvam.sarvam_provider.SarvamMLAModelProvider attribute) rotate_half() (in module bridge.models.bailing.modeling_bailing_moe_v2) (in module bridge.models.qwen3_asr.hf_qwen3_asr.modeling_qwen3_asr) router_logits (bridge.models.bailing.modeling_bailing_moe_v2.MoEV2CausalLMOutputWithPast attribute) RowParallelMapping (class in bridge.models.conversion.param_mapping) runtime_config_update() (in module bridge.training.config) S safe_import() (in module bridge.utils.import_utils) safe_import_from() (in module bridge.utils.import_utils) safe_load_config_with_retry() (in module bridge.models.hf_pretrained.safe_config_loader) safe_map() (in module bridge.data.datasets.utils) safe_pickle_load() (in module bridge.utils.safe_pickle) safe_pickle_loads() (in module bridge.utils.safe_pickle) SAFE_REPOS (in module bridge.models.hf_pretrained.utils) safe_serialize() (in module bridge.training.utils.log_utils) safe_shutdown_nvrx_straggler_manager() (in module bridge.training.nvrx_straggler) safe_yaml_representers() (in module bridge.utils.yaml_utils) SafeTensorsStateSource (class in bridge.models.hf_pretrained.state) sample_timesteps() (bridge.diffusion.common.flow_matching.flow_matching_pipeline.FlowMatchingPipeline method) sample_video_frames_to_data_urls() (in module bridge.models.nemotron_vl.nemotron_vl_utils) SarvamMLABridge (class in bridge.models.sarvam.sarvam_mla_bridge) SarvamMLAModelProvider (class in bridge.models.sarvam.sarvam_provider) SarvamMoEBridge (class in bridge.models.sarvam.sarvam_moe_bridge) SarvamMoEModelProvider (class in bridge.models.sarvam.sarvam_provider) save() (bridge.training.checkpointing.CheckpointManager method) (bridge.training.checkpointing.DefaultCheckpointManager method) save_artifacts() (bridge.diffusion.conversion.flux.flux_hf_pretrained.PreTrainedFlux method) (bridge.diffusion.conversion.wan.wan_hf_pretrained.PreTrainedWAN method) (bridge.models.hf_pretrained.base.PreTrainedBase method) save_checkpoint() (in module bridge.training.checkpointing) save_checkpoint_and_time() (in module bridge.training.train) save_converted_model_to (bridge.diffusion.models.flux.flux_provider.FluxProvider attribute) save_dir (bridge.recipes.run_plugins.WandbPlugin attribute) (bridge.recipes.run_plugins.WandbPluginScriptArgs attribute) save_generator() (bridge.diffusion.conversion.flux.flux_hf_pretrained.FluxSafeTensorsStateSource method) (bridge.diffusion.conversion.wan.wan_hf_pretrained.WanSafeTensorsStateSource method) (bridge.models.hf_pretrained.state.SafeTensorsStateSource method) save_hf_adapter() (bridge.models.conversion.auto_bridge.AutoBridge method) save_hf_pretrained() (bridge.models.config.ConfigProtocol method) (bridge.models.conversion.auto_bridge.AutoBridge method) (bridge.models.model_provider.ModelProviderMixin method) (in module bridge.models.config) save_hf_weights() (bridge.models.conversion.auto_bridge.AutoBridge method) save_megatron_model() (bridge.models.conversion.auto_bridge.AutoBridge method) (in module bridge.training.model_load_save) save_pretrained() (bridge.models.hf_pretrained.causal_lm.PreTrainedCausalLM method) (bridge.models.hf_pretrained.vlm.PreTrainedVLM method) save_state() (bridge.data.energon.base_energon_datamodule.EnergonDataloader method) save_tokenizer_assets() (in module bridge.training.checkpointing) scale_add() (bridge.diffusion.models.flux.flux_layer_spec.AdaLN method) (bridge.diffusion.models.wan.wan_layer_spec.WanAdaLN method) scale_noise() (bridge.diffusion.models.flux.flow_matching.flux_inference_pipeline.FlowMatchEulerDiscreteScheduler method) scaled_modulated_layernorm() (bridge.diffusion.models.flux.flux_layer_spec.AdaLN method) scatter_embedding_sequence_parallel (bridge.models.gemma.gemma3_provider.Gemma3ModelProvider attribute) (bridge.models.gemma_vl.gemma3_vl_provider.Gemma3VLModelProvider attribute) (bridge.models.glm_vl.glm_45v_provider.GLM45VModelProvider attribute) (bridge.models.gpt.gpt_builder.GPTModelConfig attribute) (bridge.models.gpt_provider.GPTModelProvider attribute) (bridge.models.kimi_vl.kimi_k25_vl_provider.KimiK25VLModelProvider attribute) (bridge.models.ministral3.ministral3_provider.Ministral3ModelProvider attribute) (bridge.models.nemotron_vl.nemotron_vl_provider.NemotronNano12Bv2VLModelProvider attribute) (bridge.models.qwen3_asr.qwen3_asr_provider.Qwen3ASRModelProvider attribute) (bridge.models.qwen_audio.qwen2_audio_provider.Qwen2AudioModelProvider attribute) (bridge.models.qwen_omni.qwen25_omni_provider.Qwen25OmniModelProvider attribute) (bridge.models.qwen_vl.qwen25_vl_provider.Qwen25VLModelProvider attribute) (bridge.models.qwen_vl.qwen35_vl_provider.Qwen35VLModelProvider attribute) (bridge.models.qwen_vl.qwen35_vl_provider.Qwen35VLMoEModelProvider attribute) (bridge.models.qwen_vl.qwen3_vl_provider.Qwen3VLModelProvider attribute) (bridge.models.qwen_vl.qwen3_vl_provider.Qwen3VLMoEModelProvider attribute) scatter_to_tp_ranks() (bridge.models.conversion.param_mapping.MegatronParamMapping method) schedule_async_save() (in module bridge.training.checkpointing) scheduler (bridge.training.callbacks.CallbackContext attribute) (bridge.training.config.ConfigContainer attribute) (bridge.training.setup.SetupOutput attribute) scheduler_config (bridge.training.config.OptimizerConfigOverrideProviderContext attribute) SchedulerConfig (class in bridge.training.config) schedulers (bridge.training.setup_mimo.MimoSetupOutput attribute) script_args_converter_fn (bridge.recipes.run_plugins.CometPlugin attribute) (bridge.recipes.run_plugins.FaultTolerancePlugin attribute) (bridge.recipes.run_plugins.NsysPlugin attribute) (bridge.recipes.run_plugins.PerfEnvPlugin attribute) (bridge.recipes.run_plugins.PreemptionPlugin attribute) (bridge.recipes.run_plugins.PyTorchProfilerPlugin attribute) (bridge.recipes.run_plugins.WandbPlugin attribute) seconds_per_chunk (bridge.models.qwen_omni.modeling_qwen25_omni.transformer_config.Qwen25OmniTransformerConfig attribute) (bridge.models.qwen_omni.qwen25_omni_provider.Qwen25OmniModelProvider attribute) seed (bridge.training.config.FinetuningDatasetConfig attribute) seen_checkpoints_cnt (bridge.training.state.FaultToleranceState attribute) seen_tr_iters_cnt (bridge.training.state.FaultToleranceState attribute) select_samples_to_pack() (bridge.diffusion.data.common.diffusion_task_encoder_with_sp.DiffusionTaskEncoderWithSequencePacking method) seq_len_interpolation_factor (bridge.models.gpt.gpt_builder.GPTModelConfig attribute) (bridge.models.gpt_provider.GPTModelProvider attribute) (bridge.models.mamba.mamba_builder.MambaModelConfig attribute) (bridge.models.mamba.mamba_provider.MambaModelProvider attribute) (bridge.models.t5_provider.T5ModelProvider attribute) seq_len_kv (bridge.diffusion.data.common.diffusion_sample.DiffusionSample attribute) seq_len_kv_padded (bridge.diffusion.data.common.diffusion_sample.DiffusionSample attribute) seq_len_q (bridge.diffusion.data.common.diffusion_sample.DiffusionSample attribute) seq_len_q_padded (bridge.diffusion.data.common.diffusion_sample.DiffusionSample attribute) seq_length (bridge.data.energon.energon_provider.EnergonProvider attribute) (bridge.data.mimo.hf_provider.HFMimoDatasetProvider attribute) (bridge.data.mimo.mock_provider.MockMimoProvider attribute) (bridge.data.vlm_datasets.hf_provider.HFDatasetConversationProvider attribute) (bridge.data.vlm_datasets.mock_provider.MockVLMConversationProvider attribute) (bridge.data.vlm_datasets.preloaded_provider.PreloadedVLMConversationProvider attribute) (bridge.diffusion.data.common.diffusion_energon_datamodule.DiffusionDataModuleConfig attribute) (bridge.diffusion.data.flux.flux_energon_datamodule.FluxDataModuleConfig attribute) (bridge.diffusion.data.flux.flux_energon_datamodule.FluxDatasetConfig attribute) (bridge.diffusion.data.flux.flux_mock_datamodule.FluxMockDataModuleConfig attribute) (bridge.diffusion.data.wan.wan_energon_datamodule.WanDataModuleConfig attribute) (bridge.diffusion.data.wan.wan_energon_datamodule.WanDatasetConfig attribute) (bridge.diffusion.data.wan.wan_mock_datamodule.WanMockDataModuleConfig attribute) (bridge.diffusion.models.flux.flux_provider.FluxProvider attribute) (bridge.diffusion.models.wan.wan_provider.WanModelProvider attribute) (bridge.diffusion.models.wan.wan_provider.WanModelProvider14B attribute) (bridge.diffusion.models.wan.wan_provider.WanModelProvider1_3B attribute) (bridge.models.gemma.gemma2_provider.Gemma2ModelProvider attribute) (bridge.models.gemma.gemma3_provider.Gemma3ModelProvider attribute) (bridge.models.gemma.gemma3_provider.Gemma3ModelProvider1B attribute) (bridge.models.gemma.gemma_provider.GemmaModelProvider attribute) (bridge.models.glm_vl.glm_45v_provider.GLM45VModelProvider attribute) (bridge.models.gpt.gpt_builder.GPTModelConfig attribute) (bridge.models.gpt_provider.GPTModelProvider attribute) (bridge.models.gpt_provider.GPTProvider175B attribute) (bridge.models.mamba.mamba_builder.MambaModelConfig attribute) (bridge.models.mamba.mamba_provider.MambaModelProvider attribute) (bridge.models.ministral3.ministral3_provider.Ministral3ModelProvider attribute) (bridge.models.mistral.mistral_provider.MistralModelProvider attribute) (bridge.models.mistral.mistral_provider.MistralSmall3ModelProvider24B attribute) (bridge.models.nemotron_vl.nemotron_vl_provider.NemotronNano12Bv2VLModelProvider attribute) (bridge.models.nemotronh.nemotron_h_provider.Nemotron3NanoProvider attribute) (bridge.models.nemotronh.nemotron_h_provider.NemotronHModelProvider attribute) (bridge.models.nemotronh.nemotron_h_provider.NemotronNanoModelProvider12Bv2 attribute) (bridge.models.nemotronh.nemotron_h_provider.NemotronNanoModelProvider9Bv2 attribute) (bridge.models.olmoe.olmoe_provider.OlMoEModelProvider attribute) (bridge.models.qwen_vl.qwen35_vl_provider.Qwen35VLModelProvider attribute) (bridge.models.qwen_vl.qwen35_vl_provider.Qwen35VLMoEModelProvider attribute) (bridge.models.sarvam.sarvam_provider.SarvamMLAModelProvider attribute) (bridge.models.sarvam.sarvam_provider.SarvamMoEModelProvider attribute) (bridge.models.t5_provider.T5ModelProvider attribute) (bridge.recipes.qwen_vl.qwen25_vl_dataset.MockQwen25VLDatasetProvider attribute) (bridge.recipes.qwen_vl.qwen3_vl.Qwen3VLCommonKwargs attribute) (bridge.training.config.FinetuningDatasetConfig attribute) (bridge.training.config.GPTDatasetConfig property) seq_length_dec (bridge.models.t5_provider.T5ModelProvider attribute) SEQUENCE_LENGTH_128K (in module bridge.recipes.gemma.gemma3) (in module bridge.recipes.llama.llama3) SEQUENCE_LENGTH_16K (in module bridge.recipes.llama.llama3) SEQUENCE_LENGTH_32K (in module bridge.recipes.gemma.gemma3) SEQUENCE_LENGTH_64K (in module bridge.recipes.llama.llama3) sequence_parallel (bridge.models.model_provider.ModelParallelKwargs attribute) (bridge.models.qwen_vl.qwen3_vl_provider.Qwen3VLModelProvider attribute) (bridge.recipes.qwen_vl.qwen3_vl.Qwen3VLCommonKwargs attribute) Serializable (class in bridge.models.common.base) set_begin_index() (bridge.diffusion.models.flux.flow_matching.flux_inference_pipeline.FlowMatchEulerDiscreteScheduler method) set_checkpoint_version() (in module bridge.training.checkpointing) set_data_parallel_size() (bridge.training.config.ConfigContainer method) set_decoder() (bridge.models.bailing.modeling_bailing_moe_v2.BailingMoeV2ForCausalLM method) set_deepseek_v3_pipeline_model_parallel_layout() (in module bridge.recipes.deepseek.deepseek_v3) set_dist_train_input_tensors() (bridge.models.qwen_vl.modelling_qwen3_vl.model.Qwen3VLModel method) set_epoch() (bridge.data.samplers.RandomSeedDataset method) set_glm_45v_pipeline_model_parallel_layout() (in module bridge.recipes.glm_vl.glm_45v) set_input_embeddings() (bridge.models.bailing.modeling_bailing_moe_v2.BailingMoeV2ForCausalLM method) (bridge.models.bailing.modeling_bailing_moe_v2.BailingMoeV2Model method) (bridge.models.qwen3_asr.hf_qwen3_asr.modeling_qwen3_asr.Qwen3ASRAudioEncoder method) (bridge.models.qwen3_asr.hf_qwen3_asr.modeling_qwen3_asr.Qwen3ASRThinkerForConditionalGeneration method) set_input_tensor() (bridge.diffusion.models.flux.flux_model.Flux method) (bridge.diffusion.models.wan.wan_model.WanModel method) (bridge.models.gemma_vl.modeling_gemma3_vl.Gemma3VLModel method) (bridge.models.glm_vl.modeling_glm_45v.GLM45VModel method) (bridge.models.kimi_vl.modeling_kimi_k25_vl.KimiK25VLModel method) (bridge.models.ministral3.modeling_ministral3.Ministral3Model method) (bridge.models.nemotron_vl.modeling_nemotron_vl.NemotronVLModel method) (bridge.models.qwen3_asr.modeling_qwen3_asr.model.Qwen3ASRModel method) (bridge.models.qwen3_asr.modeling_qwen3_asr.thinker_model.Qwen3ASRThinkerModel method) (bridge.models.qwen_audio.modeling_qwen2_audio.Qwen2AudioModel method) (bridge.models.qwen_omni.modeling_qwen25_omni.model.Qwen25OmniModel method) (bridge.models.qwen_omni.modeling_qwen25_omni.thinker_model.Qwen25OmniThinkerModel method) (bridge.models.qwen_vl.modeling_qwen25_vl.Qwen25VLModel method) (bridge.models.qwen_vl.modelling_qwen3_vl.model.Qwen3VLModel method) (bridge.models.qwen_vl.modelling_qwen3_vl.vision_model.Qwen3VLVisionModel method) set_jit_fusion_options() (in module bridge.training.initialize) set_output_embeddings() (bridge.models.bailing.modeling_bailing_moe_v2.BailingMoeV2ForCausalLM method) set_params_to_save() (bridge.peft.base.PEFT method) set_sm_margin (bridge.training.comm_overlap.BulkOverlapCfg attribute) (bridge.training.comm_overlap.PipelineOverlapCfg attribute) (bridge.training.comm_overlap.RingExchangeOverlapCfg attribute) set_timesteps() (bridge.diffusion.models.flux.flow_matching.flux_inference_pipeline.FlowMatchEulerDiscreteScheduler method) setup() (bridge.recipes.run_plugins.CometPlugin method) (bridge.recipes.run_plugins.FaultTolerancePlugin method) (bridge.recipes.run_plugins.NsysPlugin method) (bridge.recipes.run_plugins.PerfEnvPlugin method) (bridge.recipes.run_plugins.PreemptionPlugin method) (bridge.recipes.run_plugins.PyTorchProfilerPlugin method) (bridge.recipes.run_plugins.WandbPlugin method) (bridge.training.comm_overlap.CommOverlapConfig method) (bridge.training.mixed_precision.MixedPrecisionConfig method) (in module bridge.training.fault_tolerance) (in module bridge.training.setup) setup_data_iterators() (in module bridge.data.loaders) setup_logging() (in module bridge.training.utils.log_utils) setup_mimo() (in module bridge.training.setup_mimo) setup_model_from_checkpoint() (bridge.diffusion.models.flux.flow_matching.flux_inference_pipeline.FluxInferencePipeline method) (bridge.diffusion.models.wan.flow_matching.flow_inference_pipeline.FlowInferencePipeline method) setup_optimizer() (in module bridge.training.optim) SetupOutput (class in bridge.training.setup) sharded_state_dict() (bridge.diffusion.models.flux.flux_model.Flux method) (bridge.diffusion.models.wan.wan_model.WanModel method) (bridge.models.gpt_full_te_layer_autocast_spec.TETransformerLayerAutocast method) (bridge.models.minimax_m2.minimax_m2_provider._FullDimRMSNorm method) (bridge.models.qwen_vl.modelling_qwen3_vl.transformer_block.Qwen3VLVisionTransformerBlock method) (bridge.peft.adapter_wrapper.AdapterWrapper method) (bridge.peft.canonical_lora.ModuleDict method) (bridge.peft.dora_layers.ParallelLinearDoRAAdapter method) (bridge.peft.utils.ParallelLinearAdapter method) share_embeddings_and_output_weights (bridge.diffusion.models.flux.flux_provider.FluxProvider attribute) (bridge.diffusion.models.wan.wan_provider.WanModelProvider attribute) (bridge.models.gemma.gemma2_provider.Gemma2ModelProvider attribute) (bridge.models.gemma.gemma3_provider.Gemma3ModelProvider attribute) (bridge.models.gemma.gemma_provider.GemmaModelProvider attribute) (bridge.models.glm_vl.glm_45v_provider.GLM45VModelProvider attribute) (bridge.models.gpt.gpt_builder.GPTModelConfig attribute) (bridge.models.gpt_provider.GPTModelProvider attribute) (bridge.models.llama_nemotron.llama_nemotron_provider.LlamaNemotronHeterogeneousProvider attribute) (bridge.models.mamba.mamba_builder.MambaModelConfig attribute) (bridge.models.mamba.mamba_provider.MambaModelProvider attribute) (bridge.models.ministral3.ministral3_provider.Ministral3ModelProvider attribute) (bridge.models.ministral3.ministral3_provider.Ministral3ModelProvider3B attribute) (bridge.models.mistral.mistral_provider.MistralModelProvider attribute) (bridge.models.olmoe.olmoe_provider.OlMoEModelProvider attribute) (bridge.models.qwen3_asr.modeling_qwen3_asr.transformer_config.Qwen3ASRTransformerConfig attribute) (bridge.models.qwen_omni.modeling_qwen25_omni.transformer_config.Qwen25OmniTransformerConfig attribute) (bridge.models.qwen_vl.modelling_qwen3_vl.transformer_config.Qwen3VLTransformerConfig attribute) (bridge.models.sarvam.sarvam_provider.SarvamMLAModelProvider attribute) (bridge.models.sarvam.sarvam_provider.SarvamMoEModelProvider attribute) (bridge.models.t5_provider.T5ModelProvider attribute) shared_embedding_or_output_weight() (bridge.models.qwen3_asr.modeling_qwen3_asr.model.Qwen3ASRModel method) (bridge.models.qwen3_asr.modeling_qwen3_asr.thinker_model.Qwen3ASRThinkerModel method) (bridge.models.qwen_omni.modeling_qwen25_omni.model.Qwen25OmniModel method) (bridge.models.qwen_omni.modeling_qwen25_omni.thinker_model.Qwen25OmniThinkerModel method) (bridge.models.qwen_vl.modelling_qwen3_vl.model.Qwen3VLModel method) should_disable_forward_pre_hook() (in module bridge.training.train) should_fire() (in module bridge.training.callbacks) should_pad_vocab (bridge.models.gpt.gpt_builder.GPTModelConfig attribute) (bridge.models.gpt_provider.GPTModelProvider attribute) (bridge.models.mamba.mamba_builder.MambaModelConfig attribute) (bridge.models.mamba.mamba_provider.MambaModelProvider attribute) (bridge.models.t5_provider.T5ModelProvider attribute) should_profile_rank() (in module bridge.training.profiling) shutdown() (bridge.training.nvrx_straggler.NVRxStragglerDetectionManager method) (in module bridge.training.fault_tolerance) sigma (bridge.diffusion.common.flow_matching.adapters.base.FlowMatchingContext attribute) signal_handler (bridge.training.state.GlobalState property) signals_received() (bridge.training.utils.sig_utils.DistributedSignalHandler method) SimpleAdapter (class in bridge.diffusion.common.flow_matching.adapters.simple) simulate_fault (bridge.training.config.FaultToleranceConfig attribute) simulated_fault_base_delay (bridge.training.config.FaultToleranceConfig attribute) simulated_fault_rank (bridge.training.config.FaultToleranceConfig attribute) simulated_fault_type (bridge.training.config.FaultToleranceConfig attribute) sinusoidal_embedding_1d() (in module bridge.diffusion.models.wan.wan_model) SinusoidsPositionEmbedding (class in bridge.models.qwen3_asr.hf_qwen3_asr.modeling_qwen3_asr) SIZE_CONFIGS (in module bridge.diffusion.models.wan.inference) skip_getting_attention_mask_from_dataset (bridge.data.vlm_datasets.hf_provider.HFDatasetConversationProvider attribute) (bridge.data.vlm_datasets.mock_provider.MockVLMConversationProvider attribute) (bridge.data.vlm_datasets.preloaded_provider.PreloadedVLMConversationProvider attribute) (bridge.recipes.qwen_vl.qwen25_vl_dataset.MockQwen25VLDatasetProvider attribute) skip_load_to_model_and_opt (bridge.training.checkpointing.CheckpointLoadContext attribute) skip_special_tokens (bridge.models.hf_pretrained.causal_lm.DecodeKwargs attribute) skip_sync_grad_norm_across_mp (bridge.training.config.TrainingConfig attribute) skip_test (bridge.data.vlm_datasets.hf_provider.HFDatasetConversationProvider attribute) skip_train (bridge.training.config.TrainingConfig attribute) skip_train_metrics_log (bridge.training.config.LoggerConfig attribute) skipped_iter (bridge.training.callbacks.CallbackContext attribute) skipped_train_samples (bridge.training.state.TrainState attribute) slice_batch_for_context_parallel() (in module bridge.utils.common_utils) slice_batch_for_mimo() (in module bridge.data.mimo.dp_utils) soft_timeout (bridge.training.config.InProcessRestartConfig attribute) softmax_scale (bridge.models.gemma.gemma3_provider.Gemma3ModelProvider attribute) (bridge.models.gemma.gemma3_provider.Gemma3ModelProvider27B attribute) source (bridge.models.hf_pretrained.state.StateDict attribute) SOURCE_NAME (bridge.models.conversion.model_bridge.MegatronModelBridge attribute) sp_tokenizer_kwargs (bridge.training.tokenizers.config.TokenizerConfig attribute) SP_TOKENIZERS (in module bridge.training.tokenizers.tokenizer) spatial_merge_size (bridge.models.qwen_omni.modeling_qwen25_omni.transformer_config.Qwen25OmniTransformerConfig attribute) (bridge.models.qwen_omni.qwen25_omni_provider.Qwen25OmniModelProvider attribute) (bridge.models.qwen_vl.modelling_qwen3_vl.transformer_config.Qwen3VLTransformerConfig attribute) (bridge.models.qwen_vl.qwen35_vl_provider.Qwen35VLModelProvider attribute) (bridge.models.qwen_vl.qwen35_vl_provider.Qwen35VLMoEModelProvider attribute) (bridge.models.qwen_vl.qwen3_vl_provider.Qwen3VLModelProvider attribute) (bridge.models.qwen_vl.qwen3_vl_provider.Qwen3VLMoEModelProvider attribute) special_token_ids (bridge.data.mimo.hf_provider.HFMimoDatasetProvider attribute) (bridge.data.mimo.mock_provider.MockMimoProvider attribute) (bridge.models.mimo.mimo_config.MimoParallelismConfig attribute) (bridge.models.mimo.mimo_provider.MimoModelProvider attribute) special_tokens (bridge.training.tokenizers.config.TokenizerConfig attribute) spiky_loss_factor (bridge.training.config.RerunStateMachineConfig attribute) split (bridge.data.builders.hf_dataset.HFDatasetConfig attribute) split_batch_into_microbatches() (in module bridge.data.finetuning) split_data_cp_rank() (in module bridge.models.qwen_vl.modelling_qwen3_vl.utils) split_deepstack_embs() (in module bridge.models.qwen_vl.modelling_qwen3_vl.utils) split_gdn_linear_weights() (in module bridge.models.conversion.param_mapping) split_kv_biases() (in module bridge.models.conversion.param_mapping) split_kv_weights() (in module bridge.models.conversion.param_mapping) split_part_by_cp_tp() (in module bridge.models.qwen_vl.modelling_qwen3_vl.utils) split_qkv_biases() (in module bridge.models.conversion.param_mapping) split_qkv_weights() (in module bridge.models.conversion.param_mapping) split_val_from_train (bridge.data.builders.hf_dataset.HFDatasetConfig attribute) SplitRowParallelMapping (class in bridge.diffusion.conversion.flux.flux_bridge) squared_relu() (in module bridge.training.mlm_compat.activations) start_nsys_profiler() (in module bridge.training.profiling) state (bridge.diffusion.conversion.flux.flux_hf_pretrained.PreTrainedFlux property) (bridge.diffusion.conversion.wan.wan_hf_pretrained.PreTrainedWAN property) (bridge.models.hf_pretrained.base.PreTrainedBase property) (bridge.training.callbacks.CallbackContext attribute) (bridge.training.checkpointing.CheckpointLoadContext attribute) (bridge.training.checkpointing.CheckpointSaveContext attribute) (bridge.training.setup.SetupOutput attribute) state_dict() (bridge.peft.adapter_wrapper.AdapterWrapper method) (bridge.training.state.TrainState method) StateDict (class in bridge.models.hf_pretrained.state) StateSource (class in bridge.models.hf_pretrained.state) step (bridge.training.state.TrainState attribute) step() (bridge.diffusion.common.flow_matching.flow_matching_pipeline.FlowMatchingPipeline method) (bridge.diffusion.models.flux.flow_matching.flux_inference_pipeline.FlowMatchEulerDiscreteScheduler method) step_index (bridge.diffusion.models.flux.flow_matching.flux_inference_pipeline.FlowMatchEulerDiscreteScheduler property) stop_if_detected (bridge.training.config.NVRxStragglerDetectionConfig attribute) stop_nsys_profiler() (in module bridge.training.profiling) storage_writers_per_rank (bridge.training.config.CheckpointConfig attribute) str2bool() (in module bridge.diffusion.models.wan.inference.utils) str_to_callable() (in module bridge.utils.activation_map) str_to_dtype() (in module bridge.utils.activation_map) straggler (bridge.training.config.ConfigContainer attribute) straggler_timer (bridge.training.state.GlobalState property) StragglerDetectionConfig (class in bridge.training.config) stream_adapter_weights_megatron_to_hf() (bridge.models.conversion.peft_bridge.MegatronPeftBridge method) (in module bridge.models.conversion.model_bridge) stream_weights_hf_to_megatron() (bridge.models.conversion.model_bridge.MegatronModelBridge method) stream_weights_megatron_to_hf() (bridge.models.conversion.model_bridge.MegatronModelBridge method) (in module bridge.models.conversion.model_bridge) strict (bridge.training.checkpointing.CheckpointLoadContext attribute) STRICT (bridge.utils.instantiate_utils.InstantiationMode attribute) strict_fsdp_dtensor_load (bridge.training.config.CheckpointConfig attribute) sub_configs (bridge.models.qwen3_asr.hf_qwen3_asr.configuration_qwen3_asr.Qwen3ASRConfig attribute) (bridge.models.qwen3_asr.hf_qwen3_asr.configuration_qwen3_asr.Qwen3ASRThinkerConfig attribute) SUPPORTED_HF_ARCHITECTURES (in module bridge.models.conversion.auto_bridge) SUPPORTED_HF_ARCHITECTURES_DISPLAY (in module bridge.models.conversion.auto_bridge) SUPPORTED_SIZES (in module bridge.diffusion.models.wan.inference) supports() (bridge.models.conversion.auto_bridge.AutoBridge class method) supports_gradient_checkpointing (bridge.models.bailing.modeling_bailing_moe_v2.BailingMoeV2PreTrainedModel attribute) (bridge.models.qwen3_asr.hf_qwen3_asr.modeling_qwen3_asr.Qwen3ASRPreTrainedModel attribute) (bridge.models.qwen3_asr.hf_qwen3_asr.modeling_qwen3_asr.Qwen3ASRThinkerTextPreTrainedModel attribute) SYSTEM_TOKEN (in module bridge.data.datasets.utils) T T (in module bridge.models.config) (in module bridge.training.utils.config_utils) T5Config (class in bridge.diffusion.models.flux.flow_matching.flux_inference_pipeline) T5ModelProvider (class in bridge.models.t5_provider) talker_config (bridge.models.qwen_omni.qwen25_omni_provider.Qwen25OmniModelProvider attribute) TARGET (bridge.utils.instantiate_utils._Keys attribute) target_modules (bridge.peft.canonical_lora.CanonicalLoRA attribute) (bridge.peft.dora.DoRA attribute) (bridge.peft.lora.LoRA attribute) (bridge.peft.module_matcher.ModuleMatcher attribute) task_encoder (bridge.data.energon.energon_provider.EnergonProvider attribute) task_encoder_seq_length (bridge.diffusion.data.common.diffusion_energon_datamodule.DiffusionDataModuleConfig attribute) task_type (bridge.diffusion.common.flow_matching.adapters.base.FlowMatchingContext attribute) te_checkpoint (in module bridge.models.qwen_vl.modelling_qwen3_vl.transformer_block) teacher (bridge.models.distillation_provider.DistillationProvider attribute) TECL (in module bridge.peft.utils) TEFusedLoRALinear (class in bridge.peft.lora_layers) TELinearAdapter (class in bridge.peft.lora_layers) temperature (bridge.models.hf_pretrained.causal_lm.GenerateKwargs attribute) temporal_patch_size (bridge.models.qwen_omni.modeling_qwen25_omni.transformer_config.Qwen25OmniTransformerConfig attribute) (bridge.models.qwen_omni.qwen25_omni_provider.Qwen25OmniModelProvider attribute) (bridge.models.qwen_vl.modelling_qwen3_vl.transformer_config.Qwen3VLTransformerConfig attribute) (bridge.models.qwen_vl.qwen35_vl_provider.Qwen35VLModelProvider attribute) (bridge.models.qwen_vl.qwen35_vl_provider.Qwen35VLMoEModelProvider attribute) (bridge.models.qwen_vl.qwen3_vl_provider.Qwen3VLModelProvider attribute) (bridge.models.qwen_vl.qwen3_vl_provider.Qwen3VLMoEModelProvider attribute) temporal_self_attention (bridge.diffusion.models.wan.wan_layer_spec.WanWithAdaLNSubmodules attribute) temporary_distributed_context() (in module bridge.training.model_load_save) tensor_inspect (bridge.training.config.ConfigContainer attribute) tensor_inspect_end_if_enabled() (in module bridge.training.tensor_inspect) tensor_inspect_step_if_enabled() (in module bridge.training.tensor_inspect) tensor_model_parallel_size (bridge.models.mimo.mimo_config.ModuleParallelismConfig attribute) (bridge.models.model_provider.ModelParallelKwargs attribute) (bridge.recipes.qwen_vl.qwen3_vl.Qwen3VLCommonKwargs attribute) tensorboard_logger (bridge.training.state.GlobalState property) TensorInspectConfig (class in bridge.training.config) TERL (in module bridge.peft.utils) termination_grace_time (bridge.training.config.InProcessRestartConfig attribute) TERowParallelLinearLayerNorm (class in bridge.models.gemma.gemma2_provider) (class in bridge.models.gemma.gemma3_provider) test_data_iterator (bridge.training.setup.SetupOutput attribute) test_data_path (bridge.data.vlm_datasets.preloaded_provider.PreloadedVLMConversationProvider attribute) test_dataloader() (bridge.data.energon.base_energon_datamodule.EnergonMultiModalDataModule method) test_maker_kwargs (bridge.data.vlm_datasets.hf_provider.HFDatasetConversationProvider attribute) test_path (bridge.data.builders.finetuning_dataset.FinetuningDatasetBuilder property) test_samples (bridge.training.config.DatasetBuildContext attribute) test_split (bridge.data.mimo.hf_provider.HFMimoDatasetProvider attribute) TETransformerLayerAutocast (class in bridge.models.gpt_full_te_layer_autocast_spec) text_column (bridge.data.mimo.hf_provider.HFMimoDatasetProvider attribute) text_dim (bridge.diffusion.models.wan.wan_provider.WanModelProvider attribute) text_len (bridge.diffusion.models.wan.wan_provider.WanModelProvider attribute) text_precached (bridge.diffusion.data.flux.flux_mock_datamodule.FluxMockDataModuleConfig attribute) text_prompt (bridge.data.mimo.mock_provider.MockMimoProvider attribute) thd_split_inputs_cp() (in module bridge.diffusion.models.wan.utils) thinker_config (bridge.models.qwen3_asr.qwen3_asr_provider.Qwen3ASRModelProvider attribute) (bridge.models.qwen_omni.qwen25_omni_provider.Qwen25OmniModelProvider attribute) ThreeArgForwardStep (class in bridge.training.forward_step_func_types) ThreeArgStateForwardStep (class in bridge.training.forward_step_func_types) ThreeTupleLossReturn (in module bridge.training.forward_step_func_types) tiktoken_num_special_tokens (bridge.training.tokenizers.config.TokenizerConfig attribute) tiktoken_pattern (bridge.training.tokenizers.config.TokenizerConfig attribute) tiktoken_special_tokens (bridge.training.tokenizers.config.TokenizerConfig attribute) time_shift() (bridge.diffusion.models.flux.flow_matching.flux_inference_pipeline.FlowMatchEulerDiscreteScheduler method) timers (bridge.training.state.GlobalState property) TimeStepEmbedder (class in bridge.diffusion.models.flux.layers) timesteps (bridge.diffusion.common.flow_matching.adapters.base.FlowMatchingContext attribute) timing_log_level (bridge.training.config.LoggerConfig attribute) TNvtxContext (in module bridge.training.profiling) to() (bridge.models.hf_pretrained.causal_lm.PreTrainedCausalLM method) (bridge.models.hf_pretrained.vlm.PreTrainedVLM method) to_cfg_dict() (bridge.models.distillation_provider.DistillationProvider method) to_dict() (bridge.diffusion.data.common.diffusion_sample.DiffusionSample method) (bridge.training.utils.config_utils._ConfigContainerBase method) to_empty_if_meta_device() (in module bridge.models.common.unimodal) to_megatron_model() (bridge.models.conversion.auto_bridge.AutoBridge method) to_megatron_provider() (bridge.models.conversion.auto_bridge.AutoBridge method) to_yaml() (bridge.training.utils.config_utils._ConfigContainerBase method) token2wav_config (bridge.models.qwen_omni.qwen25_omni_provider.Qwen25OmniModelProvider attribute) tokenize_dataset() (in module bridge.data.datasets.packed_sequence) tokenizer (bridge.models.hf_pretrained.causal_lm.PreTrainedCausalLM property) (bridge.models.hf_pretrained.vlm.PreTrainedVLM property) (bridge.training.config.ConfigContainer attribute) (bridge.training.config.DatasetBuildContext attribute) (bridge.training.state.GlobalState property) tokenizer_class (bridge.models.qwen3_asr.hf_qwen3_asr.processing_qwen3_asr.Qwen3ASRProcessor attribute) tokenizer_model (bridge.training.tokenizers.config.TokenizerConfig attribute) tokenizer_model_name (bridge.data.datasets.packed_sequence.PackedSequenceSpecs attribute) tokenizer_path (bridge.data.mimo.mock_provider.MockMimoProvider attribute) tokenizer_prompt_format (bridge.training.tokenizers.config.TokenizerConfig attribute) tokenizer_type (bridge.training.tokenizers.config.TokenizerConfig attribute) TokenizerConfig (class in bridge.training.tokenizers.config) tokens_per_image (bridge.models.gemma_vl.modeling_gemma3_vl.Gemma3VLMultimodalProjectorConfig attribute) top_k (bridge.models.hf_pretrained.causal_lm.GenerateKwargs attribute) top_p (bridge.models.hf_pretrained.causal_lm.GenerateKwargs attribute) topology (bridge.models.mimo.mimo_provider.MimoModelInfra attribute) (bridge.models.mimo.mimo_provider.MimoModelProvider attribute) torch_dist_init() (in module bridge.training.initialize) torch_dtype_from_mcore_config() (in module bridge.training.model_load_save) torch_dtype_from_precision() (in module bridge.models.gpt_full_te_layer_autocast_spec) torch_to_numpy() (bridge.diffusion.models.flux.flow_matching.flux_inference_pipeline.FluxInferencePipeline static method) torchrun_main() (in module bridge.models.decorators.torchrun) total_loss_dict (bridge.training.callbacks.CallbackContext attribute) total_model_parallel_size (bridge.models.mimo.mimo_config.ModuleParallelismConfig property) total_ranks (bridge.models.mimo.mimo_config.ModuleParallelismConfig property) total_world_size (bridge.models.mimo.mimo_config.MimoParallelismConfig property) tp_comm_bootstrap_backend (bridge.training.comm_overlap._CommOverlapConfig attribute) (bridge.training.comm_overlap.CommOverlapConfig attribute) tp_comm_overlap (bridge.training.comm_overlap._CommOverlapConfig attribute) (bridge.training.comm_overlap.CommOverlapConfig attribute) tp_comm_overlap_cfg (bridge.models.gpt.gpt_builder.GPTModelConfig attribute) (bridge.models.gpt_provider.GPTModelProvider attribute) (bridge.models.t5_provider.T5ModelProvider attribute) (bridge.training.comm_overlap._CommOverlapConfig attribute) (bridge.training.comm_overlap.CommOverlapConfig attribute) tp_group (bridge.models.conversion.param_mapping.MegatronParamMapping property) tp_only_amax_red (bridge.models.gpt_provider.GPTModelProvider attribute) tp_rank (bridge.models.conversion.param_mapping.MegatronParamMapping property) tp_size (bridge.models.conversion.param_mapping.MegatronParamMapping property) (bridge.recipes.run_plugins.PerfEnvPlugin attribute) TPOverlapCfg (class in bridge.training.comm_overlap) TRACKER_PREFIX (in module bridge.training.checkpointing) (in module bridge.training.utils.checkpoint_utils) train (bridge.training.config.ConfigContainer attribute) train() (in module bridge.training.train) train_data_iterator (bridge.training.checkpointing.CheckpointSaveContext attribute) (bridge.training.setup.SetupOutput attribute) (bridge.training.setup_mimo.MimoSetupOutput attribute) train_data_path (bridge.data.vlm_datasets.preloaded_provider.PreloadedVLMConversationProvider attribute) train_dataloader() (bridge.data.energon.base_energon_datamodule.EnergonMultiModalDataModule method) train_iters (bridge.recipes.qwen_vl.qwen3_vl.Qwen3VLCommonKwargs attribute) train_mimo() (in module bridge.training.train_mimo) train_path (bridge.data.builders.finetuning_dataset.FinetuningDatasetBuilder property) train_path_packed (bridge.data.builders.finetuning_dataset.FinetuningDatasetBuilder property) train_samples (bridge.training.config.DatasetBuildContext attribute) train_split (bridge.data.mimo.hf_provider.HFMimoDatasetProvider attribute) train_state (bridge.training.state.GlobalState property) TRAIN_STATE_FILE (in module bridge.training.utils.checkpoint_utils) train_step() (in module bridge.training.train) train_step_mimo() (in module bridge.training.train_mimo) training_log() (in module bridge.training.utils.train_utils) TrainingConfig (class in bridge.training.config) TrainState (class in bridge.training.state) transform() (bridge.peft.base.PEFT method) (bridge.peft.canonical_lora.CanonicalLoRA method) (bridge.peft.dora.DoRA method) (bridge.peft.lora.LoRA method) (bridge.peft.lora.LoRAMerge method) transformer (bridge.models.gpt.gpt_builder.GPTModelConfig attribute) (bridge.models.mamba.mamba_builder.MambaModelConfig attribute) transformer_config (bridge.models.conversion.auto_bridge.AutoBridge property) transformer_engine_full_layer_spec() (in module bridge.models.gpt.gpt_builder) (in module bridge.models.gpt_provider) transformer_engine_layer_spec() (in module bridge.models.gpt.gpt_builder) (in module bridge.models.gpt_provider) (in module bridge.models.t5_provider) transformer_engine_mamba_stack_spec() (in module bridge.models.mamba.mamba_builder) (in module bridge.models.mamba.mamba_provider) transformer_layer_spec (bridge.models.gemma.gemma2_provider.Gemma2ModelProvider attribute) (bridge.models.gemma.gemma3_provider.Gemma3ModelProvider attribute) (bridge.models.glm_vl.glm_45v_provider.GLM45VModelProvider attribute) (bridge.models.gpt.gpt_builder.GPTModelConfig attribute) (bridge.models.gpt_provider.GPTModelProvider attribute) (bridge.models.llama_nemotron.llama_nemotron_provider.LlamaNemotronHeterogeneousProvider attribute) (bridge.models.ministral3.ministral3_provider.Ministral3ModelProvider attribute) (bridge.models.olmoe.olmoe_provider.OlMoEModelProvider attribute) (bridge.models.qwen_vl.qwen35_vl_provider.Qwen35VLModelProvider attribute) (bridge.models.qwen_vl.qwen35_vl_provider.Qwen35VLMoEModelProvider attribute) (bridge.models.sarvam.sarvam_provider.SarvamMLAModelProvider attribute) (bridge.models.sarvam.sarvam_provider.SarvamMoEModelProvider attribute) (bridge.models.t5_provider.T5ModelProvider attribute) TransformerConfig (class in bridge.models.transformer_config) TransformerLayerTPOverlapCfg (class in bridge.training.comm_overlap) truncation (bridge.models.hf_pretrained.causal_lm.EncodeKwargs attribute) trust_remote_code (bridge.data.mimo.hf_provider.HFMimoDatasetProvider attribute) (bridge.data.mimo.mock_provider.MockMimoProvider attribute) (bridge.training.config.DataloaderConfig attribute) TwoArgForwardStep (class in bridge.training.forward_step_func_types) TwoTupleLossReturn (in module bridge.training.forward_step_func_types) TYPE_INSTRUCTION (in module bridge.data.datasets.utils) U UnavailableError UnavailableMeta (class in bridge.utils.import_utils) UnavailableNullContext (class in bridge.utils.import_utils) unimodal_build_distributed_models() (in module bridge.models.common.unimodal) unpad_seq_to_mult() (in module bridge.peft.utils) unpatchify() (in module bridge.diffusion.models.wan.utils) unwrap_mimo_model() (in module bridge.training.mimo_parallel_utils) update() (bridge.data.datasets.packing_utils._SegmentTree method) update_config_with_precision_overrides() (in module bridge.training.mixed_precision) use_arbitrary_attention_mask (bridge.models.gpt.gpt_builder.GPTModelConfig attribute) (bridge.models.gpt_provider.GPTModelProvider attribute) use_cache (bridge.models.hf_pretrained.causal_lm.GenerateKwargs attribute) use_cpu_initialization (bridge.diffusion.models.flux.flux_provider.FluxProvider attribute) (bridge.models.mimo.mimo_provider.MimoModelProvider attribute) (bridge.models.model_provider.GetModelKwargs attribute) use_decentralized_pg (bridge.training.config.DistributedInitConfig attribute) use_dist_train (bridge.models.qwen_vl.qwen3_vl_provider.DistTrainConfig attribute) use_hf_vision_model (bridge.models.qwen_vl.modelling_qwen3_vl.transformer_config.Qwen3VLTransformerConfig attribute) (bridge.models.qwen_vl.qwen35_vl_provider.Qwen35VLModelProvider attribute) (bridge.models.qwen_vl.qwen35_vl_provider.Qwen35VLMoEModelProvider attribute) (bridge.models.qwen_vl.qwen3_vl_provider.Qwen3VLModelProvider attribute) (bridge.models.qwen_vl.qwen3_vl_provider.Qwen3VLMoEModelProvider attribute) use_mamba_mem_eff_path (bridge.models.nemotronh.nemotron_h_provider.NemotronHModelProvider4B attribute) use_megatron_fsdp (bridge.models.model_provider.GetModelKwargs attribute) use_persistent_ckpt_worker (bridge.training.config.CheckpointConfig attribute) use_te_rng_tracker (bridge.diffusion.models.flux.flux_provider.FluxProvider attribute) (bridge.models.gpt_provider.GPTModelProvider attribute) use_torch_fsdp2 (bridge.models.model_provider.GetModelKwargs attribute) use_train_split_for_val (bridge.diffusion.data.common.diffusion_energon_datamodule.DiffusionDataModuleConfig attribute) use_transformer_engine_full_layer_spec (bridge.models.gpt.gpt_builder.GPTModelConfig attribute) (bridge.models.gpt_provider.GPTModelProvider attribute) (bridge.models.gpt_provider.GPTProvider175B attribute) use_transformer_engine_op_fuser (bridge.models.gpt.gpt_builder.GPTModelConfig attribute) (bridge.models.gpt_provider.GPTModelProvider attribute) user_state (bridge.training.callbacks.CallbackContext attribute) (bridge.training.callbacks.CallbackManager property) userbuffers_bf16_b200_h12288_tp4_mbs1_seqlen2048 (in module bridge.training.comm_overlap) userbuffers_bf16_b200_h16384_tp4_cp2_mbs1_seqlen8192 (in module bridge.training.comm_overlap) userbuffers_bf16_b200_h18432_tp8_mbs1_seqlen4096 (in module bridge.training.comm_overlap) userbuffers_bf16_b200_h6144_tp2_mbs1_seqlen4096 (in module bridge.training.comm_overlap) userbuffers_bf16_b200_h8192_tp2_mbs1_seqlen8192 (in module bridge.training.comm_overlap) userbuffers_bf16_h100_h12288_tp4_mbs1_seqlen2048 (in module bridge.training.comm_overlap) userbuffers_bf16_h100_h16384_tp8_cp2_mbs1_seqlen8192 (in module bridge.training.comm_overlap) userbuffers_bf16_h100_h6144_tp2_mbs2_seqlen2048 (in module bridge.training.comm_overlap) userbuffers_bf16_h100_h8192_tp4_mbs1_seqlen8192 (in module bridge.training.comm_overlap) userbuffers_fp8_b200_h12288_tp4_mbs1_seqlen2048 (in module bridge.training.comm_overlap) userbuffers_fp8_b200_h16384_tp4_cp2_mbs1_seqlen8192 (in module bridge.training.comm_overlap) userbuffers_fp8_b200_h18432_tp8_mbs1_seqlen4096 (in module bridge.training.comm_overlap) userbuffers_fp8_b200_h8192_tp2_mbs1_seqlen8192 (in module bridge.training.comm_overlap) userbuffers_fp8_h100_h12288_tp4_mbs1_seqlen2048 (in module bridge.training.comm_overlap) userbuffers_fp8_h100_h16384_tp4_mbs1_seqlen2048_lora (in module bridge.training.comm_overlap) userbuffers_fp8_h100_h16384_tp8_cp2_mbs1_seqlen8192 (in module bridge.training.comm_overlap) userbuffers_fp8_h100_h6144_tp2_mbs2_seqlen2048 (in module bridge.training.comm_overlap) userbuffers_fp8_h100_h8192_tp2_mbs1_seqlen4096_lora (in module bridge.training.comm_overlap) userbuffers_fp8_h100_h8192_tp4_mbs1_seqlen8192 (in module bridge.training.comm_overlap) V v_head_dim (bridge.models.sarvam.sarvam_provider.SarvamMLAModelProvider attribute) vae_channels (bridge.diffusion.data.flux.flux_mock_datamodule.FluxMockDataModuleConfig attribute) vae_scale_factor (bridge.diffusion.data.flux.flux_energon_datamodule.FluxDataModuleConfig attribute) (bridge.diffusion.data.flux.flux_energon_datamodule.FluxDatasetConfig attribute) (bridge.diffusion.data.flux.flux_mock_datamodule.FluxMockDataModuleConfig attribute) val_dataloader() (bridge.data.energon.base_energon_datamodule.EnergonMultiModalDataModule method) (bridge.diffusion.data.common.diffusion_energon_datamodule.DiffusionDataModule method) val_maker_kwargs (bridge.data.vlm_datasets.hf_provider.HFDatasetConversationProvider attribute) val_proportion (bridge.data.builders.hf_dataset.HFDatasetConfig attribute) valid_data_iterator (bridge.training.setup.SetupOutput attribute) (bridge.training.setup_mimo.MimoSetupOutput attribute) valid_data_path (bridge.data.vlm_datasets.preloaded_provider.PreloadedVLMConversationProvider attribute) VALID_EVENTS (in module bridge.training.callbacks) valid_samples (bridge.training.config.DatasetBuildContext attribute) valid_split (bridge.data.mimo.hf_provider.HFMimoDatasetProvider attribute) validate() (bridge.training.config.ConfigContainer method) validate_data_loader_contract() (in module bridge.training.mimo_parallel_utils) validate_flex_dispatcher_backend() (in module bridge.training.flex_dispatcher_backend) validate_no_stub_ranks() (in module bridge.training.mimo_parallel_utils) validate_rope_fusion_compatibility() (in module bridge.utils.fusions) validate_row() (bridge.data.datasets.packed_parquet.GPTSFTPackedParquetDataset static method) validation (bridge.training.config.ConfigContainer attribute) validation_path (bridge.data.builders.finetuning_dataset.FinetuningDatasetBuilder property) validation_path_packed (bridge.data.builders.finetuning_dataset.FinetuningDatasetBuilder property) vanilla_gpt_pretrain_config() (in module bridge.recipes.gpt.vanilla_gpt) vec_in_dim (bridge.diffusion.models.flux.flux_provider.FluxProvider attribute) version (bridge.diffusion.models.flux.flow_matching.flux_inference_pipeline.ClipConfig attribute) (bridge.diffusion.models.flux.flow_matching.flux_inference_pipeline.T5Config attribute) VERSION (in module bridge.package_info) video (bridge.diffusion.data.common.diffusion_sample.DiffusionSample attribute) video_end_token_id (bridge.models.glm_vl.glm_45v_provider.GLM45VModelProvider attribute) video_grid_thw (bridge.training.utils.visual_inputs.GenericVisualInputs attribute) video_latents (bridge.diffusion.common.flow_matching.adapters.base.FlowMatchingContext property) video_metadata (bridge.diffusion.data.common.diffusion_sample.DiffusionSample attribute) video_start_token_id (bridge.models.glm_vl.glm_45v_provider.GLM45VModelProvider attribute) video_token_id (bridge.models.glm_vl.glm_45v_provider.GLM45VModelProvider attribute) (bridge.models.qwen_omni.modeling_qwen25_omni.transformer_config.Qwen25OmniTransformerConfig attribute) (bridge.models.qwen_omni.qwen25_omni_provider.Qwen25OmniModelProvider attribute) (bridge.models.qwen_vl.modelling_qwen3_vl.transformer_config.Qwen3VLTransformerConfig attribute) (bridge.models.qwen_vl.qwen25_vl_provider.Qwen25VLModelProvider attribute) (bridge.models.qwen_vl.qwen35_vl_provider.Qwen35VLModelProvider attribute) (bridge.models.qwen_vl.qwen35_vl_provider.Qwen35VLMoEModelProvider attribute) (bridge.models.qwen_vl.qwen3_vl_provider.Qwen3VLModelProvider attribute) (bridge.models.qwen_vl.qwen3_vl_provider.Qwen3VLMoEModelProvider attribute) videohandler (class in bridge.data.energon.task_encoder_utils) videos (bridge.data.energon.task_encoder_utils.ChatMLSample attribute) virtual_pipeline_model_parallel_size (bridge.models.gpt_provider.GPTModelProvider attribute) (bridge.models.model_provider.ModelParallelKwargs attribute) vision_config (bridge.models.gemma_vl.gemma3_vl_provider.Gemma3VLModelProvider attribute) (bridge.models.glm_vl.glm_45v_provider.GLM45VModelProvider attribute) (bridge.models.kimi_vl.kimi_k25_vl_provider.KimiK25VLModelProvider attribute) (bridge.models.qwen_vl.qwen25_vl_provider.Qwen25VLModelProvider attribute) (bridge.models.qwen_vl.qwen35_vl_provider.Qwen35VLModelProvider attribute) (bridge.models.qwen_vl.qwen35_vl_provider.Qwen35VLMoEModelProvider attribute) (bridge.models.qwen_vl.qwen3_vl_provider.Qwen3VLModelProvider attribute) (bridge.models.qwen_vl.qwen3_vl_provider.Qwen3VLMoEModelProvider attribute) vision_context_parallel_size (bridge.models.qwen_vl.qwen3_vl_provider.DistTrainConfig attribute) vision_cuda_graph_impl (bridge.models.qwen_vl.qwen3_vl_provider.Qwen3VLModelProvider attribute) (bridge.models.qwen_vl.qwen3_vl_provider.Qwen3VLMoEModelProvider attribute) vision_cuda_graph_scope (bridge.models.qwen_vl.qwen3_vl_provider.Qwen3VLModelProvider attribute) (bridge.models.qwen_vl.qwen3_vl_provider.Qwen3VLMoEModelProvider attribute) vision_dp_when_cp (bridge.models.qwen_vl.modelling_qwen3_vl.transformer_config.Qwen3VLTransformerConfig attribute) (bridge.models.qwen_vl.qwen35_vl_provider.Qwen35VLModelProvider attribute) (bridge.models.qwen_vl.qwen35_vl_provider.Qwen35VLMoEModelProvider attribute) (bridge.models.qwen_vl.qwen3_vl_provider.Qwen3VLModelProvider attribute) (bridge.models.qwen_vl.qwen3_vl_provider.Qwen3VLMoEModelProvider attribute) vision_encoder_module (bridge.models.mimo.llava_provider.LlavaMimoProvider attribute) vision_encoder_params (bridge.models.mimo.llava_provider.LlavaMimoProvider attribute) vision_end_token_id (bridge.models.gemma_vl.gemma3_vl_provider.Gemma3VLModelProvider attribute) (bridge.models.qwen_omni.qwen25_omni_provider.Qwen25OmniModelProvider attribute) (bridge.models.qwen_vl.qwen25_vl_provider.Qwen25VLModelProvider attribute) (bridge.models.qwen_vl.qwen35_vl_provider.Qwen35VLModelProvider attribute) (bridge.models.qwen_vl.qwen35_vl_provider.Qwen35VLMoEModelProvider attribute) (bridge.models.qwen_vl.qwen3_vl_provider.Qwen3VLModelProvider attribute) (bridge.models.qwen_vl.qwen3_vl_provider.Qwen3VLMoEModelProvider attribute) vision_expert_model_parallel_size (bridge.models.qwen_vl.qwen3_vl_provider.DistTrainConfig attribute) vision_expert_tensor_parallel_size (bridge.models.qwen_vl.qwen3_vl_provider.DistTrainConfig attribute) vision_model_type (bridge.models.nemotron_vl.nemotron_vl_provider.NemotronNano12Bv2VLModelProvider attribute) vision_pipeline_model_parallel_size (bridge.models.qwen_vl.qwen3_vl_provider.DistTrainConfig attribute) vision_projector_config (bridge.models.gemma_vl.gemma3_vl_provider.Gemma3VLModelProvider attribute) vision_projector_input_size (bridge.models.mimo.llava_provider.LlavaMimoProvider attribute) vision_start_token_id (bridge.models.gemma_vl.gemma3_vl_provider.Gemma3VLModelProvider attribute) (bridge.models.qwen_omni.modeling_qwen25_omni.transformer_config.Qwen25OmniTransformerConfig attribute) (bridge.models.qwen_omni.qwen25_omni_provider.Qwen25OmniModelProvider attribute) (bridge.models.qwen_vl.modelling_qwen3_vl.transformer_config.Qwen3VLTransformerConfig attribute) (bridge.models.qwen_vl.qwen25_vl_provider.Qwen25VLModelProvider attribute) (bridge.models.qwen_vl.qwen35_vl_provider.Qwen35VLModelProvider attribute) (bridge.models.qwen_vl.qwen35_vl_provider.Qwen35VLMoEModelProvider attribute) (bridge.models.qwen_vl.qwen3_vl_provider.Qwen3VLModelProvider attribute) (bridge.models.qwen_vl.qwen3_vl_provider.Qwen3VLMoEModelProvider attribute) vision_tensor_model_parallel_size (bridge.models.qwen_vl.qwen3_vl_provider.DistTrainConfig attribute) vision_to_llm_dp_ratio (bridge.models.qwen_vl.qwen3_vl_provider.DistTrainConfig attribute) vision_token_id (bridge.models.qwen_vl.qwen25_vl_provider.Qwen25VLModelProvider attribute) vision_world_size (bridge.models.qwen_vl.qwen3_vl_provider.DistTrainConfig attribute) visual_tensors (bridge.data.energon.hf_encoder_task_encoder.HFEncoderTaskBatch attribute) (bridge.data.energon.hf_encoder_task_encoder.HFEncoderTaskSample attribute) VLMConversationDataset (class in bridge.data.vlm_datasets.conversation_dataset) VLMLoRA (class in bridge.peft.lora) VLMType (in module bridge.models.hf_pretrained.vlm) vocab_extra_ids (bridge.training.tokenizers.config.TokenizerConfig attribute) vocab_file (bridge.training.tokenizers.config.TokenizerConfig attribute) vocab_size (bridge.diffusion.models.flux.flux_provider.FluxProvider attribute) (bridge.diffusion.models.wan.wan_provider.WanModelProvider attribute) (bridge.models.gemma.gemma2_provider.Gemma2ModelProvider attribute) (bridge.models.gemma.gemma3_provider.Gemma3ModelProvider12B attribute) (bridge.models.gemma.gemma3_provider.Gemma3ModelProvider1B attribute) (bridge.models.gemma.gemma3_provider.Gemma3ModelProvider27B attribute) (bridge.models.gemma.gemma3_provider.Gemma3ModelProvider4B attribute) (bridge.models.gemma.gemma_provider.GemmaModelProvider attribute) (bridge.models.glm_vl.glm_45v_provider.GLM45VModelProvider attribute) (bridge.models.gpt.gpt_builder.GPTModelConfig attribute) (bridge.models.gpt_provider.GPTModelProvider attribute) (bridge.models.mamba.mamba_builder.MambaModelConfig attribute) (bridge.models.mamba.mamba_provider.MambaModelProvider attribute) (bridge.models.mimo.llava_provider.LlavaMimoProvider attribute) (bridge.models.mistral.mistral_provider.MistralModelProvider attribute) (bridge.models.mistral.mistral_provider.MistralSmall3ModelProvider24B attribute) (bridge.models.olmoe.olmoe_provider.OlMoEModelProvider attribute) (bridge.models.qwen3_asr.modeling_qwen3_asr.transformer_config.Qwen3ASRTransformerConfig attribute) (bridge.models.qwen_omni.modeling_qwen25_omni.transformer_config.Qwen25OmniTransformerConfig attribute) (bridge.models.qwen_vl.modelling_qwen3_vl.transformer_config.Qwen3VLTransformerConfig attribute) (bridge.models.sarvam.sarvam_provider.SarvamMLAModelProvider attribute) (bridge.models.sarvam.sarvam_provider.SarvamMoEModelProvider attribute) (bridge.models.t5_provider.T5ModelProvider attribute) (bridge.training.tokenizers.config.TokenizerConfig attribute) vp_stage (bridge.models.conversion.model_bridge.MegatronWeightTuple attribute) (bridge.models.conversion.model_bridge.WeightConversionTask attribute) W W_latents (bridge.diffusion.data.wan.wan_energon_datamodule.WanDatasetConfig attribute) (bridge.diffusion.data.wan.wan_mock_datamodule.WanMockDataModuleConfig attribute) walk() (in module bridge.peft.walk_utils) Wan3DRopeEmbeddings (class in bridge.diffusion.models.wan.rope_utils) wan_14b_pretrain_config() (in module bridge.diffusion.recipes.wan.wan) wan_14b_sft_config() (in module bridge.diffusion.recipes.wan.wan) wan_1_3b_pretrain_config() (in module bridge.diffusion.recipes.wan.wan) wan_1_3b_sft_config() (in module bridge.diffusion.recipes.wan.wan) wan_data_step() (in module bridge.diffusion.models.wan.wan_step) WanAdaLN (class in bridge.diffusion.models.wan.wan_layer_spec) WanAdapter (class in bridge.diffusion.models.wan.flow_matching.flow_matching_pipeline_wan) WanBridge (class in bridge.diffusion.conversion.wan.wan_bridge) WanDataModuleConfig (class in bridge.diffusion.data.wan.wan_energon_datamodule) WanDatasetConfig (class in bridge.diffusion.data.wan.wan_energon_datamodule) wandb_logger (bridge.training.state.GlobalState property) WandbPlugin (class in bridge.recipes.run_plugins) WandbPluginScriptArgs (class in bridge.recipes.run_plugins) WanFlowMatchingPipeline (class in bridge.diffusion.models.wan.flow_matching.flow_matching_pipeline_wan) WanForwardStep (class in bridge.diffusion.models.wan.wan_step) WanLayerWithAdaLN (class in bridge.diffusion.models.wan.wan_layer_spec) WanMockDataModuleConfig (class in bridge.diffusion.data.wan.wan_mock_datamodule) WanModel (class in bridge.diffusion.models.wan.wan_model) WanModelProvider (class in bridge.diffusion.models.wan.wan_provider) WanModelProvider14B (class in bridge.diffusion.models.wan.wan_provider) WanModelProvider1_3B (class in bridge.diffusion.models.wan.wan_provider) WanSafeTensorsStateSource (class in bridge.diffusion.conversion.wan.wan_hf_pretrained) WanTaskEncoder (class in bridge.diffusion.data.wan.wan_taskencoder) WanWithAdaLNSubmodules (class in bridge.diffusion.models.wan.wan_layer_spec) warn_rank_0() (in module bridge.utils.common_utils) warning_filter() (in module bridge.training.utils.log_utils) weight (bridge.models.conversion.model_bridge.HFWeightTuple attribute) (bridge.models.conversion.model_bridge.MegatronWeightTuple attribute) WeightConversionTask (class in bridge.models.conversion.model_bridge) weights_verification_table() (in module bridge.models.conversion.utils) WeightType (in module bridge.models.conversion.param_mapping) wgrad_deferral_limit (bridge.training.comm_overlap._CommOverlapConfig attribute) (bridge.training.comm_overlap.CommOverlapConfig attribute) wildcard_match() (in module bridge.peft.utils) window_size (bridge.models.gemma.gemma2_provider.Gemma2ModelProvider attribute) (bridge.models.gemma.gemma3_provider.Gemma3ModelProvider attribute) (bridge.models.gemma.gemma3_provider.Gemma3ModelProvider12B attribute) (bridge.models.gemma.gemma3_provider.Gemma3ModelProvider1B attribute) (bridge.models.gemma.gemma3_provider.Gemma3ModelProvider27B attribute) (bridge.models.gemma.gemma3_provider.Gemma3ModelProvider4B attribute) (bridge.models.mistral.mistral_provider.MistralModelProvider attribute) (bridge.models.mistral.mistral_provider.MistralSmall3ModelProvider24B attribute) workspace (bridge.recipes.run_plugins.CometPlugin attribute) (bridge.recipes.run_plugins.CometPluginScriptArgs attribute) wrap_mimo_model_distributed() (in module bridge.models.mimo.mimo_ddp) wrap_train_step_function() (bridge.training.nvrx_straggler.NVRxStragglerDetectionManager method) wrap_with_ddp (bridge.models.model_provider.GetModelKwargs attribute) write_packed_parquet() (in module bridge.data.datasets.packed_parquet) Y yarn_beta_fast (bridge.models.gpt_provider.GPTModelProvider attribute) (bridge.models.ministral3.ministral3_provider.Ministral3ModelProvider attribute) yarn_beta_slow (bridge.models.gpt_provider.GPTModelProvider attribute) (bridge.models.ministral3.ministral3_provider.Ministral3ModelProvider attribute) yarn_correction_range_round_to_int (bridge.models.gpt_provider.GPTModelProvider attribute) (bridge.models.ministral3.ministral3_provider.Ministral3ModelProvider attribute) yarn_mscale (bridge.models.gpt_provider.GPTModelProvider attribute) (bridge.models.ministral3.ministral3_provider.Ministral3ModelProvider attribute) yarn_mscale_all_dim (bridge.models.gpt_provider.GPTModelProvider attribute) (bridge.models.ministral3.ministral3_provider.Ministral3ModelProvider attribute) yarn_original_max_position_embeddings (bridge.models.gpt_provider.GPTModelProvider attribute) (bridge.models.ministral3.ministral3_provider.Ministral3ModelProvider attribute) YARN_ROPE_SCALING_MAPPING (bridge.models.conversion.model_bridge.MegatronModelBridge attribute) yarn_rotary_scaling_factor (bridge.models.gpt_provider.GPTModelProvider attribute) (bridge.models.ministral3.ministral3_provider.Ministral3ModelProvider attribute) Z z_loss (bridge.models.bailing.modeling_bailing_moe_v2.MoEV2CausalLMOutputWithPast attribute) zero_grad_buffer_for_multimodule() (in module bridge.training.mimo_parallel_utils)