Index _ | A | B | C | D | E | F | G | H | I | K | L | M | N | O | P | Q | R | S | T | U | V | W _ __abs__() (bridge.utils.import_utils.UnavailableMeta method) __add__() (bridge.utils.import_utils.UnavailableMeta method) __all__ (in module bridge) (in module bridge.data.hf_processors) (in module bridge.models) (in module bridge.models.conversion) (in module bridge.models.decorators) (in module bridge.models.decorators.dispatch) (in module bridge.models.hf_pretrained) (in module bridge.models.llama) (in module bridge.models.mamba) (in module bridge.models.qwen) __bool__() (bridge.peft.walk_utils.HasBool method) __call__() (bridge.data.builders.hf_dataset.ProcessExampleFn method) (bridge.models.decorators.dispatch._Dispatch method) (bridge.models.gpt_full_te_layer_autocast_spec.TETransformerLayerAutocast method) (bridge.models.hf_pretrained.causal_lm.PreTrainedCausalLM method) (bridge.models.hf_pretrained.state.StateDict method) (bridge.models.hf_pretrained.vlm.PreTrainedVLM method) (bridge.peft.base.PEFT method) (bridge.training.tokenizers.tokenizer.MegatronTokenizer method) (bridge.utils.import_utils.UnavailableMeta method) __contact_emails__ (in module bridge.package_info) __contact_names__ (in module bridge.package_info) __contains__() (bridge.models.hf_pretrained.state.StateDict method) __deepcopy__() (bridge.training.utils.config_utils._ConfigContainerBase method) __del__() (bridge.data.datasets.utils._TextMemMapDataset method) __delete__() (bridge.utils.import_utils.UnavailableMeta method) __delitem__() (bridge.utils.import_utils.UnavailableMeta method) __description__ (in module bridge.package_info) __divmod__() (bridge.utils.import_utils.UnavailableMeta method) __download_url__ (in module bridge.package_info) __enter__() (bridge.training.utils.sig_utils.DistributedSignalHandler method) (bridge.utils.import_utils.UnavailableMeta method) (bridge.utils.import_utils.UnavailableNullContext method) __eq__() (bridge.utils.import_utils.UnavailableMeta method) __exit__() (bridge.training.utils.sig_utils.DistributedSignalHandler method) (bridge.utils.import_utils.UnavailableNullContext method) __floordiv__() (bridge.utils.import_utils.UnavailableMeta method) __ge__() (bridge.utils.import_utils.UnavailableMeta method) __get__() (bridge.utils.import_utils.UnavailableMeta method) __getattr__() (bridge.utils.import_utils.UnavailableMeta method) __getitem__() (bridge.data.datasets.sft.GPTSFTDataset method) (bridge.data.datasets.sft.GPTSFTPackedDataset method) (bridge.data.datasets.utils._OnlineSampleMapping method) (bridge.data.datasets.utils._TextMemMapDataset method) (bridge.data.samplers.RandomSeedDataset method) (bridge.models.hf_pretrained.state.StateDict method) (bridge.models.hf_pretrained.state.StateSource method) __gt__() (bridge.utils.import_utils.UnavailableMeta method) __hash__() (bridge.utils.import_utils.UnavailableMeta method) __homepage__ (in module bridge.package_info) __iadd__() (bridge.utils.import_utils.UnavailableMeta method) __idx_suffix__ (in module bridge.data.datasets.sft) (in module bridge.data.datasets.utils) __idx_version__ (in module bridge.data.datasets.sft) (in module bridge.data.datasets.utils) __ifloordiv__() (bridge.utils.import_utils.UnavailableMeta method) __ilshift__() (bridge.utils.import_utils.UnavailableMeta method) __imul__() (bridge.utils.import_utils.UnavailableMeta method) __index__() (bridge.utils.import_utils.UnavailableMeta method) __invert__() (bridge.utils.import_utils.UnavailableMeta method) __ipow__() (bridge.utils.import_utils.UnavailableMeta method) __irshift__() (bridge.utils.import_utils.UnavailableMeta method) __isub__() (bridge.utils.import_utils.UnavailableMeta method) __iter__() (bridge.data.samplers.MegatronPretrainingRandomSampler method) (bridge.data.samplers.MegatronPretrainingSampler method) (bridge.models.conversion.mapping_registry.MegatronMappingRegistry method) (bridge.models.hf_pretrained.state.StateDict method) (bridge.models.hf_pretrained.state.StateSource method) (bridge.utils.import_utils.UnavailableMeta method) __itruediv__() (bridge.utils.import_utils.UnavailableMeta method) __keywords__ (in module bridge.package_info) __le__() (bridge.utils.import_utils.UnavailableMeta method) __len__() (bridge.data.datasets.sft.GPTSFTDataset method) (bridge.data.datasets.utils._OnlineSampleMapping method) (bridge.data.datasets.utils._TextMemMapDataset method) (bridge.data.samplers.MegatronPretrainingRandomSampler method) (bridge.data.samplers.MegatronPretrainingSampler method) (bridge.data.samplers.RandomSeedDataset method) (bridge.models.conversion.mapping_registry.MegatronMappingRegistry method) (bridge.models.hf_pretrained.state.StateDict method) (bridge.models.hf_pretrained.state.StateSource method) (bridge.training.tokenizers.gpt2_tokenization.GPT2Tokenizer method) (bridge.utils.import_utils.UnavailableMeta method) __license__ (in module bridge.package_info) __lshift__() (bridge.utils.import_utils.UnavailableMeta method) __lt__() (bridge.utils.import_utils.UnavailableMeta method) __mul__() (bridge.utils.import_utils.UnavailableMeta method) __ne__() (bridge.utils.import_utils.UnavailableMeta method) __neg__() (bridge.utils.import_utils.UnavailableMeta method) __new__() (bridge.utils.import_utils.UnavailableMeta method) __package_name__ (in module bridge.package_info) __post_init__() (bridge.data.datasets.packed_sequence.PackedSequenceSpecs method) (bridge.peft.canonical_lora.CanonicalLoRA method) (bridge.peft.dora.DoRA method) (bridge.training.comm_overlap.CommOverlapConfig method) (bridge.training.config.CheckpointConfig method) (bridge.training.config.GPTDatasetConfig method) (bridge.training.config.NVRxStragglerDetectionConfig method) (bridge.training.config.ProfilingConfig method) (bridge.training.config.SchedulerConfig method) (bridge.training.mixed_precision.MixedPrecisionConfig method) __pow__() (bridge.utils.import_utils.UnavailableMeta method) __radd__() (bridge.utils.import_utils.UnavailableMeta method) __rdivmod__() (bridge.utils.import_utils.UnavailableMeta method) __reduce__() (bridge.data.datasets.utils._OnlineSampleMapping method) __reduce_ex__() (bridge.data.datasets.utils._OnlineSampleMapping method) __repository_url__ (in module bridge.package_info) __repr__() (bridge.models.conversion.auto_bridge.AutoBridge method) (bridge.models.conversion.mapping_registry.MegatronMappingRegistry method) (bridge.models.decorators.dispatch._Dispatch method) (bridge.models.hf_pretrained.causal_lm.PreTrainedCausalLM method) (bridge.models.hf_pretrained.state.StateDict method) (bridge.models.hf_pretrained.vlm.PreTrainedVLM method) __rfloordiv__() (bridge.utils.import_utils.UnavailableMeta method) __rlshift__() (bridge.utils.import_utils.UnavailableMeta method) __rmul__() (bridge.utils.import_utils.UnavailableMeta method) __rpow__() (bridge.utils.import_utils.UnavailableMeta method) __rrshift__() (bridge.utils.import_utils.UnavailableMeta method) __rshift__() (bridge.utils.import_utils.UnavailableMeta method) __rsub__() (bridge.utils.import_utils.UnavailableMeta method) __rtruediv__() (bridge.utils.import_utils.UnavailableMeta method) __setattr__() (bridge.training.mixed_precision.MixedPrecisionConfig method) __setitem__() (bridge.utils.import_utils.UnavailableMeta method) __shortversion__ (in module bridge.package_info) __slots__ (bridge.models.decorators.dispatch._Dispatch attribute) __str__() (bridge.data.datasets.utils._OnlineSampleMapping method) __sub__() (bridge.utils.import_utils.UnavailableMeta method) __truediv__() (bridge.utils.import_utils.UnavailableMeta method) __version__ (bridge.training.utils.config_utils._ConfigContainerBase attribute) (in module bridge.package_info) _add_speaker_and_signal() (in module bridge.data.datasets.utils) _All2AllHp2Sp (class in bridge.peft.utils) _apply_cfgs() (bridge.training.comm_overlap.CommOverlapConfig method) _apply_image_tag() (bridge.training.tokenizers.multimodal_tokenizer.MultimodalTokenizer method) _apply_overrides() (in module bridge.training.utils.omegaconf_utils) _apply_peft_transformation() (in module bridge.training.setup) _BertWordPieceTokenizer (class in bridge.training.tokenizers.tokenizer) _BridgeImplClass (in module bridge.models.conversion.model_bridge) _broadcast_shared_embeddings() (bridge.models.conversion.model_bridge.MegatronModelBridge method) _build_data_from_text() (bridge.data.datasets.utils._JSONLMemMapDataset method) (bridge.data.datasets.utils._TextMemMapDataset method) _build_datasets() (bridge.data.builders.finetuning_dataset.FinetuningDatasetBuilder method) _build_loss_mask() (bridge.data.datasets.sft.GPTSFTDataset method) (bridge.data.datasets.sft.GPTSFTPackedDataset method) _build_memmap_index_files() (in module bridge.data.datasets.utils) _build_samples_mapping() (bridge.data.datasets.sft.GPTSFTChatDataset method) (bridge.data.datasets.sft.GPTSFTDataset method) (bridge.data.datasets.sft.GPTSFTPackedDataset method) _build_sharded_state_dict_metadata() (in module bridge.training.checkpointing) _cached_get_key_to_filename_map() (bridge.models.hf_pretrained.state.SafeTensorsStateSource static method) _call_target() (in module bridge.utils.instantiate_utils) _ceil_to_nearest() (bridge.data.datasets.sft.GPTSFTDataset method) _CHECKPOINT_VERSION (in module bridge.training.checkpointing) _clean_text() (bridge.training.tokenizers.bert_tokenization.BasicTokenizer method) _collate_item() (bridge.data.datasets.sft.GPTSFTDataset method) _CommOverlapConfig (class in bridge.training.comm_overlap) _ConfigContainerBase (class in bridge.training.utils.config_utils) _contains_code_references() (in module bridge.models.config) _convert_node() (in module bridge.utils.instantiate_utils) _convert_target_to_string() (in module bridge.utils.instantiate_utils) _convert_value_to_dict() (bridge.training.utils.config_utils._ConfigContainerBase class method) (in module bridge.models.config) _create_attention_mask() (bridge.data.datasets.sft.GPTSFTDataset method) _create_config_from_provider() (bridge.models.conversion.auto_bridge.AutoBridge method) _create_dataset() (bridge.data.builders.finetuning_dataset.FinetuningDatasetBuilder method) _create_list_wrapper() (in module bridge.peft.walk_utils) _create_model() (in module bridge.models.model_provider) _create_peft_pre_wrap_hook() (in module bridge.training.setup) _dataclass_to_omegaconf_dict() (in module bridge.training.utils.omegaconf_utils) _ddp_wrap() (in module bridge.models.model_provider) _deallocate_indexed_dataset_memory() (in module bridge.data.datasets.utils) _detect_parallelism_type() (bridge.models.conversion.param_mapping.AutoMapping method) _Dispatch (class in bridge.models.decorators.dispatch) _dispatch() (bridge.models.decorators.dispatch._Dispatch method) _encode() (bridge.training.tokenizers.multimodal_tokenizer.MultimodalTokenizer method) _enum_representer() (in module bridge.utils.yaml_utils) _EXCLUDE_FIELD (in module bridge.training.utils.omegaconf_utils) _extract_megatron_lm_args_from_state_dict() (in module bridge.training.checkpointing) _extract_pos_args() (in module bridge.utils.instantiate_utils) _extract_tokenizer_model_name() (bridge.data.builders.finetuning_dataset.FinetuningDatasetBuilder method) _fetch_sample_from_memmap() (bridge.data.datasets.utils._TextMemMapDataset method) _finish_train() (in module bridge.training.train) _format_gpu_scores() (bridge.training.nvrx_straggler.NVRxStragglerDetectionManager static method) _format_location() (bridge.models.decorators.dispatch._Dispatch method) _format_no_implementation_error() (bridge.models.decorators.dispatch._Dispatch method) _function_representer() (in module bridge.utils.yaml_utils) _gather_flag_from_rank0() (bridge.training.nvrx_straggler.NVRxStragglerDetectionManager method) _generate_model_state_dict() (in module bridge.training.checkpointing) _generation_config_representer() (in module bridge.utils.yaml_utils) _get_activation_fn() (bridge.peft.utils.ParallelLinearAdapter method) _get_all_keys() (bridge.models.hf_pretrained.state.StateDict method) _get_artifact_name_and_version() (in module bridge.training.utils.wandb_utils) _get_causal_lm_architecture() (bridge.models.conversion.auto_bridge.AutoBridge method) _get_config() (bridge.models.conversion.param_mapping.MegatronParamMapping method) _get_func_kwargs() (in module bridge.peft.walk_utils) _get_header_conversation_type_mask_role() (in module bridge.data.datasets.utils) _get_init_fn() (bridge.peft.utils.ParallelLinearAdapter method) _get_key_to_filename_map() (bridge.models.hf_pretrained.state.SafeTensorsStateSource method) _get_layer_offset() (bridge.models.gpt_full_te_layer_autocast_spec.TETransformerLayerAutocast method) _get_model_comm_overlap_cfgs() (bridge.training.comm_overlap.CommOverlapConfig method) _get_model_instance() (bridge.models.conversion.auto_bridge.AutoBridge method) _get_non_persistent_iteration() (in module bridge.training.checkpointing) _get_optimizer_overlap_cfgs() (bridge.training.comm_overlap.CommOverlapConfig method) _get_or_create_mapping() (bridge.models.conversion.param_mapping.AutoMapping method) _get_provider_from_model() (bridge.models.conversion.model_bridge.MegatronModelBridge method) _get_samples_mapping() (in module bridge.data.datasets.utils) _get_scheduler() (in module bridge.training.optim) _get_train_state_from_state_dict() (in module bridge.training.checkpointing) _get_transformer_layer_spec() (in module bridge.training.mlm_compat.model) _get_wandb_artifact_tracker_filename() (in module bridge.training.utils.wandb_utils) _get_weight_norm() (bridge.peft.dora_layers.DoRALinear method) _GPT2BPETokenizer (class in bridge.training.tokenizers.tokenizer) _gpt_provider() (in module bridge.training.mlm_compat.model) _GPTSentencePieceTokenizer (class in bridge.training.tokenizers.tokenizer) _handle_straggler_report() (bridge.training.nvrx_straggler.NVRxStragglerDetectionManager method) _HuggingFaceTokenizer (class in bridge.training.tokenizers.tokenizer) _identify_start_index_of_subsequence() (in module bridge.data.datasets.utils) _index_file_exists() (in module bridge.data.datasets.utils) _index_fn() (in module bridge.data.datasets.utils) _init_adapter() (bridge.peft.lora_layers.LinearAdapter static method) (bridge.peft.lora_layers.TELinearAdapter static method) _initalize() (bridge.training.tokenizers.tokenizer._GPTSentencePieceTokenizer method) (bridge.training.tokenizers.tokenizer._SentencePieceTokenizer method) _initialize_distributed() (in module bridge.training.initialize) _initialize_tp_communicators() (in module bridge.training.initialize) _is_chinese_char() (bridge.training.tokenizers.bert_tokenization.BasicTokenizer method) _is_control() (in module bridge.training.tokenizers.bert_tokenization) _is_model_section() (in module bridge.training.checkpointing) _is_omegaconf_problematic() (in module bridge.training.utils.omegaconf_utils) _is_punctuation() (in module bridge.training.tokenizers.bert_tokenization) _is_target() (in module bridge.utils.instantiate_utils) _is_whitespace() (in module bridge.training.tokenizers.bert_tokenization) _JSONLMemMapDataset (class in bridge.data.datasets.utils) _Keys (class in bridge.utils.instantiate_utils) _Llama2Tokenizer (class in bridge.training.tokenizers.tokenizer) _load_args_from_checkpoint() (in module bridge.training.mlm_compat.arguments) _load_base_checkpoint() (in module bridge.training.checkpointing) _load_checkpoint_from_path() (in module bridge.training.checkpointing) _load_config() (bridge.models.hf_pretrained.base.PreTrainedBase method) (bridge.models.hf_pretrained.causal_lm.PreTrainedCausalLM method) (bridge.models.hf_pretrained.vlm.PreTrainedVLM method) _load_dataset() (bridge.data.builders.hf_dataset.HFDatasetBuilder method) (bridge.data.datasets.sft.GPTSFTDataset method) (bridge.data.datasets.sft.GPTSFTPackedDataset method) _load_generation_config() (bridge.models.hf_pretrained.causal_lm.PreTrainedCausalLM method) (bridge.models.hf_pretrained.vlm.PreTrainedVLM method) _load_global_dist_base_checkpoint() (in module bridge.training.checkpointing) _load_image_processor() (bridge.models.hf_pretrained.vlm.PreTrainedVLM method) _load_model() (bridge.models.hf_pretrained.base.PreTrainedBase method) (bridge.models.hf_pretrained.causal_lm.PreTrainedCausalLM method) (bridge.models.hf_pretrained.vlm.PreTrainedVLM method) _load_model_state_dict() (in module bridge.training.checkpointing) _load_model_weights_from_checkpoint() (in module bridge.training.checkpointing) _load_non_persistent_base_checkpoint() (in module bridge.training.checkpointing) _load_processor() (bridge.models.hf_pretrained.vlm.PreTrainedVLM method) _load_state_if_exists() (in module bridge.training.fault_tolerance) _load_tensors() (bridge.models.hf_pretrained.state.StateDict method) _load_tokenizer() (bridge.models.hf_pretrained.causal_lm.PreTrainedCausalLM method) (bridge.models.hf_pretrained.vlm.PreTrainedVLM method) _locate() (in module bridge.utils.instantiate_utils) _log_gpu_perf_scores() (bridge.training.nvrx_straggler.NVRxStragglerDetectionManager method) _log_gpu_scores() (bridge.training.nvrx_straggler.NVRxStragglerDetectionManager method) _make_indexed_dataset_compatibility() (in module bridge.data.datasets.utils) _mamba_provider() (in module bridge.training.mlm_compat.model) _map_module() (in module bridge.peft.walk_utils) _map_module_dict() (in module bridge.peft.walk_utils) _map_module_list() (in module bridge.peft.walk_utils) _mask_targets() (in module bridge.data.datasets.utils) _match_keys() (bridge.models.hf_pretrained.state.StateDict method) _maybe_cast_to_list() (bridge.data.datasets.sft.GPTSFTDataset method) (bridge.data.datasets.sft.GPTSFTPackedDataset method) _maybe_update_timeouts() (in module bridge.training.fault_tolerance) _maybe_validate_prompt_template() (bridge.data.datasets.sft.GPTSFTChatDataset method) (bridge.data.datasets.sft.GPTSFTDataset method) _megatron_global_param_names_all_pp_ranks() (bridge.models.conversion.model_bridge.MegatronModelBridge method) _megatron_local_name_to_global() (in module bridge.models.conversion.model_bridge) _MIN_ITERS_FOR_STEP_TIMEOUT_UPDATE (in module bridge.training.fault_tolerance) _model_bridge (bridge.models.conversion.auto_bridge.AutoBridge property) _MODULE_TYPE_REGISTRY (bridge.models.conversion.param_mapping.AutoMapping attribute) _multiple_truncation() (bridge.data.datasets.sft.GPTSFTDataset method) _NON_PERSISTENT_CKPT_SUBDIR (in module bridge.training.checkpointing) _normalize_expert_param_name() (bridge.models.conversion.param_mapping.MegatronParamMapping method) _NullTokenizer (class in bridge.training.tokenizers.tokenizer) _NUM_WARMUP_ITERS (in module bridge.training.fault_tolerance) _OnlineSampleMapping (class in bridge.data.datasets.utils) _override_user_cfgs() (bridge.training.comm_overlap.CommOverlapConfig method) _partial_representer() (in module bridge.utils.yaml_utils) _populate_vocab() (bridge.training.tokenizers.tokenizer._SentencePieceTokenizer method) _prepare_input_dict_or_list() (in module bridge.utils.instantiate_utils) _preprocess() (in module bridge.data.datasets.utils) _print_gpu_scores() (bridge.training.nvrx_straggler.NVRxStragglerDetectionManager method) _print_num_params() (in module bridge.models.model_provider) _print_stragglers() (bridge.training.nvrx_straggler.NVRxStragglerDetectionManager method) _process_example() (bridge.data.datasets.sft.GPTSFTChatDataset method) (bridge.data.datasets.sft.GPTSFTDataset method) _REGISTRY (in module bridge.data.utils) _resolve_names() (bridge.models.conversion.param_mapping.MegatronParamMapping method) _resolve_path() (bridge.models.hf_pretrained.state.SafeTensorsStateSource static method) _resolve_target() (in module bridge.utils.instantiate_utils) _response_value_formater() (in module bridge.data.datasets.utils) _restore_excluded_fields() (in module bridge.training.utils.omegaconf_utils) _run_split_on_punc() (bridge.training.tokenizers.bert_tokenization.BasicTokenizer method) _run_strip_accents() (bridge.training.tokenizers.bert_tokenization.BasicTokenizer method) _safe_object_representer() (in module bridge.utils.yaml_utils) _SentencePieceTokenizer (class in bridge.training.tokenizers.tokenizer) _separate_template() (bridge.data.datasets.sft.GPTSFTDataset method) _set_num_cuda_device_max_connections() (bridge.training.comm_overlap.CommOverlapConfig method) _set_random_seed() (in module bridge.training.initialize) _set_signal_handler() (bridge.training.state.GlobalState method) _SignatureType (in module bridge.models.decorators.dispatch) _TextMemMapDataset (class in bridge.data.datasets.utils) _timers_write_to_wandb() (in module bridge.training.state) _TModule (in module bridge.peft.walk_utils) _to_dict() (in module bridge.models.config) _tokenize_chinese_chars() (bridge.training.tokenizers.bert_tokenization.BasicTokenizer method) _tokenizer_config_from_args() (in module bridge.training.mlm_compat.arguments) _torch_dtype_representer() (in module bridge.utils.yaml_utils) _track_excluded_fields() (in module bridge.training.utils.omegaconf_utils) _transformer_config_from_args() (in module bridge.training.mlm_compat.arguments) _transpose_first_dim() (in module bridge.training.checkpointing) _truncation() (bridge.data.datasets.sft.GPTSFTDataset method) _unwrap_name() (bridge.models.conversion.model_bridge.MegatronModelBridge method) _update_model_config_funcs() (in module bridge.training.setup) _update_timeouts() (in module bridge.training.fault_tolerance) _validate_config() (bridge.models.conversion.auto_bridge.AutoBridge class method) _validate_patterns() (bridge.models.conversion.param_mapping.MegatronParamMapping method) _verify_no_callables() (in module bridge.training.utils.omegaconf_utils) _vocab_size_with_padding() (in module bridge.training.tokenizers.tokenizer) _warmup_jit_function() (in module bridge.training.initialize) _with_progress_tracking() (bridge.models.conversion.model_bridge.MegatronModelBridge method) A a2a_experimental (bridge.peft.lora.LoRA attribute) account_for_embedding_in_pipeline_split (bridge.models.gpt_provider.GPTModelProvider attribute) account_for_loss_in_pipeline_split (bridge.models.gpt_provider.GPTModelProvider attribute) activation_func (bridge.models.llama.llama_provider.Llama3ModelProvider attribute) (bridge.models.llama.llama_provider.LlamaModelProvider attribute) (bridge.models.mamba.nemotron_h_provider.NemotronHModelProvider attribute) (bridge.models.qwen.qwen_provider.Qwen2ModelProvider attribute) (bridge.models.qwen.qwen_provider.Qwen3ModelProvider attribute) (bridge.models.qwen.qwen_provider.Qwen3MoEModelProvider attribute) adapter_key_filter() (bridge.peft.base.PEFT method) AdapterWrapper (class in bridge.peft.adapter_wrapper) add_additional_special_tokens() (bridge.training.tokenizers.tokenizer._BertWordPieceTokenizer method) add_bias_linear (bridge.models.llama.llama_provider.Llama3ModelProvider attribute) (bridge.models.llama.llama_provider.Llama4ModelProvider attribute) (bridge.models.llama.llama_provider.LlamaModelProvider attribute) (bridge.models.mamba.mamba_provider.MambaProvider attribute) (bridge.models.qwen.qwen_provider.Qwen2ModelProvider attribute) (bridge.models.qwen.qwen_provider.Qwen3ModelProvider attribute) (bridge.models.qwen.qwen_provider.Qwen3MoEModelProvider attribute) add_filter_to_all_loggers() (in module bridge.training.utils.log_utils) add_qkv_bias (bridge.models.qwen.qwen_provider.Qwen2ModelProvider attribute) (bridge.models.qwen.qwen_provider.Qwen3ModelProvider attribute) (bridge.models.qwen.qwen_provider.Qwen3MoEModelProvider attribute) add_special_tokens (bridge.models.hf_pretrained.causal_lm.EncodeKwargs attribute) add_token() (bridge.training.tokenizers.tokenizer._BertWordPieceTokenizer method) additional_special_tokens (bridge.training.tokenizers.tokenizer._BertWordPieceTokenizer property) additional_special_tokens_ids (bridge.training.tokenizers.tokenizer._BertWordPieceTokenizer property) (bridge.training.tokenizers.tokenizer._GPTSentencePieceTokenizer property) (bridge.training.tokenizers.tokenizer._Llama2Tokenizer property) (bridge.training.tokenizers.tokenizer._NullTokenizer property) (bridge.training.tokenizers.tokenizer._SentencePieceTokenizer property) aggregate (bridge.training.comm_overlap.RingExchangeOverlapCfg attribute) align_grad_reduce (bridge.training.config.DistributedInitConfig attribute) align_param_gather (bridge.training.comm_overlap._CommOverlapConfig attribute) (bridge.training.comm_overlap.CommOverlapConfig attribute) all2all_hp2sp() (in module bridge.peft.utils) all_gather_item() (in module bridge.training.utils.sig_utils) alpha (bridge.peft.canonical_lora.CanonicalLoRA attribute) (bridge.peft.dora.DoRA attribute) (bridge.peft.lora.LoRA attribute) append_to_progress_log() (in module bridge.training.utils.log_utils) apply_deepep() (in module bridge.training.deepep) apply_moe_token_drop() (in module bridge.training.utils.moe_token_drop) apply_overrides() (in module bridge.training.utils.omegaconf_utils) apply_peft_adapter_filter_to_state_dict() (in module bridge.training.checkpointing) apply_query_key_layer_scaling (bridge.models.mamba.nemotron_h_provider.NemotronHModelProvider attribute) apply_rope_fusion (bridge.models.gpt_provider.GPTModelProvider attribute) (bridge.models.llama.llama_provider.Llama3ModelProvider attribute) (bridge.models.llama.llama_provider.Llama4ModelProvider attribute) (bridge.models.llama.llama_provider.LlamaModelProvider attribute) (bridge.models.mamba.mamba_provider.MambaProvider attribute) (bridge.models.t5_provider.T5ModelProvider attribute) apply_rope_scaling() (in module bridge.models.llama.llama_provider) ARGS (bridge.utils.instantiate_utils._Keys attribute) ARTIFACTS (bridge.models.hf_pretrained.base.PreTrainedBase attribute) (bridge.models.hf_pretrained.causal_lm.PreTrainedCausalLM attribute) (bridge.models.hf_pretrained.vlm.PreTrainedVLM attribute) assistant_prefix_len (bridge.training.tokenizers.multimodal_tokenizer.PromptConfig attribute) async_calls_queue (bridge.training.state.GlobalState property) async_save (bridge.training.config.CheckpointConfig attribute) atomic_gemm (bridge.training.comm_overlap.PipelineOverlapCfg attribute) (bridge.training.comm_overlap.RingExchangeOverlapCfg attribute) attention_backend (bridge.models.mamba.mamba_provider.MambaProvider attribute) attention_chunk_size (bridge.models.llama.llama_provider.Llama4ModelProvider attribute) attention_dropout (bridge.models.gpt_provider.GPTProvider175B attribute) (bridge.models.llama.llama_provider.Llama3ModelProvider attribute) (bridge.models.llama.llama_provider.LlamaModelProvider attribute) (bridge.models.mamba.mamba_provider.MambaProvider attribute) (bridge.models.qwen.qwen_provider.Qwen2ModelProvider attribute) (bridge.models.qwen.qwen_provider.Qwen3ModelProvider attribute) (bridge.models.qwen.qwen_provider.Qwen3MoEModelProvider attribute) attention_mask (bridge.models.hf_pretrained.causal_lm.GenerateKwargs attribute) attention_softmax_in_fp32 (bridge.models.gpt_provider.GPTModelProvider attribute) (bridge.models.mamba.nemotron_h_provider.NemotronHModelProvider attribute) (bridge.models.t5_provider.T5ModelProvider attribute) AutoBridge (class in bridge.models.conversion.auto_bridge) autocast_dtype (bridge.models.qwen.qwen_provider.Qwen2ModelProvider attribute) (bridge.models.qwen.qwen_provider.Qwen3ModelProvider attribute) (bridge.models.qwen.qwen_provider.Qwen3MoEModelProvider attribute) (bridge.training.mixed_precision.MixedPrecisionConfig attribute) autocast_enabled (bridge.training.mixed_precision.MixedPrecisionConfig attribute) AutocastTransformerLayer (class in bridge.models.gpt_full_te_layer_autocast_spec) AutoMapping (class in bridge.models.conversion.param_mapping) B backward() (bridge.peft.utils._All2AllHp2Sp static method) barrier_and_log() (in module bridge.training.utils.log_utils) base_linear_forward() (bridge.peft.adapter_wrapper.AdapterWrapper method) BasicTokenizer (class in bridge.training.tokenizers.bert_tokenization) batch_p2p_comm (bridge.training.comm_overlap._CommOverlapConfig attribute) (bridge.training.comm_overlap.CommOverlapConfig attribute) bf16 (bridge.models.mamba.mamba_provider.MambaProvider attribute) (bridge.models.model_provider.GetModelKwargs attribute) (bridge.models.qwen.qwen_provider.Qwen2ModelProvider attribute) (bridge.models.qwen.qwen_provider.Qwen3ModelProvider attribute) (bridge.models.qwen.qwen_provider.Qwen3MoEModelProvider attribute) (bridge.training.mixed_precision.MixedPrecisionConfig attribute) bf16_mixed() (in module bridge.training.mixed_precision) bf16_with_fp8_current_scaling_mixed() (in module bridge.training.mixed_precision) bf16_with_fp8_mixed() (in module bridge.training.mixed_precision) bf16_with_fp8_subchannel_scaling_mixed() (in module bridge.training.mixed_precision) bf16_with_mxfp8_mixed() (in module bridge.training.mixed_precision) bias_activation_fusion (bridge.models.gpt_provider.GPTModelProvider attribute) (bridge.models.gpt_provider.GPTProvider126M attribute) (bridge.models.gpt_provider.GPTProvider175B attribute) (bridge.models.gpt_provider.GPTProvider20B attribute) (bridge.models.gpt_provider.GPTProvider40B attribute) (bridge.models.gpt_provider.GPTProvider5B attribute) (bridge.models.gpt_provider.GPTProvider7B attribute) (bridge.models.llama.llama_provider.Llama3ModelProvider attribute) (bridge.models.llama.llama_provider.LlamaModelProvider attribute) (bridge.models.t5_provider.T5ModelProvider attribute) bias_dropout_add_fusion (bridge.models.gpt_provider.GPTProvider126M attribute) (bridge.models.gpt_provider.GPTProvider175B attribute) (bridge.models.gpt_provider.GPTProvider20B attribute) (bridge.models.gpt_provider.GPTProvider40B attribute) (bridge.models.gpt_provider.GPTProvider5B attribute) (bridge.models.gpt_provider.GPTProvider7B attribute) bias_dropout_fusion (bridge.models.gpt_provider.GPTModelProvider attribute) (bridge.models.llama.llama_provider.Llama3ModelProvider attribute) (bridge.models.llama.llama_provider.LlamaModelProvider attribute) (bridge.models.mamba.mamba_provider.MambaProvider attribute) (bridge.models.t5_provider.T5ModelProvider attribute) blend (bridge.training.config.MockGPTDatasetConfig attribute) blend_per_split (bridge.training.config.MockGPTDatasetConfig attribute) bos (bridge.training.tokenizers.tokenizer._BertWordPieceTokenizer property) (bridge.training.tokenizers.tokenizer._HuggingFaceTokenizer property) (bridge.training.tokenizers.tokenizer._SentencePieceTokenizer property) (bridge.training.tokenizers.tokenizer.CustomTikTokenizer property) bos_id (bridge.training.tokenizers.tokenizer.MegatronTokenizer property) bos_token (bridge.training.tokenizers.tokenizer._BertWordPieceTokenizer property) bos_token_id (bridge.models.hf_pretrained.causal_lm.GenerateKwargs attribute) bpe() (bridge.training.tokenizers.gpt2_tokenization.GPT2Tokenizer method) bridge module bridge.data module bridge.data.builders module bridge.data.builders.finetuning_dataset module bridge.data.builders.hf_dataset module bridge.data.datasets module bridge.data.datasets.packed_sequence module bridge.data.datasets.packing_utils module bridge.data.datasets.sft module bridge.data.datasets.utils module bridge.data.hf_processors module bridge.data.hf_processors.squad module bridge.data.loaders module bridge.data.samplers module bridge.data.utils module bridge.models module bridge.models.config module bridge.models.conversion module bridge.models.conversion.auto_bridge module bridge.models.conversion.mapping_registry module bridge.models.conversion.model_bridge module bridge.models.conversion.param_mapping module bridge.models.conversion.utils module bridge.models.decorators module bridge.models.decorators.dispatch module bridge.models.decorators.torchrun module bridge.models.gpt_full_te_layer_autocast_spec module bridge.models.gpt_provider module bridge.models.hf_pretrained module bridge.models.hf_pretrained.base module bridge.models.hf_pretrained.causal_lm module bridge.models.hf_pretrained.state module bridge.models.hf_pretrained.vlm module bridge.models.llama module bridge.models.llama.llama4_utils module bridge.models.llama.llama_bridge module bridge.models.llama.llama_provider module bridge.models.mamba module bridge.models.mamba.mamba_provider module bridge.models.mamba.nemotron_h_provider module bridge.models.model_provider module bridge.models.qwen module bridge.models.qwen.qwen2_bridge module bridge.models.qwen.qwen3_bridge module bridge.models.qwen.qwen3_moe_bridge module bridge.models.qwen.qwen_provider module bridge.models.t5_provider module bridge.package_info module bridge.peft module bridge.peft.adapter_wrapper module bridge.peft.base module bridge.peft.canonical_lora module bridge.peft.dora module bridge.peft.dora_layers module bridge.peft.lora module bridge.peft.lora_layers module bridge.peft.module_matcher module bridge.peft.utils module bridge.peft.walk_utils module bridge.training module bridge.training.checkpointing module bridge.training.comm_overlap module bridge.training.config module bridge.training.deepep module bridge.training.eval module bridge.training.fault_tolerance module bridge.training.finetune module bridge.training.gpt_step module bridge.training.initialize module bridge.training.losses module bridge.training.mixed_precision module bridge.training.mlm_compat module bridge.training.mlm_compat.activations module bridge.training.mlm_compat.arguments module bridge.training.mlm_compat.model module bridge.training.model_load_save module bridge.training.nvrx_straggler module bridge.training.optim module bridge.training.pretrain module bridge.training.setup module bridge.training.state module bridge.training.tokenizers module bridge.training.tokenizers.bert_tokenization module bridge.training.tokenizers.config module bridge.training.tokenizers.gpt2_tokenization module bridge.training.tokenizers.multimodal_tokenizer module bridge.training.tokenizers.tokenizer module bridge.training.train module bridge.training.utils module bridge.training.utils.checkpoint_utils module bridge.training.utils.config_utils module bridge.training.utils.flop_utils module bridge.training.utils.log_utils module bridge.training.utils.moe_token_drop module bridge.training.utils.omegaconf_utils module bridge.training.utils.sig_utils module bridge.training.utils.theoretical_memory_utils module bridge.training.utils.train_utils module bridge.training.utils.wandb_utils module bridge.utils module bridge.utils.common_utils module bridge.utils.decorators module bridge.utils.fusions module bridge.utils.import_utils module bridge.utils.instantiate_utils module bridge.utils.yaml_utils module broadcast_from_pp_rank() (bridge.models.conversion.param_mapping.MegatronParamMapping method) broadcast_obj_from_pp_rank() (bridge.models.conversion.param_mapping.MegatronParamMapping method) broadcast_tensor_to_tp_ranks() (bridge.models.conversion.param_mapping.MegatronParamMapping method) bucket_size (bridge.training.comm_overlap._CommOverlapConfig attribute) (bridge.training.comm_overlap.CommOverlapConfig attribute) build() (bridge.data.builders.finetuning_dataset.FinetuningDatasetBuilder method) build_conversion_tasks() (bridge.models.conversion.model_bridge.MegatronModelBridge method) build_index_files() (in module bridge.data.datasets.utils) build_index_from_memdata() (in module bridge.data.datasets.utils) build_pretraining_data_loader() (in module bridge.data.samplers) build_tokenizer() (in module bridge.training.tokenizers.tokenizer) build_train_valid_test_data_iterators() (in module bridge.data.loaders) build_train_valid_test_data_loaders() (in module bridge.data.loaders) build_train_valid_test_datasets() (in module bridge.data.loaders) BulkOverlapCfg (class in bridge.training.comm_overlap) bytes_to_unicode() (in module bridge.training.tokenizers.gpt2_tokenization) C calc_ft_timeouts (bridge.training.config.FaultToleranceConfig attribute) calc_individual_gpu_perf (bridge.training.config.NVRxStragglerDetectionConfig attribute) calc_params_l2_norm() (in module bridge.training.utils.train_utils) calc_relative_gpu_perf (bridge.training.config.NVRxStragglerDetectionConfig attribute) CALL (bridge.utils.instantiate_utils._Keys attribute) can_enable_apply_rope_fusion() (in module bridge.utils.fusions) can_enable_bias_dropout_fusion() (in module bridge.utils.fusions) can_enable_gradient_accumulation_fusion() (in module bridge.utils.fusions) can_enable_masked_softmax_fusion() (in module bridge.utils.fusions) can_handle() (bridge.models.conversion.auto_bridge.AutoBridge class method) canonical_mapping (bridge.peft.module_matcher.ModuleMatcher attribute) CanonicalLoRA (class in bridge.peft.canonical_lora) CausalLMType (in module bridge.models.hf_pretrained.causal_lm) cfg (bridge.training.state.GlobalState property) cga_size (bridge.training.comm_overlap.BulkOverlapCfg attribute) (bridge.training.comm_overlap.PipelineOverlapCfg attribute) (bridge.training.comm_overlap.RingExchangeOverlapCfg attribute) check_forward_step_func_num_args() (in module bridge.training.utils.train_utils) check_nvrx_straggler_detection() (in module bridge.training.nvrx_straggler) check_stragglers() (bridge.training.nvrx_straggler.NVRxStragglerDetectionManager method) check_weight_hash_across_dp_replicas_interval (bridge.training.config.TrainingConfig attribute) checkpoint (bridge.training.config.ConfigContainer attribute) checkpoint_and_decide_exit() (in module bridge.training.train) checkpoint_exists() (in module bridge.training.utils.checkpoint_utils) CheckpointConfig (class in bridge.training.config) checkpointing_context (bridge.training.setup.SetupOutput attribute) CheckpointType (class in bridge.training.checkpointing) chunkify() (in module bridge.models.llama.llama4_utils) chunkify_cu_seqlens() (in module bridge.models.llama.llama4_utils) ckpt_assume_constant_structure (bridge.training.config.CheckpointConfig attribute) ckpt_convert_format (bridge.training.config.CheckpointConfig attribute) ckpt_convert_save (bridge.training.config.CheckpointConfig attribute) ckpt_format (bridge.training.config.CheckpointConfig attribute) ckpt_step (bridge.training.config.CheckpointConfig attribute) clean_up_tokenization_spaces (bridge.models.hf_pretrained.causal_lm.DecodeKwargs attribute) cleanup_old_non_persistent_checkpoint() (in module bridge.training.checkpointing) clear_aux_losses_tracker() (in module bridge.training.utils.train_utils) cls (bridge.training.tokenizers.tokenizer._BertWordPieceTokenizer property) (bridge.training.tokenizers.tokenizer._GPTSentencePieceTokenizer property) (bridge.training.tokenizers.tokenizer._Llama2Tokenizer property) (bridge.training.tokenizers.tokenizer._NullTokenizer property) (bridge.training.tokenizers.tokenizer._SentencePieceTokenizer property) CodeLlamaModelProvider13B (class in bridge.models.llama.llama_provider) CodeLlamaModelProvider34B (class in bridge.models.llama.llama_provider) CodeLlamaModelProvider70B (class in bridge.models.llama.llama_provider) CodeLlamaModelProvider7B (class in bridge.models.llama.llama_provider) collate_fn() (bridge.data.datasets.sft.GPTSFTChatDataset method) (bridge.data.datasets.sft.GPTSFTDataset method) (bridge.data.datasets.sft.GPTSFTPackedDataset method) ColumnParallelMapping (class in bridge.models.conversion.param_mapping) comm_overlap (bridge.training.config.ConfigContainer attribute) CommOverlapConfig (class in bridge.training.comm_overlap) compute_activation_memory() (in module bridge.training.utils.theoretical_memory_utils) compute_throughputs_and_append_to_progress_log() (in module bridge.training.train) compute_weight_and_optimizer_memory() (in module bridge.training.utils.theoretical_memory_utils) config (bridge.models.hf_pretrained.base.PreTrainedBase property) CONFIG_FILE (in module bridge.training.utils.checkpoint_utils) CONFIG_NAME (bridge.models.model_provider.ModelProviderMixin attribute) ConfigContainer (class in bridge.training.config) ConfigFormat (in module bridge.models.config) ConfigProtocol (class in bridge.models.config) consumed_train_samples (bridge.training.state.TrainState attribute) consumed_valid_samples (bridge.training.state.TrainState attribute) convert_by_vocab() (in module bridge.training.tokenizers.bert_tokenization) convert_ids_to_tokens() (bridge.training.tokenizers.bert_tokenization.FullTokenizer method) (bridge.training.tokenizers.gpt2_tokenization.GPT2Tokenizer method) (in module bridge.training.tokenizers.bert_tokenization) convert_to_unicode() (in module bridge.training.tokenizers.bert_tokenization) convert_tokens_to_ids() (bridge.training.tokenizers.bert_tokenization.FullTokenizer method) (bridge.training.tokenizers.gpt2_tokenization.GPT2Tokenizer method) (bridge.training.tokenizers.multimodal_tokenizer.MultimodalTokenizer method) (in module bridge.training.tokenizers.bert_tokenization) convert_tokens_to_string() (bridge.training.tokenizers.bert_tokenization.FullTokenizer static method) create_bridge_decorator() (in module bridge.models.conversion.model_bridge) create_hist() (in module bridge.data.datasets.packing_utils) create_omegaconf_dict_config() (in module bridge.training.utils.omegaconf_utils) create_packing_strategy() (in module bridge.data.datasets.packing_utils) create_sft_dataset() (in module bridge.data.datasets.sft) cross_entropy_loss_fusion (bridge.models.gpt_provider.GPTModelProvider attribute) (bridge.models.mamba.mamba_provider.MambaProvider attribute) curr_eval_iter_idx (bridge.training.state.FaultToleranceState attribute) custom_chat_template (bridge.training.tokenizers.multimodal_tokenizer.PromptConfig attribute) CustomTikTokenizer (class in bridge.training.tokenizers.tokenizer) cyclic_iter() (in module bridge.data.loaders) D data_parallel_random_init (bridge.models.model_provider.GetModelKwargs attribute) (bridge.training.config.RNGConfig attribute) data_parallel_size (bridge.training.comm_overlap.CommOverlapConfig attribute) data_sharding (bridge.training.config.DataloaderConfig attribute) DataclassInstance (in module bridge.training.utils.omegaconf_utils) DataclassT (in module bridge.models.conversion.auto_bridge) dataloader_type (bridge.training.config.DataloaderConfig attribute) DataloaderConfig (class in bridge.training.config) dataset (bridge.training.config.ConfigContainer attribute) dataset_dict (bridge.data.builders.hf_dataset.HFDatasetConfig attribute) dataset_kwargs (bridge.training.config.FinetuningDatasetConfig attribute) dataset_name (bridge.data.builders.hf_dataset.HFDatasetConfig attribute) dataset_root (bridge.training.config.FinetuningDatasetConfig attribute) dataset_subset (bridge.data.builders.hf_dataset.HFDatasetConfig attribute) ddp (bridge.training.config.ConfigContainer attribute) ddp_config (bridge.models.model_provider.GetModelKwargs attribute) deallocate_pipeline_outputs (bridge.models.gpt_provider.GPTModelProvider attribute) (bridge.models.mamba.mamba_provider.MambaProvider attribute) (bridge.models.t5_provider.T5ModelProvider attribute) decode() (bridge.models.hf_pretrained.causal_lm.PreTrainedCausalLM method) (bridge.models.hf_pretrained.vlm.PreTrainedVLM method) (bridge.training.tokenizers.gpt2_tokenization.GPT2Tokenizer method) (bridge.training.tokenizers.tokenizer._BertWordPieceTokenizer method) decode_token_ids() (bridge.training.tokenizers.tokenizer._BertWordPieceTokenizer method) DecodeKwargs (class in bridge.models.hf_pretrained.causal_lm) decoder (bridge.training.tokenizers.tokenizer._HuggingFaceTokenizer property) (bridge.training.tokenizers.tokenizer._SentencePieceTokenizer property) (bridge.training.tokenizers.tokenizer.CustomTikTokenizer property) decrease_batch_size_if_needed (bridge.training.config.TrainingConfig attribute) DEFAULT_CONFIG_FORMAT (bridge.models.model_provider.ModelProviderMixin attribute) default_layer_spec() (in module bridge.models.gpt_provider) DEFAULT_NEMO_CACHE_HOME (in module bridge.data.datasets.sft) DEFAULT_NEMO_DATASETS_CACHE (in module bridge.data.datasets.sft) DEFAULT_NEMO_MODELS_CACHE (in module bridge.data.datasets.sft) default_pack_path (bridge.data.builders.finetuning_dataset.FinetuningDatasetBuilder property) defer_embedding_wgrad_compute (bridge.training.comm_overlap._CommOverlapConfig attribute) (bridge.training.comm_overlap.CommOverlapConfig attribute) delete_raw (bridge.data.builders.hf_dataset.HFDatasetConfig attribute) describe() (bridge.models.conversion.mapping_registry.MegatronMappingRegistry method) destroy_global_state() (in module bridge.training.initialize) detokenize() (bridge.training.tokenizers.multimodal_tokenizer.MultimodalTokenizer method) (bridge.training.tokenizers.tokenizer._BertWordPieceTokenizer method) (bridge.training.tokenizers.tokenizer._GPT2BPETokenizer method) (bridge.training.tokenizers.tokenizer._GPTSentencePieceTokenizer method) (bridge.training.tokenizers.tokenizer._HuggingFaceTokenizer method) (bridge.training.tokenizers.tokenizer._Llama2Tokenizer method) (bridge.training.tokenizers.tokenizer._NullTokenizer method) (bridge.training.tokenizers.tokenizer._SentencePieceTokenizer method) (bridge.training.tokenizers.tokenizer.CustomTikTokenizer method) DictStateSource (class in bridge.models.hf_pretrained.state) dim (bridge.peft.canonical_lora.CanonicalLoRA attribute) (bridge.peft.dora.DoRA attribute) (bridge.peft.lora.LoRA attribute) DirectMapping (class in bridge.models.conversion.param_mapping) disable_forward_pre_hook() (in module bridge.training.train) disable_straggler_on_startup (bridge.training.config.StragglerDetectionConfig attribute) dispatch() (in module bridge.models.decorators.dispatch) dist (bridge.training.config.ConfigContainer attribute) dist_ckpt_strictness (bridge.training.config.CheckpointConfig attribute) distribute_saved_activations (bridge.models.t5_provider.T5ModelProvider attribute) distributed_backend (bridge.training.config.DistributedInitConfig attribute) distributed_timeout_minutes (bridge.training.config.DistributedInitConfig attribute) DistributedInitConfig (class in bridge.training.config) DistributedSignalHandler (class in bridge.training.utils.sig_utils) do_sample (bridge.models.hf_pretrained.causal_lm.GenerateKwargs attribute) do_test (bridge.training.config.FinetuningDatasetConfig attribute) (bridge.training.state.TrainState attribute) do_train (bridge.training.state.TrainState attribute) do_valid (bridge.training.state.TrainState attribute) do_validation (bridge.training.config.FinetuningDatasetConfig attribute) DoRA (class in bridge.peft.dora) DoRALinear (class in bridge.peft.dora_layers) download_mode (bridge.data.builders.hf_dataset.HFDatasetConfig attribute) dropout (bridge.peft.canonical_lora.CanonicalLoRA attribute) (bridge.peft.dora.DoRA attribute) (bridge.peft.lora.LoRA attribute) dropout_position (bridge.peft.canonical_lora.CanonicalLoRA attribute) (bridge.peft.dora.DoRA attribute) (bridge.peft.lora.LoRA attribute) dtype (bridge.models.hf_pretrained.causal_lm.PreTrainedCausalLM property) (bridge.models.hf_pretrained.vlm.PreTrainedVLM property) dtype_from_hf() (bridge.models.conversion.model_bridge.MegatronModelBridge method) (in module bridge.training.model_load_save) dtype_from_str() (bridge.models.conversion.model_bridge.MegatronModelBridge method) (in module bridge.training.model_load_save) dump_dataclass_to_yaml() (in module bridge.utils.yaml_utils) E early_stopping (bridge.models.hf_pretrained.causal_lm.GenerateKwargs attribute) empty_unused_memory_level (bridge.training.config.TrainingConfig attribute) enable_autocast (bridge.models.t5_provider.T5ModelProvider attribute) enable_cuda_graph (bridge.models.gpt_provider.GPTModelProvider attribute) enable_forward_pre_hook() (in module bridge.training.train) enable_ft_package (bridge.training.config.FaultToleranceConfig attribute) enable_logging (bridge.training.config.NVRxStragglerDetectionConfig attribute) enable_megatron_core_experimental (bridge.training.config.DistributedInitConfig attribute) enable_straggler_on_startup (bridge.training.config.StragglerDetectionConfig attribute) enabled (bridge.training.config.NVRxStragglerDetectionConfig attribute) encode() (bridge.models.hf_pretrained.causal_lm.PreTrainedCausalLM method) (bridge.training.tokenizers.gpt2_tokenization.GPT2Tokenizer method) encode_text() (bridge.models.hf_pretrained.vlm.PreTrainedVLM method) EncodeKwargs (class in bridge.models.hf_pretrained.causal_lm) encoder (bridge.training.tokenizers.tokenizer._SentencePieceTokenizer property) (bridge.training.tokenizers.tokenizer.CustomTikTokenizer property) encoder_num_layers (bridge.models.t5_provider.T5ModelProvider attribute) encoder_pipeline_model_parallel_size (bridge.models.t5_provider.T5ModelProvider attribute) end_weight_decay (bridge.training.config.SchedulerConfig attribute) energy_monitor (bridge.training.state.GlobalState property) ensure_directory_exists() (in module bridge.training.utils.checkpoint_utils) eod (bridge.training.tokenizers.multimodal_tokenizer.MultimodalTokenizer property) (bridge.training.tokenizers.tokenizer._BertWordPieceTokenizer property) (bridge.training.tokenizers.tokenizer._GPT2BPETokenizer property) (bridge.training.tokenizers.tokenizer._GPTSentencePieceTokenizer property) (bridge.training.tokenizers.tokenizer._HuggingFaceTokenizer property) (bridge.training.tokenizers.tokenizer._Llama2Tokenizer property) (bridge.training.tokenizers.tokenizer._NullTokenizer property) (bridge.training.tokenizers.tokenizer._SentencePieceTokenizer property) (bridge.training.tokenizers.tokenizer.CustomTikTokenizer property) eod_id (bridge.training.tokenizers.tokenizer.MegatronTokenizer property) eos (bridge.training.tokenizers.tokenizer._BertWordPieceTokenizer property) (bridge.training.tokenizers.tokenizer._HuggingFaceTokenizer property) (bridge.training.tokenizers.tokenizer._SentencePieceTokenizer property) (bridge.training.tokenizers.tokenizer.CustomTikTokenizer property) eos_id (bridge.training.tokenizers.tokenizer.MegatronTokenizer property) eos_token (bridge.training.tokenizers.tokenizer._BertWordPieceTokenizer property) eos_token_id (bridge.models.hf_pretrained.causal_lm.GenerateKwargs attribute) ep_rank (bridge.models.conversion.param_mapping.MegatronParamMapping property) ep_size (bridge.models.conversion.param_mapping.MegatronParamMapping property) error_injection_rate (bridge.training.config.RerunStateMachineConfig attribute) error_injection_type (bridge.training.config.RerunStateMachineConfig attribute) etp_rank (bridge.models.conversion.param_mapping.MegatronParamMapping property) etp_size (bridge.models.conversion.param_mapping.MegatronParamMapping property) eval_interval (bridge.training.config.TrainingConfig attribute) eval_iters (bridge.training.config.TrainingConfig attribute) evaluate() (in module bridge.training.eval) evaluate_and_print_results() (in module bridge.training.eval) exclude_modules (bridge.peft.module_matcher.ModuleMatcher attribute) exit_duration_in_mins (bridge.training.config.TrainingConfig attribute) exit_interval (bridge.training.config.TrainingConfig attribute) exit_on_missing_checkpoint (bridge.training.config.CheckpointConfig attribute) exit_signal (bridge.training.config.TrainingConfig attribute) exit_signal_handler (bridge.training.config.TrainingConfig attribute) exit_signal_handler_for_dataloader (bridge.training.config.TrainingConfig attribute) experimental_fn() (in module bridge.utils.decorators) export_ckpt() (bridge.models.conversion.auto_bridge.AutoBridge method) export_hf_weights() (bridge.models.conversion.auto_bridge.AutoBridge method) external_gpu_device_mapping (bridge.training.config.DistributedInitConfig attribute) extract_sort_key() (in module bridge.models.conversion.utils) F fault_tolerance_state (bridge.training.state.GlobalState property) FaultToleranceConfig (class in bridge.training.config) FaultToleranceState (class in bridge.training.state) fc1_dgrad (bridge.training.comm_overlap.TransformerLayerTPOverlapCfg attribute) fc1_fprop (bridge.training.comm_overlap.TransformerLayerTPOverlapCfg attribute) fc1_wgrad (bridge.training.comm_overlap.TransformerLayerTPOverlapCfg attribute) fc2_dgrad (bridge.training.comm_overlap.TransformerLayerTPOverlapCfg attribute) fc2_fprop (bridge.training.comm_overlap.TransformerLayerTPOverlapCfg attribute) ffn_hidden_size (bridge.models.gpt_provider.GPTProvider126M attribute) (bridge.models.gpt_provider.GPTProvider175B attribute) (bridge.models.gpt_provider.GPTProvider20B attribute) (bridge.models.gpt_provider.GPTProvider40B attribute) (bridge.models.gpt_provider.GPTProvider5B attribute) (bridge.models.gpt_provider.GPTProvider7B attribute) (bridge.models.llama.llama_provider.CodeLlamaModelProvider34B attribute) (bridge.models.llama.llama_provider.Llama2ModelProvider13B attribute) (bridge.models.llama.llama_provider.Llama2ModelProvider70B attribute) (bridge.models.llama.llama_provider.Llama2ModelProvider7B attribute) (bridge.models.llama.llama_provider.Llama31ModelProvider405B attribute) (bridge.models.llama.llama_provider.Llama31ModelProvider70B attribute) (bridge.models.llama.llama_provider.Llama31ModelProvider8B attribute) (bridge.models.llama.llama_provider.Llama32ModelProvider1B attribute) (bridge.models.llama.llama_provider.Llama32ModelProvider3B attribute) (bridge.models.llama.llama_provider.Llama3ModelProvider70B attribute) (bridge.models.llama.llama_provider.Llama3ModelProvider8B attribute) (bridge.models.llama.llama_provider.Llama4ModelProvider attribute) (bridge.models.mamba.mamba_provider.MambaProvider130M attribute) (bridge.models.mamba.mamba_provider.MambaProvider1_3B attribute) (bridge.models.mamba.mamba_provider.MambaProvider2_7B attribute) (bridge.models.mamba.mamba_provider.MambaProvider370M attribute) (bridge.models.mamba.mamba_provider.MambaProvider780M attribute) (bridge.models.mamba.mamba_provider.NVIDIAMambaHybridProvider8B attribute) (bridge.models.mamba.mamba_provider.NVIDIAMambaProvider8B attribute) (bridge.models.mamba.nemotron_h_provider.NemotronHModel47BProvider attribute) (bridge.models.mamba.nemotron_h_provider.NemotronHModel4BProvider attribute) (bridge.models.mamba.nemotron_h_provider.NemotronHModel56BProvider attribute) (bridge.models.mamba.nemotron_h_provider.NemotronHModel8BProvider attribute) (bridge.models.mamba.nemotron_h_provider.NemotronNano12Bv2Provider attribute) (bridge.models.mamba.nemotron_h_provider.NemotronNano9Bv2Provider attribute) (bridge.models.qwen.qwen_provider.Qwen25ModelProvider14B attribute) (bridge.models.qwen.qwen_provider.Qwen25ModelProvider1P5B attribute) (bridge.models.qwen.qwen_provider.Qwen25ModelProvider32B attribute) (bridge.models.qwen.qwen_provider.Qwen25ModelProvider3B attribute) (bridge.models.qwen.qwen_provider.Qwen25ModelProvider500M attribute) (bridge.models.qwen.qwen_provider.Qwen25ModelProvider72B attribute) (bridge.models.qwen.qwen_provider.Qwen25ModelProvider7B attribute) (bridge.models.qwen.qwen_provider.Qwen2ModelProvider1P5B attribute) (bridge.models.qwen.qwen_provider.Qwen2ModelProvider500M attribute) (bridge.models.qwen.qwen_provider.Qwen2ModelProvider72B attribute) (bridge.models.qwen.qwen_provider.Qwen2ModelProvider7B attribute) (bridge.models.qwen.qwen_provider.Qwen3ModelProvider14B attribute) (bridge.models.qwen.qwen_provider.Qwen3ModelProvider1P7B attribute) (bridge.models.qwen.qwen_provider.Qwen3ModelProvider32B attribute) (bridge.models.qwen.qwen_provider.Qwen3ModelProvider4B attribute) (bridge.models.qwen.qwen_provider.Qwen3ModelProvider600M attribute) (bridge.models.qwen.qwen_provider.Qwen3ModelProvider8B attribute) (bridge.models.qwen.qwen_provider.Qwen3MoEModelProvider235B_A22B attribute) (bridge.models.qwen.qwen_provider.Qwen3MoEModelProvider30B_A3B attribute) file_exists() (in module bridge.training.utils.checkpoint_utils) fill_packing_strategy() (in module bridge.data.datasets.packing_utils) filter_warnings (bridge.training.config.LoggerConfig attribute) find_checkpoint_rank_0() (in module bridge.training.checkpointing) find_first_bin_that_fits() (in module bridge.data.datasets.packing_utils) finetune (bridge.training.config.CheckpointConfig attribute) finetune() (in module bridge.training.finetune) finetuning_train_valid_test_datasets_provider() (in module bridge.data.utils) FinetuningDatasetBuilder (class in bridge.data.builders.finetuning_dataset) FinetuningDatasetConfig (class in bridge.training.config) first_fit() (in module bridge.data.datasets.packing_utils) first_fit_decreasing() (in module bridge.data.datasets.packing_utils) first_fit_shuffle() (in module bridge.data.datasets.packing_utils) first_last_layers_bf16 (bridge.models.mamba.nemotron_h_provider.NemotronHModelProvider attribute) (bridge.training.mixed_precision.MixedPrecisionConfig attribute) float() (bridge.models.hf_pretrained.causal_lm.PreTrainedCausalLM method) (bridge.models.hf_pretrained.vlm.PreTrainedVLM method) floating_point_operations_so_far (bridge.training.state.TrainState attribute) forall() (in module bridge.peft.walk_utils) forward() (bridge.models.gpt_full_te_layer_autocast_spec.AutocastTransformerLayer method) (bridge.models.gpt_full_te_layer_autocast_spec.TETransformerLayerAutocast method) (bridge.models.llama.llama4_utils.Llama4SelfAttention method) (bridge.peft.canonical_lora.LoRALinearSplitFC1UpGate method) (bridge.peft.canonical_lora.LoRALinearSplitQKV method) (bridge.peft.dora_layers.DoRALinear method) (bridge.peft.lora_layers.LinearAdapter method) (bridge.peft.lora_layers.LoRALinear method) (bridge.peft.lora_layers.TELinearAdapter method) (bridge.peft.utils._All2AllHp2Sp static method) (bridge.peft.utils.ParallelLinearAdapter method) forward_step() (in module bridge.training.gpt_step) fp16 (bridge.models.mamba.mamba_provider.MambaProvider attribute) (bridge.models.model_provider.GetModelKwargs attribute) (bridge.training.mixed_precision.MixedPrecisionConfig attribute) fp16_lm_cross_entropy (bridge.models.gpt_provider.GPTModelProvider attribute) (bridge.models.mamba.mamba_provider.MambaProvider attribute) (bridge.models.t5_provider.T5ModelProvider attribute) fp16_mixed() (in module bridge.training.mixed_precision) fp16_with_fp8_current_scaling_mixed() (in module bridge.training.mixed_precision) fp16_with_fp8_mixed() (in module bridge.training.mixed_precision) fp16_with_fp8_subchannel_scaling_mixed() (in module bridge.training.mixed_precision) fp16_with_mxfp8_mixed() (in module bridge.training.mixed_precision) fp32 (bridge.training.mixed_precision.MixedPrecisionConfig attribute) fp8 (bridge.models.gpt_provider.GPTModelProvider attribute) (bridge.training.mixed_precision.MixedPrecisionConfig attribute) fp8_amax_compute_algo (bridge.training.mixed_precision.MixedPrecisionConfig attribute) fp8_amax_history_len (bridge.training.mixed_precision.MixedPrecisionConfig attribute) fp8_buf (bridge.training.comm_overlap.PipelineOverlapCfg attribute) (bridge.training.comm_overlap.RingExchangeOverlapCfg attribute) fp8_dot_product_attention (bridge.training.mixed_precision.MixedPrecisionConfig attribute) fp8_margin (bridge.training.mixed_precision.MixedPrecisionConfig attribute) fp8_multi_head_attention (bridge.training.mixed_precision.MixedPrecisionConfig attribute) fp8_param (bridge.training.mixed_precision.MixedPrecisionConfig attribute) fp8_param_gather (bridge.training.mixed_precision.MixedPrecisionConfig attribute) fp8_recipe (bridge.training.mixed_precision.MixedPrecisionConfig attribute) fp8_wgrad (bridge.training.mixed_precision.MixedPrecisionConfig attribute) freeze_model() (bridge.peft.base.PEFT method) from_dict() (bridge.training.utils.config_utils._ConfigContainerBase class method) from_hf_config() (bridge.models.conversion.auto_bridge.AutoBridge class method) from_hf_pretrained() (bridge.models.config.ConfigProtocol class method) (bridge.models.conversion.auto_bridge.AutoBridge class method) (bridge.models.model_provider.ModelProviderMixin class method) (in module bridge.models.config) from_pretrained() (bridge.models.hf_pretrained.causal_lm.PreTrainedCausalLM class method) (bridge.models.hf_pretrained.vlm.PreTrainedVLM class method) (bridge.training.tokenizers.gpt2_tokenization.GPT2Tokenizer class method) from_yaml() (bridge.training.utils.config_utils._ConfigContainerBase class method) ft (bridge.training.config.ConfigContainer attribute) ft_state_path (bridge.training.state.FaultToleranceState attribute) FullTokenizer (class in bridge.training.tokenizers.bert_tokenization) fully_parallel_load (bridge.training.config.CheckpointConfig attribute) fully_parallel_save (bridge.training.config.CheckpointConfig attribute) G gated_linear_unit (bridge.models.llama.llama_provider.Llama3ModelProvider attribute) (bridge.models.llama.llama_provider.Llama4ModelProvider attribute) (bridge.models.llama.llama_provider.LlamaModelProvider attribute) (bridge.models.mamba.mamba_provider.MambaProvider attribute) (bridge.models.qwen.qwen_provider.Qwen2ModelProvider attribute) (bridge.models.qwen.qwen_provider.Qwen3ModelProvider attribute) (bridge.models.qwen.qwen_provider.Qwen3MoEModelProvider attribute) GatedMLPMapping (class in bridge.models.conversion.param_mapping) gather_from_ep_ranks() (bridge.models.conversion.param_mapping.MegatronParamMapping method) gather_from_tp_ranks() (bridge.models.conversion.param_mapping.MegatronParamMapping method) generate() (bridge.models.hf_pretrained.causal_lm.PreTrainedCausalLM method) (bridge.models.hf_pretrained.vlm.PreTrainedVLM method) generate_state_dict() (in module bridge.training.checkpointing) GenerateKwargs (class in bridge.models.hf_pretrained.causal_lm) generation_config (bridge.models.gpt_provider.GPTModelProvider attribute) (bridge.models.hf_pretrained.causal_lm.PreTrainedCausalLM property) (bridge.models.hf_pretrained.vlm.PreTrainedVLM property) get() (bridge.models.hf_pretrained.state.StateDict method) get_adapter_attributes_from_linear() (in module bridge.peft.utils) get_all_keys() (bridge.models.hf_pretrained.state.DictStateSource method) (bridge.models.hf_pretrained.state.SafeTensorsStateSource method) (bridge.models.hf_pretrained.state.StateSource method) get_all_mappings() (bridge.models.conversion.mapping_registry.MegatronMappingRegistry method) get_artifacts() (bridge.models.hf_pretrained.base.PreTrainedBase method) get_batch() (in module bridge.training.gpt_step) get_batch_from_iterator() (in module bridge.training.gpt_step) get_batch_on_this_tp_rank() (in module bridge.training.gpt_step) get_blend_and_blend_per_split() (in module bridge.data.loaders) get_checkpoint_name() (in module bridge.training.utils.checkpoint_utils) get_checkpoint_run_config_filename() (in module bridge.training.utils.checkpoint_utils) get_checkpoint_tracker_filename() (in module bridge.training.utils.checkpoint_utils) get_checkpoint_train_state_filename() (in module bridge.training.utils.checkpoint_utils) get_checkpoint_version() (in module bridge.training.checkpointing) get_conversion_tasks() (bridge.models.conversion.auto_bridge.AutoBridge method) get_data_parallel_size() (bridge.training.config.ConfigContainer method) get_dataset_provider() (in module bridge.data.utils) get_dataset_root() (in module bridge.data.datasets.sft) get_device() (in module bridge.training.utils.sig_utils) get_gpt_full_te_layer_autocast_spec() (in module bridge.models.gpt_full_te_layer_autocast_spec) get_last_rank() (in module bridge.utils.common_utils) get_llama4_layer_spec() (in module bridge.models.llama.llama4_utils) get_local_rank_preinit() (in module bridge.utils.common_utils) get_mappings_by_pattern() (bridge.models.conversion.mapping_registry.MegatronMappingRegistry method) get_mixed_precision_config() (in module bridge.training.mixed_precision) get_model() (in module bridge.models.model_provider) get_model_bridge() (in module bridge.models.conversion.model_bridge) get_module_and_param_from_name() (in module bridge.models.conversion.utils) get_packed_seq_params() (in module bridge.training.gpt_step) get_pairs() (in module bridge.training.tokenizers.gpt2_tokenization) get_rank_safe() (in module bridge.utils.common_utils) get_rng_state() (in module bridge.training.checkpointing) get_sample_block() (bridge.data.datasets.utils._OnlineSampleMapping method) get_special_tokens() (bridge.training.tokenizers.multimodal_tokenizer.MultimodalTokenizer method) get_start_end_idx() (bridge.data.samplers.MegatronPretrainingSampler method) get_start_time_from_progress_log() (in module bridge.training.train) get_torch_version() (in module bridge.utils.import_utils) get_train_valid_test_num_samples() (in module bridge.data.loaders) get_vocab_size() (in module bridge.models.gpt_provider) get_weight_magnitude() (bridge.peft.dora_layers.ParallelLinearDoRAAdapter method) get_world_size_safe() (in module bridge.utils.common_utils) GetModelKwargs (class in bridge.models.model_provider) glob() (bridge.models.hf_pretrained.state.StateDict method) GLOBAL (bridge.training.checkpointing.CheckpointType attribute) global_batch_size (bridge.training.config.TrainingConfig attribute) GlobalState (class in bridge.training.state) GPT2Tokenizer (class in bridge.training.tokenizers.gpt2_tokenization) GPTDatasetConfig (class in bridge.training.config) GPTModelProvider (class in bridge.models.gpt_provider) GPTProvider126M (class in bridge.models.gpt_provider) GPTProvider175B (class in bridge.models.gpt_provider) GPTProvider20B (class in bridge.models.gpt_provider) GPTProvider40B (class in bridge.models.gpt_provider) GPTProvider5B (class in bridge.models.gpt_provider) GPTProvider7B (class in bridge.models.gpt_provider) GPTSFTChatDataset (class in bridge.data.datasets.sft) GPTSFTDataset (class in bridge.data.datasets.sft) GPTSFTPackedDataset (class in bridge.data.datasets.sft) gpu_individual_perf_threshold (bridge.training.config.NVRxStragglerDetectionConfig attribute) GPU_INSTALL_STRING (in module bridge.utils.import_utils) gpu_only_import() (in module bridge.utils.import_utils) gpu_only_import_from() (in module bridge.utils.import_utils) gpu_relative_perf_threshold (bridge.training.config.NVRxStragglerDetectionConfig attribute) grad_reduce_in_fp32 (bridge.training.mixed_precision.MixedPrecisionConfig attribute) gradient_accumulation_fusion (bridge.models.gpt_provider.GPTModelProvider attribute) H half() (bridge.models.hf_pretrained.causal_lm.PreTrainedCausalLM method) (bridge.models.hf_pretrained.vlm.PreTrainedVLM method) handle_index() (in module bridge.data.datasets.utils) has_bos (bridge.training.tokenizers.multimodal_tokenizer.PromptConfig attribute) has_glob() (bridge.models.hf_pretrained.state.SafeTensorsStateSource method) (bridge.models.hf_pretrained.state.StateDict method) (bridge.models.hf_pretrained.state.StateSource method) has_model (bridge.models.hf_pretrained.causal_lm.PreTrainedCausalLM property) has_system_role (bridge.training.tokenizers.multimodal_tokenizer.PromptConfig attribute) HasBool (class in bridge.peft.walk_utils) HAVE_TE (in module bridge.peft.module_matcher) (in module bridge.peft.utils) hf_filter_lambda (bridge.data.builders.hf_dataset.HFDatasetConfig attribute) hf_filter_lambda_kwargs (bridge.data.builders.hf_dataset.HFDatasetConfig attribute) hf_kwargs (bridge.data.builders.hf_dataset.HFDatasetConfig attribute) hf_to_megatron() (bridge.models.conversion.param_mapping.AutoMapping method) (bridge.models.conversion.param_mapping.ColumnParallelMapping method) (bridge.models.conversion.param_mapping.DirectMapping method) (bridge.models.conversion.param_mapping.GatedMLPMapping method) (bridge.models.conversion.param_mapping.MegatronParamMapping method) (bridge.models.conversion.param_mapping.QKVMapping method) (bridge.models.conversion.param_mapping.ReplicatedMapping method) (bridge.models.conversion.param_mapping.RowParallelMapping method) hf_to_megatron_lookup() (bridge.models.conversion.mapping_registry.MegatronMappingRegistry method) hf_train_valid_test_datasets_provider() (in module bridge.data.utils) HFDatasetBuilder (class in bridge.data.builders.hf_dataset) HFDatasetConfig (class in bridge.data.builders.hf_dataset) HFPreTrained (in module bridge.models.conversion.model_bridge) HFWeightTuple (class in bridge.models.conversion.model_bridge) hidden_dropout (bridge.models.gpt_provider.GPTProvider175B attribute) (bridge.models.llama.llama_provider.Llama3ModelProvider attribute) (bridge.models.llama.llama_provider.LlamaModelProvider attribute) (bridge.models.mamba.mamba_provider.MambaProvider attribute) (bridge.models.qwen.qwen_provider.Qwen2ModelProvider attribute) (bridge.models.qwen.qwen_provider.Qwen3ModelProvider attribute) (bridge.models.qwen.qwen_provider.Qwen3MoEModelProvider attribute) hidden_size (bridge.models.gpt_provider.GPTProvider126M attribute) (bridge.models.gpt_provider.GPTProvider175B attribute) (bridge.models.gpt_provider.GPTProvider20B attribute) (bridge.models.gpt_provider.GPTProvider40B attribute) (bridge.models.gpt_provider.GPTProvider5B attribute) (bridge.models.gpt_provider.GPTProvider7B attribute) (bridge.models.llama.llama_provider.CodeLlamaModelProvider34B attribute) (bridge.models.llama.llama_provider.Llama2ModelProvider13B attribute) (bridge.models.llama.llama_provider.Llama2ModelProvider70B attribute) (bridge.models.llama.llama_provider.Llama2ModelProvider7B attribute) (bridge.models.llama.llama_provider.Llama31ModelProvider405B attribute) (bridge.models.llama.llama_provider.Llama31ModelProvider70B attribute) (bridge.models.llama.llama_provider.Llama31ModelProvider8B attribute) (bridge.models.llama.llama_provider.Llama32ModelProvider1B attribute) (bridge.models.llama.llama_provider.Llama32ModelProvider3B attribute) (bridge.models.llama.llama_provider.Llama3ModelProvider70B attribute) (bridge.models.llama.llama_provider.Llama3ModelProvider8B attribute) (bridge.models.llama.llama_provider.Llama4ModelProvider attribute) (bridge.models.mamba.mamba_provider.MambaProvider130M attribute) (bridge.models.mamba.mamba_provider.MambaProvider1_3B attribute) (bridge.models.mamba.mamba_provider.MambaProvider2_7B attribute) (bridge.models.mamba.mamba_provider.MambaProvider370M attribute) (bridge.models.mamba.mamba_provider.MambaProvider780M attribute) (bridge.models.mamba.mamba_provider.NVIDIAMambaHybridProvider8B attribute) (bridge.models.mamba.mamba_provider.NVIDIAMambaProvider8B attribute) (bridge.models.mamba.nemotron_h_provider.NemotronHModel47BProvider attribute) (bridge.models.mamba.nemotron_h_provider.NemotronHModel4BProvider attribute) (bridge.models.mamba.nemotron_h_provider.NemotronHModel56BProvider attribute) (bridge.models.mamba.nemotron_h_provider.NemotronHModel8BProvider attribute) (bridge.models.mamba.nemotron_h_provider.NemotronNano12Bv2Provider attribute) (bridge.models.mamba.nemotron_h_provider.NemotronNano9Bv2Provider attribute) (bridge.models.qwen.qwen_provider.Qwen25ModelProvider14B attribute) (bridge.models.qwen.qwen_provider.Qwen25ModelProvider1P5B attribute) (bridge.models.qwen.qwen_provider.Qwen25ModelProvider32B attribute) (bridge.models.qwen.qwen_provider.Qwen25ModelProvider3B attribute) (bridge.models.qwen.qwen_provider.Qwen25ModelProvider500M attribute) (bridge.models.qwen.qwen_provider.Qwen25ModelProvider72B attribute) (bridge.models.qwen.qwen_provider.Qwen25ModelProvider7B attribute) (bridge.models.qwen.qwen_provider.Qwen2ModelProvider1P5B attribute) (bridge.models.qwen.qwen_provider.Qwen2ModelProvider500M attribute) (bridge.models.qwen.qwen_provider.Qwen2ModelProvider72B attribute) (bridge.models.qwen.qwen_provider.Qwen2ModelProvider7B attribute) (bridge.models.qwen.qwen_provider.Qwen3ModelProvider14B attribute) (bridge.models.qwen.qwen_provider.Qwen3ModelProvider1P7B attribute) (bridge.models.qwen.qwen_provider.Qwen3ModelProvider32B attribute) (bridge.models.qwen.qwen_provider.Qwen3ModelProvider4B attribute) (bridge.models.qwen.qwen_provider.Qwen3ModelProvider600M attribute) (bridge.models.qwen.qwen_provider.Qwen3ModelProvider8B attribute) (bridge.models.qwen.qwen_provider.Qwen3MoEModelProvider235B_A22B attribute) (bridge.models.qwen.qwen_provider.Qwen3MoEModelProvider30B_A3B attribute) high_freq_factor (bridge.models.llama.llama_provider.Llama31ModelProvider attribute) high_priority_stream_groups (bridge.training.config.DistributedInitConfig attribute) hybrid_attention_ratio (bridge.models.mamba.mamba_provider.MambaProvider attribute) hybrid_mlp_ratio (bridge.models.mamba.mamba_provider.MambaProvider attribute) hybrid_override_pattern (bridge.models.mamba.mamba_provider.MambaProvider attribute) (bridge.models.mamba.mamba_provider.MambaProvider130M attribute) (bridge.models.mamba.mamba_provider.MambaProvider1_3B attribute) (bridge.models.mamba.mamba_provider.MambaProvider2_7B attribute) (bridge.models.mamba.mamba_provider.MambaProvider370M attribute) (bridge.models.mamba.mamba_provider.MambaProvider780M attribute) (bridge.models.mamba.mamba_provider.NVIDIAMambaHybridProvider8B attribute) (bridge.models.mamba.mamba_provider.NVIDIAMambaProvider8B attribute) (bridge.models.mamba.nemotron_h_provider.NemotronHModel47BProvider attribute) (bridge.models.mamba.nemotron_h_provider.NemotronHModel4BProvider attribute) (bridge.models.mamba.nemotron_h_provider.NemotronHModel56BProvider attribute) (bridge.models.mamba.nemotron_h_provider.NemotronHModel8BProvider attribute) (bridge.models.mamba.nemotron_h_provider.NemotronNano12Bv2Provider attribute) (bridge.models.mamba.nemotron_h_provider.NemotronNano9Bv2Provider attribute) hysteresis (bridge.training.mixed_precision.MixedPrecisionConfig attribute) I IGNORE_INDEX (in module bridge.data.datasets.utils) image_processor (bridge.models.hf_pretrained.vlm.PreTrainedVLM property) image_tag_type (bridge.training.tokenizers.config.TokenizerConfig attribute) IMAGE_TAGS (in module bridge.training.tokenizers.multimodal_tokenizer) impl() (bridge.models.decorators.dispatch._Dispatch method) import_ckpt() (bridge.models.conversion.auto_bridge.AutoBridge class method) inference_rng_tracker (bridge.training.config.RNGConfig attribute) init_async_checkpoint_worker() (in module bridge.training.checkpointing) init_checkpointing_context() (in module bridge.training.checkpointing) init_method_const() (in module bridge.peft.utils) init_method_kaiming_uniform() (in module bridge.peft.utils) init_method_normal() (in module bridge.peft.utils) init_method_std (bridge.models.llama.llama_provider.Llama31ModelProvider attribute) (bridge.models.llama.llama_provider.Llama3ModelProvider attribute) (bridge.models.llama.llama_provider.Llama3ModelProvider70B attribute) (bridge.models.qwen.qwen_provider.Qwen2ModelProvider attribute) (bridge.models.qwen.qwen_provider.Qwen3ModelProvider attribute) (bridge.models.qwen.qwen_provider.Qwen3MoEModelProvider attribute) init_model_with_meta_device (bridge.models.gpt_provider.GPTModelProvider attribute) (bridge.models.model_provider.GetModelKwargs attribute) init_rerun_state() (in module bridge.training.initialize) init_weight_magnitude() (bridge.peft.dora_layers.ParallelLinearDoRAAdapter method) initial_loss_scale (bridge.training.mixed_precision.MixedPrecisionConfig attribute) initialize() (bridge.training.nvrx_straggler.NVRxStragglerDetectionManager method) initialize_megatron() (in module bridge.training.initialize) initialize_model_parallel() (bridge.models.model_provider.ModelProviderMixin method) input (bridge.data.builders.hf_dataset.ProcessExampleOutput attribute) instantiate() (in module bridge.utils.instantiate_utils) instantiate_node() (in module bridge.utils.instantiate_utils) InstantiationException InstantiationMode (class in bridge.utils.instantiate_utils) inv_vocab (bridge.training.tokenizers.multimodal_tokenizer.MultimodalTokenizer property) (bridge.training.tokenizers.tokenizer._BertWordPieceTokenizer property) (bridge.training.tokenizers.tokenizer._GPT2BPETokenizer property) (bridge.training.tokenizers.tokenizer._HuggingFaceTokenizer property) (bridge.training.tokenizers.tokenizer._NullTokenizer property) (bridge.training.tokenizers.tokenizer._SentencePieceTokenizer property) (bridge.training.tokenizers.tokenizer.CustomTikTokenizer property) is_async_chkpt_enabled (bridge.training.state.FaultToleranceState attribute) is_calculating_timeouts (bridge.training.state.FaultToleranceState attribute) is_dataset_built_on_rank() (in module bridge.data.utils) is_empty_async_queue() (in module bridge.training.checkpointing) is_expert (bridge.models.conversion.param_mapping.MegatronParamMapping property) is_expert_linear() (in module bridge.peft.utils) is_hybrid_model (bridge.models.mamba.nemotron_h_provider.NemotronHModelProvider attribute) is_last_rank() (in module bridge.utils.common_utils) is_persistent_chkpt_loaded (bridge.training.state.FaultToleranceState attribute) is_setup_section_open (bridge.training.state.FaultToleranceState attribute) is_tensor_parallel() (in module bridge.models.conversion.model_bridge) is_torch_min_version() (in module bridge.utils.import_utils) is_unavailable() (in module bridge.utils.import_utils) items() (bridge.models.hf_pretrained.state.StateDict method) K key_to_filename_map (bridge.models.hf_pretrained.state.SafeTensorsStateSource property) keys() (bridge.models.hf_pretrained.state.StateDict method) kv_channels (bridge.models.mamba.nemotron_h_provider.NemotronHModel4BProvider attribute) (bridge.models.mamba.nemotron_h_provider.NemotronNano12Bv2Provider attribute) (bridge.models.mamba.nemotron_h_provider.NemotronNano9Bv2Provider attribute) (bridge.models.qwen.qwen_provider.Qwen3ModelProvider attribute) (bridge.models.qwen.qwen_provider.Qwen3MoEModelProvider attribute) kwargs (bridge.models.hf_pretrained.vlm.PreTrainedVLM property) L layernorm_epsilon (bridge.models.llama.llama_provider.Llama3ModelProvider attribute) (bridge.models.mamba.mamba_provider.MambaProvider attribute) (bridge.models.qwen.qwen_provider.Qwen25ModelProvider14B attribute) (bridge.models.qwen.qwen_provider.Qwen25ModelProvider32B attribute) (bridge.models.qwen.qwen_provider.Qwen25ModelProvider72B attribute) (bridge.models.qwen.qwen_provider.Qwen2ModelProvider attribute) (bridge.models.qwen.qwen_provider.Qwen2ModelProvider72B attribute) (bridge.models.qwen.qwen_provider.Qwen3ModelProvider attribute) (bridge.models.qwen.qwen_provider.Qwen3MoEModelProvider attribute) layernorm_zero_centered_gamma (bridge.models.gpt_provider.GPTProvider175B attribute) lazy_init (bridge.training.config.DistributedInitConfig attribute) LENIENT (bridge.utils.instantiate_utils.InstantiationMode attribute) LinearAdapter (class in bridge.peft.lora_layers) list_supported_models() (bridge.models.conversion.auto_bridge.AutoBridge class method) Llama2ModelProvider13B (class in bridge.models.llama.llama_provider) Llama2ModelProvider70B (class in bridge.models.llama.llama_provider) Llama2ModelProvider7B (class in bridge.models.llama.llama_provider) Llama31ModelProvider (class in bridge.models.llama.llama_provider) Llama31ModelProvider405B (class in bridge.models.llama.llama_provider) Llama31ModelProvider70B (class in bridge.models.llama.llama_provider) Llama31ModelProvider8B (class in bridge.models.llama.llama_provider) Llama32ModelProvider1B (class in bridge.models.llama.llama_provider) Llama32ModelProvider3B (class in bridge.models.llama.llama_provider) Llama3ModelProvider (class in bridge.models.llama.llama_provider) Llama3ModelProvider70B (class in bridge.models.llama.llama_provider) Llama3ModelProvider8B (class in bridge.models.llama.llama_provider) llama3p1_chat_template (in module bridge.training.tokenizers.multimodal_tokenizer) Llama4Experts128ModelProvider (class in bridge.models.llama.llama_provider) Llama4Experts16ModelProvider (class in bridge.models.llama.llama_provider) Llama4ModelProvider (class in bridge.models.llama.llama_provider) Llama4SelfAttention (class in bridge.models.llama.llama4_utils) LlamaBridge (class in bridge.models.llama.llama_bridge) LlamaModelProvider (class in bridge.models.llama.llama_provider) load (bridge.training.config.CheckpointConfig attribute) load_checkpoint() (in module bridge.training.checkpointing) load_file() (bridge.data.datasets.utils._TextMemMapDataset method) load_hf_weights() (bridge.models.conversion.auto_bridge.AutoBridge method) load_main_params_from_ckpt (bridge.training.config.CheckpointConfig attribute) load_megatron_model() (bridge.models.conversion.auto_bridge.AutoBridge method) (in module bridge.training.model_load_save) load_optim (bridge.training.config.CheckpointConfig attribute) load_rng (bridge.training.config.CheckpointConfig attribute) load_state_dict() (bridge.training.state.TrainState method) load_tensors() (bridge.models.hf_pretrained.state.DictStateSource method) (bridge.models.hf_pretrained.state.SafeTensorsStateSource method) (bridge.models.hf_pretrained.state.StateSource method) load_tokenizer() (in module bridge.training.model_load_save) load_vocab() (in module bridge.training.tokenizers.bert_tokenization) load_weights_hf_to_megatron() (bridge.models.conversion.model_bridge.MegatronModelBridge method) LOCAL (bridge.training.checkpointing.CheckpointType attribute) local_layer_spec() (in module bridge.models.gpt_provider) (in module bridge.models.t5_provider) local_rank (bridge.training.config.DistributedInitConfig attribute) log_energy (bridge.training.config.LoggerConfig attribute) LOG_FUSION_DISABLE (in module bridge.utils.fusions) log_interval (bridge.training.config.LoggerConfig attribute) log_loss_scale_to_tensorboard (bridge.training.config.LoggerConfig attribute) log_memory_to_tensorboard (bridge.training.config.LoggerConfig attribute) log_params_norm (bridge.training.config.LoggerConfig attribute) log_progress (bridge.training.config.LoggerConfig attribute) log_single_rank() (in module bridge.training.utils.log_utils) log_straggler (bridge.training.config.StragglerDetectionConfig attribute) log_throughput (bridge.training.config.LoggerConfig attribute) log_timers_to_tensorboard (bridge.training.config.LoggerConfig attribute) log_validation_ppl_to_tensorboard (bridge.training.config.LoggerConfig attribute) log_world_size_to_tensorboard (bridge.training.config.LoggerConfig attribute) logger (bridge.training.config.ConfigContainer attribute) (in module bridge.data.builders.finetuning_dataset) (in module bridge.data.builders.hf_dataset) (in module bridge.data.datasets.packed_sequence) (in module bridge.data.datasets.packing_utils) (in module bridge.data.datasets.sft) (in module bridge.data.datasets.utils) (in module bridge.models.conversion.model_bridge) (in module bridge.models.gpt_provider) (in module bridge.models.llama.llama_provider) (in module bridge.models.mamba.mamba_provider) (in module bridge.models.mamba.nemotron_h_provider) (in module bridge.models.qwen.qwen_provider) (in module bridge.models.t5_provider) (in module bridge.peft.base) (in module bridge.peft.canonical_lora) (in module bridge.peft.dora) (in module bridge.peft.lora) (in module bridge.training.checkpointing) (in module bridge.training.gpt_step) (in module bridge.training.model_load_save) (in module bridge.training.tokenizers.gpt2_tokenization) (in module bridge.training.utils.checkpoint_utils) (in module bridge.training.utils.log_utils) (in module bridge.training.utils.omegaconf_utils) (in module bridge.utils.decorators) (in module bridge.utils.fusions) (in module bridge.utils.import_utils) logger_name (bridge.training.config.NVRxStragglerDetectionConfig attribute) LoggerConfig (class in bridge.training.config) logging_level (bridge.training.config.LoggerConfig attribute) logical_and_across_model_parallel_group() (in module bridge.training.utils.train_utils) LoRA (class in bridge.peft.lora) lora_A_init_method (bridge.peft.canonical_lora.CanonicalLoRA attribute) (bridge.peft.dora.DoRA attribute) (bridge.peft.lora.LoRA attribute) lora_B_init_method (bridge.peft.canonical_lora.CanonicalLoRA attribute) (bridge.peft.dora.DoRA attribute) (bridge.peft.lora.LoRA attribute) lora_dtype (bridge.peft.lora.LoRA attribute) LoRALinear (class in bridge.peft.lora_layers) LoRALinearSplitFC1UpGate (class in bridge.peft.canonical_lora) LoRALinearSplitQKV (class in bridge.peft.canonical_lora) LoRAMerge (class in bridge.peft.lora) loss_scale (bridge.training.mixed_precision.MixedPrecisionConfig attribute) loss_scale_window (bridge.training.mixed_precision.MixedPrecisionConfig attribute) low_freq_factor (bridge.models.llama.llama_provider.Llama31ModelProvider attribute) lr_decay_iters (bridge.training.config.SchedulerConfig attribute) lr_decay_steps (bridge.training.config.SchedulerConfig attribute) lr_decay_style (bridge.training.config.SchedulerConfig attribute) lr_warmup_fraction (bridge.training.config.SchedulerConfig attribute) lr_warmup_init (bridge.training.config.SchedulerConfig attribute) lr_warmup_iters (bridge.training.config.SchedulerConfig attribute) lr_warmup_steps (bridge.training.config.SchedulerConfig attribute) lr_wsd_decay_iters (bridge.training.config.SchedulerConfig attribute) lr_wsd_decay_style (bridge.training.config.SchedulerConfig attribute) M MAJOR (in module bridge.package_info) make_vocab_size_divisible_by (bridge.models.gpt_provider.GPTModelProvider attribute) (bridge.models.llama.llama_provider.Llama31ModelProvider405B attribute) (bridge.models.llama.llama_provider.Llama31ModelProvider70B attribute) (bridge.models.llama.llama_provider.Llama32ModelProvider1B attribute) (bridge.models.llama.llama_provider.Llama32ModelProvider3B attribute) (bridge.models.llama.llama_provider.Llama3ModelProvider70B attribute) (bridge.models.mamba.mamba_provider.MambaProvider attribute) (bridge.models.mamba.mamba_provider.MambaProvider130M attribute) (bridge.models.mamba.mamba_provider.MambaProvider1_3B attribute) (bridge.models.mamba.mamba_provider.MambaProvider2_7B attribute) (bridge.models.mamba.mamba_provider.MambaProvider370M attribute) (bridge.models.mamba.mamba_provider.MambaProvider780M attribute) (bridge.models.mamba.mamba_provider.NVIDIAMambaHybridProvider8B attribute) (bridge.models.mamba.mamba_provider.NVIDIAMambaProvider8B attribute) (bridge.models.mamba.nemotron_h_provider.NemotronHModelProvider attribute) (bridge.models.t5_provider.T5ModelProvider attribute) make_vocab_size_divisible_by() (bridge.models.conversion.model_bridge.MegatronModelBridge method) mamba_head_dim (bridge.models.mamba.nemotron_h_provider.NemotronHModelProvider attribute) (bridge.models.mamba.nemotron_h_provider.NemotronNano12Bv2Provider attribute) (bridge.models.mamba.nemotron_h_provider.NemotronNano9Bv2Provider attribute) mamba_num_groups (bridge.models.mamba.mamba_provider.MambaProvider attribute) (bridge.models.mamba.mamba_provider.MambaProvider130M attribute) (bridge.models.mamba.mamba_provider.MambaProvider1_3B attribute) (bridge.models.mamba.mamba_provider.MambaProvider2_7B attribute) (bridge.models.mamba.mamba_provider.MambaProvider370M attribute) (bridge.models.mamba.mamba_provider.MambaProvider780M attribute) (bridge.models.mamba.mamba_provider.NVIDIAMambaHybridProvider8B attribute) (bridge.models.mamba.mamba_provider.NVIDIAMambaProvider8B attribute) (bridge.models.mamba.nemotron_h_provider.NemotronHModelProvider attribute) mamba_num_heads (bridge.models.mamba.nemotron_h_provider.NemotronHModel4BProvider attribute) (bridge.models.mamba.nemotron_h_provider.NemotronNano12Bv2Provider attribute) (bridge.models.mamba.nemotron_h_provider.NemotronNano9Bv2Provider attribute) mamba_stack_spec (bridge.models.mamba.mamba_provider.MambaProvider attribute) mamba_state_dim (bridge.models.mamba.nemotron_h_provider.NemotronHModel47BProvider attribute) (bridge.models.mamba.nemotron_h_provider.NemotronHModel4BProvider attribute) (bridge.models.mamba.nemotron_h_provider.NemotronHModel56BProvider attribute) (bridge.models.mamba.nemotron_h_provider.NemotronHModel8BProvider attribute) (bridge.models.mamba.nemotron_h_provider.NemotronNano12Bv2Provider attribute) (bridge.models.mamba.nemotron_h_provider.NemotronNano9Bv2Provider attribute) MambaProvider (class in bridge.models.mamba.mamba_provider) MambaProvider130M (class in bridge.models.mamba.mamba_provider) MambaProvider1_3B (class in bridge.models.mamba.mamba_provider) MambaProvider2_7B (class in bridge.models.mamba.mamba_provider) MambaProvider370M (class in bridge.models.mamba.mamba_provider) MambaProvider780M (class in bridge.models.mamba.mamba_provider) manual_gc (bridge.training.config.TrainingConfig attribute) manual_gc_eval (bridge.training.config.TrainingConfig attribute) manual_gc_interval (bridge.training.config.TrainingConfig attribute) map() (in module bridge.peft.walk_utils) mapping (bridge.models.conversion.model_bridge.WeightConversionTask attribute) mapping_registry() (bridge.models.conversion.model_bridge.MegatronModelBridge method) (bridge.models.llama.llama_bridge.LlamaBridge method) (bridge.models.qwen.qwen2_bridge.Qwen2Bridge method) (bridge.models.qwen.qwen3_bridge.Qwen3Bridge method) (bridge.models.qwen.qwen3_moe_bridge.Qwen3MoEBridge method) MappingT (in module bridge.models.conversion.model_bridge) mask (bridge.training.tokenizers.tokenizer._BertWordPieceTokenizer property) (bridge.training.tokenizers.tokenizer._GPTSentencePieceTokenizer property) (bridge.training.tokenizers.tokenizer._HuggingFaceTokenizer property) (bridge.training.tokenizers.tokenizer._Llama2Tokenizer property) (bridge.training.tokenizers.tokenizer._NullTokenizer property) (bridge.training.tokenizers.tokenizer._SentencePieceTokenizer property) mask_id (bridge.training.tokenizers.tokenizer.MegatronTokenizer property) masked_next_token_loss() (in module bridge.training.losses) masked_softmax_fusion (bridge.models.gpt_provider.GPTModelProvider attribute) (bridge.models.llama.llama_provider.Llama3ModelProvider attribute) (bridge.models.llama.llama_provider.LlamaModelProvider attribute) (bridge.models.mamba.nemotron_h_provider.NemotronHModelProvider attribute) (bridge.models.t5_provider.T5ModelProvider attribute) match() (bridge.peft.module_matcher.ModuleMatcher method) max_length (bridge.models.hf_pretrained.causal_lm.EncodeKwargs attribute) (bridge.models.hf_pretrained.causal_lm.GenerateKwargs attribute) max_new_tokens (bridge.models.hf_pretrained.causal_lm.GenerateKwargs attribute) max_position_embeddings (bridge.models.t5_provider.T5ModelProvider attribute) max_train_samples (bridge.training.config.FinetuningDatasetConfig attribute) maybe_finalize_async_save() (in module bridge.training.checkpointing) maybe_inject_state() (in module bridge.training.utils.train_utils) maybe_save_dataloader_state() (in module bridge.training.checkpointing) maybe_setup_simulated_fault() (in module bridge.training.fault_tolerance) megatron_cpu_init_context() (in module bridge.training.model_load_save) megatron_module (bridge.models.conversion.model_bridge.WeightConversionTask attribute) megatron_to_hf() (bridge.models.conversion.param_mapping.AutoMapping method) (bridge.models.conversion.param_mapping.ColumnParallelMapping method) (bridge.models.conversion.param_mapping.DirectMapping method) (bridge.models.conversion.param_mapping.GatedMLPMapping method) (bridge.models.conversion.param_mapping.MegatronParamMapping method) (bridge.models.conversion.param_mapping.QKVMapping method) (bridge.models.conversion.param_mapping.ReplicatedMapping method) (bridge.models.conversion.param_mapping.RowParallelMapping method) megatron_to_hf_lookup() (bridge.models.conversion.mapping_registry.MegatronMappingRegistry method) MegatronMappingRegistry (class in bridge.models.conversion.mapping_registry) MegatronModel (in module bridge.models.conversion.model_bridge) MegatronModelBridge (class in bridge.models.conversion.model_bridge) MegatronModelT (in module bridge.models.conversion.auto_bridge) MegatronParamMapping (class in bridge.models.conversion.param_mapping) MegatronPretrainingRandomSampler (class in bridge.data.samplers) MegatronPretrainingSampler (class in bridge.data.samplers) MegatronTokenizer (class in bridge.training.tokenizers.tokenizer) MegatronWeightTuple (class in bridge.models.conversion.model_bridge) memmap_workers (bridge.training.config.FinetuningDatasetConfig attribute) memory_snapshot_path (bridge.training.config.ProfilingConfig attribute) merge_file (bridge.training.tokenizers.config.TokenizerConfig attribute) merge_qkv_biases() (in module bridge.models.conversion.param_mapping) merge_qkv_weights() (in module bridge.models.conversion.param_mapping) MERGES_NAME (in module bridge.training.tokenizers.gpt2_tokenization) meta_model (bridge.models.model_provider.ModelProviderMixin property) method (bridge.training.comm_overlap.BulkOverlapCfg attribute) (bridge.training.comm_overlap.PipelineOverlapCfg attribute) (bridge.training.comm_overlap.RingExchangeOverlapCfg attribute) micro_batch_size (bridge.training.config.TrainingConfig attribute) min_length (bridge.models.hf_pretrained.causal_lm.GenerateKwargs attribute) min_loss_scale (bridge.training.mixed_precision.MixedPrecisionConfig attribute) MINOR (in module bridge.package_info) MISSING_NEMO_EXPORT_DEPLOY_MSG (in module bridge.utils.import_utils) MISSING_NEMO_RUN_MSG (in module bridge.utils.import_utils) MISSING_NVRX_MSG (in module bridge.utils.import_utils) mistral_custom_template (in module bridge.training.tokenizers.multimodal_tokenizer) mixed_precision (bridge.training.config.ConfigContainer attribute) MIXED_PRECISION_RECIPES (in module bridge.training.mixed_precision) MixedPrecisionConfig (class in bridge.training.mixed_precision) mla_transformer_config (bridge.models.conversion.auto_bridge.AutoBridge property) MockGPTDatasetConfig (class in bridge.training.config) model (bridge.models.hf_pretrained.base.PreTrainedBase property) (bridge.models.hf_pretrained.causal_lm.PreTrainedCausalLM property) (bridge.models.hf_pretrained.vlm.PreTrainedVLM property) (bridge.training.config.ConfigContainer attribute) (bridge.training.setup.SetupOutput attribute) model_name_or_path (bridge.models.hf_pretrained.base.PreTrainedBase attribute) (bridge.models.hf_pretrained.causal_lm.PreTrainedCausalLM property) (bridge.models.hf_pretrained.vlm.PreTrainedVLM property) model_type (bridge.models.model_provider.GetModelKwargs attribute) ModelProviderMixin (class in bridge.models.model_provider) ModelProviderTarget (in module bridge.models.conversion.model_bridge) ModelT (in module bridge.models.model_provider) ModelType (in module bridge.peft.base) module bridge bridge.data bridge.data.builders bridge.data.builders.finetuning_dataset bridge.data.builders.hf_dataset bridge.data.datasets bridge.data.datasets.packed_sequence bridge.data.datasets.packing_utils bridge.data.datasets.sft bridge.data.datasets.utils bridge.data.hf_processors bridge.data.hf_processors.squad bridge.data.loaders bridge.data.samplers bridge.data.utils bridge.models bridge.models.config bridge.models.conversion bridge.models.conversion.auto_bridge bridge.models.conversion.mapping_registry bridge.models.conversion.model_bridge bridge.models.conversion.param_mapping bridge.models.conversion.utils bridge.models.decorators bridge.models.decorators.dispatch bridge.models.decorators.torchrun bridge.models.gpt_full_te_layer_autocast_spec bridge.models.gpt_provider bridge.models.hf_pretrained bridge.models.hf_pretrained.base bridge.models.hf_pretrained.causal_lm bridge.models.hf_pretrained.state bridge.models.hf_pretrained.vlm bridge.models.llama bridge.models.llama.llama4_utils bridge.models.llama.llama_bridge bridge.models.llama.llama_provider bridge.models.mamba bridge.models.mamba.mamba_provider bridge.models.mamba.nemotron_h_provider bridge.models.model_provider bridge.models.qwen bridge.models.qwen.qwen2_bridge bridge.models.qwen.qwen3_bridge bridge.models.qwen.qwen3_moe_bridge bridge.models.qwen.qwen_provider bridge.models.t5_provider bridge.package_info bridge.peft bridge.peft.adapter_wrapper bridge.peft.base bridge.peft.canonical_lora bridge.peft.dora bridge.peft.dora_layers bridge.peft.lora bridge.peft.lora_layers bridge.peft.module_matcher bridge.peft.utils bridge.peft.walk_utils bridge.training bridge.training.checkpointing bridge.training.comm_overlap bridge.training.config bridge.training.deepep bridge.training.eval bridge.training.fault_tolerance bridge.training.finetune bridge.training.gpt_step bridge.training.initialize bridge.training.losses bridge.training.mixed_precision bridge.training.mlm_compat bridge.training.mlm_compat.activations bridge.training.mlm_compat.arguments bridge.training.mlm_compat.model bridge.training.model_load_save bridge.training.nvrx_straggler bridge.training.optim bridge.training.pretrain bridge.training.setup bridge.training.state bridge.training.tokenizers bridge.training.tokenizers.bert_tokenization bridge.training.tokenizers.config bridge.training.tokenizers.gpt2_tokenization bridge.training.tokenizers.multimodal_tokenizer bridge.training.tokenizers.tokenizer bridge.training.train bridge.training.utils bridge.training.utils.checkpoint_utils bridge.training.utils.config_utils bridge.training.utils.flop_utils bridge.training.utils.log_utils bridge.training.utils.moe_token_drop bridge.training.utils.omegaconf_utils bridge.training.utils.sig_utils bridge.training.utils.theoretical_memory_utils bridge.training.utils.train_utils bridge.training.utils.wandb_utils bridge.utils bridge.utils.common_utils bridge.utils.decorators bridge.utils.fusions bridge.utils.import_utils bridge.utils.instantiate_utils bridge.utils.yaml_utils module_filter() (in module bridge.training.utils.log_utils) ModuleDict (class in bridge.peft.canonical_lora) ModuleFunc (in module bridge.peft.walk_utils) ModuleMatcher (class in bridge.peft.module_matcher) ModulePredicate (in module bridge.peft.walk_utils) modules_to_filter (bridge.training.config.LoggerConfig attribute) moe_apply_probs_on_input (bridge.models.llama.llama_provider.Llama4ModelProvider attribute) moe_aux_loss_coeff (bridge.models.qwen.qwen_provider.Qwen3MoEModelProvider attribute) moe_ffn_hidden_size (bridge.models.llama.llama_provider.Llama4ModelProvider attribute) (bridge.models.qwen.qwen_provider.Qwen3MoEModelProvider235B_A22B attribute) (bridge.models.qwen.qwen_provider.Qwen3MoEModelProvider30B_A3B attribute) moe_grouped_gemm (bridge.models.gpt_provider.GPTModelProvider attribute) (bridge.models.llama.llama_provider.Llama4ModelProvider attribute) (bridge.models.qwen.qwen_provider.Qwen3MoEModelProvider attribute) moe_layer_freq (bridge.models.llama.llama_provider.Llama4Experts128ModelProvider attribute) moe_permute_fusion (bridge.models.llama.llama_provider.Llama4ModelProvider attribute) (bridge.models.qwen.qwen_provider.Qwen3MoEModelProvider attribute) moe_router_dtype (bridge.models.llama.llama_provider.Llama4ModelProvider attribute) moe_router_load_balancing_type (bridge.models.qwen.qwen_provider.Qwen3MoEModelProvider attribute) moe_router_pre_softmax (bridge.models.llama.llama_provider.Llama4ModelProvider attribute) (bridge.models.qwen.qwen_provider.Qwen3MoEModelProvider attribute) moe_router_score_function (bridge.models.llama.llama_provider.Llama4ModelProvider attribute) moe_router_topk (bridge.models.llama.llama_provider.Llama4ModelProvider attribute) (bridge.models.qwen.qwen_provider.Qwen3MoEModelProvider attribute) moe_shared_expert_intermediate_size (bridge.models.llama.llama_provider.Llama4ModelProvider attribute) moe_shared_expert_overlap (bridge.models.llama.llama_provider.Llama4ModelProvider attribute) moe_token_dispatcher_type (bridge.models.llama.llama_provider.Llama4ModelProvider attribute) (bridge.models.qwen.qwen_provider.Qwen3MoEModelProvider attribute) mtp_block_spec() (in module bridge.models.gpt_provider) mtp_enabled (bridge.models.gpt_provider.GPTModelProvider attribute) MultimodalTokenizer (class in bridge.training.tokenizers.multimodal_tokenizer) N nanov2_bf16_with_fp8_current_scaling_mixed() (in module bridge.training.mixed_precision) nccl_communicator_config_path (bridge.training.config.DistributedInitConfig attribute) NEMO_CACHE_HOME (in module bridge.data.datasets.sft) NEMO_DATASETS_CACHE (in module bridge.data.datasets.sft) NEMO_MODELS_CACHE (in module bridge.data.datasets.sft) nemotron_h_bf16_with_fp8_current_scaling_mixed() (in module bridge.training.mixed_precision) NemotronHModel47BProvider (class in bridge.models.mamba.nemotron_h_provider) NemotronHModel4BProvider (class in bridge.models.mamba.nemotron_h_provider) NemotronHModel56BProvider (class in bridge.models.mamba.nemotron_h_provider) NemotronHModel8BProvider (class in bridge.models.mamba.nemotron_h_provider) NemotronHModelProvider (class in bridge.models.mamba.nemotron_h_provider) NemotronNano12Bv2Provider (class in bridge.models.mamba.nemotron_h_provider) NemotronNano9Bv2Provider (class in bridge.models.mamba.nemotron_h_provider) non_persistent_ckpt_type (bridge.training.config.CheckpointConfig attribute) non_persistent_global_ckpt_dir (bridge.training.config.CheckpointConfig attribute) non_persistent_local_ckpt_algo (bridge.training.config.CheckpointConfig attribute) non_persistent_local_ckpt_dir (bridge.training.config.CheckpointConfig attribute) non_persistent_save_interval (bridge.training.config.CheckpointConfig attribute) nope_layer_interval (bridge.models.llama.llama_provider.Llama4ModelProvider attribute) normalization (bridge.models.gpt_provider.GPTModelProvider attribute) (bridge.models.llama.llama_provider.Llama3ModelProvider attribute) (bridge.models.llama.llama_provider.LlamaModelProvider attribute) (bridge.models.mamba.mamba_provider.MambaProvider attribute) (bridge.models.qwen.qwen_provider.Qwen2ModelProvider attribute) (bridge.models.qwen.qwen_provider.Qwen3ModelProvider attribute) (bridge.models.qwen.qwen_provider.Qwen3MoEModelProvider attribute) null_decorator() (in module bridge.utils.import_utils) num_attention_heads (bridge.models.gpt_provider.GPTProvider126M attribute) (bridge.models.gpt_provider.GPTProvider175B attribute) (bridge.models.gpt_provider.GPTProvider20B attribute) (bridge.models.gpt_provider.GPTProvider40B attribute) (bridge.models.gpt_provider.GPTProvider5B attribute) (bridge.models.gpt_provider.GPTProvider7B attribute) (bridge.models.llama.llama_provider.CodeLlamaModelProvider34B attribute) (bridge.models.llama.llama_provider.Llama2ModelProvider13B attribute) (bridge.models.llama.llama_provider.Llama2ModelProvider70B attribute) (bridge.models.llama.llama_provider.Llama2ModelProvider7B attribute) (bridge.models.llama.llama_provider.Llama31ModelProvider405B attribute) (bridge.models.llama.llama_provider.Llama31ModelProvider70B attribute) (bridge.models.llama.llama_provider.Llama31ModelProvider8B attribute) (bridge.models.llama.llama_provider.Llama32ModelProvider1B attribute) (bridge.models.llama.llama_provider.Llama32ModelProvider3B attribute) (bridge.models.llama.llama_provider.Llama3ModelProvider70B attribute) (bridge.models.llama.llama_provider.Llama3ModelProvider8B attribute) (bridge.models.llama.llama_provider.Llama4ModelProvider attribute) (bridge.models.mamba.mamba_provider.MambaProvider attribute) (bridge.models.mamba.mamba_provider.NVIDIAMambaHybridProvider8B attribute) (bridge.models.mamba.mamba_provider.NVIDIAMambaProvider8B attribute) (bridge.models.mamba.nemotron_h_provider.NemotronHModel47BProvider attribute) (bridge.models.mamba.nemotron_h_provider.NemotronHModel4BProvider attribute) (bridge.models.mamba.nemotron_h_provider.NemotronHModel56BProvider attribute) (bridge.models.mamba.nemotron_h_provider.NemotronHModel8BProvider attribute) (bridge.models.mamba.nemotron_h_provider.NemotronNano12Bv2Provider attribute) (bridge.models.mamba.nemotron_h_provider.NemotronNano9Bv2Provider attribute) (bridge.models.qwen.qwen_provider.Qwen25ModelProvider14B attribute) (bridge.models.qwen.qwen_provider.Qwen25ModelProvider1P5B attribute) (bridge.models.qwen.qwen_provider.Qwen25ModelProvider32B attribute) (bridge.models.qwen.qwen_provider.Qwen25ModelProvider3B attribute) (bridge.models.qwen.qwen_provider.Qwen25ModelProvider500M attribute) (bridge.models.qwen.qwen_provider.Qwen25ModelProvider72B attribute) (bridge.models.qwen.qwen_provider.Qwen25ModelProvider7B attribute) (bridge.models.qwen.qwen_provider.Qwen2ModelProvider1P5B attribute) (bridge.models.qwen.qwen_provider.Qwen2ModelProvider500M attribute) (bridge.models.qwen.qwen_provider.Qwen2ModelProvider72B attribute) (bridge.models.qwen.qwen_provider.Qwen2ModelProvider7B attribute) (bridge.models.qwen.qwen_provider.Qwen3ModelProvider14B attribute) (bridge.models.qwen.qwen_provider.Qwen3ModelProvider1P7B attribute) (bridge.models.qwen.qwen_provider.Qwen3ModelProvider32B attribute) (bridge.models.qwen.qwen_provider.Qwen3ModelProvider4B attribute) (bridge.models.qwen.qwen_provider.Qwen3ModelProvider600M attribute) (bridge.models.qwen.qwen_provider.Qwen3ModelProvider8B attribute) (bridge.models.qwen.qwen_provider.Qwen3MoEModelProvider235B_A22B attribute) (bridge.models.qwen.qwen_provider.Qwen3MoEModelProvider30B_A3B attribute) num_beams (bridge.models.hf_pretrained.causal_lm.GenerateKwargs attribute) NUM_BYTES_IN_MEGABYTE (in module bridge.training.utils.theoretical_memory_utils) num_floating_point_operations() (in module bridge.training.utils.flop_utils) num_gpu_perf_scores_to_print (bridge.training.config.NVRxStragglerDetectionConfig attribute) num_layers (bridge.models.gpt_provider.GPTProvider126M attribute) (bridge.models.gpt_provider.GPTProvider175B attribute) (bridge.models.gpt_provider.GPTProvider20B attribute) (bridge.models.gpt_provider.GPTProvider40B attribute) (bridge.models.gpt_provider.GPTProvider5B attribute) (bridge.models.gpt_provider.GPTProvider7B attribute) (bridge.models.llama.llama_provider.CodeLlamaModelProvider34B attribute) (bridge.models.llama.llama_provider.Llama2ModelProvider13B attribute) (bridge.models.llama.llama_provider.Llama2ModelProvider70B attribute) (bridge.models.llama.llama_provider.Llama2ModelProvider7B attribute) (bridge.models.llama.llama_provider.Llama31ModelProvider405B attribute) (bridge.models.llama.llama_provider.Llama31ModelProvider70B attribute) (bridge.models.llama.llama_provider.Llama31ModelProvider8B attribute) (bridge.models.llama.llama_provider.Llama32ModelProvider1B attribute) (bridge.models.llama.llama_provider.Llama32ModelProvider3B attribute) (bridge.models.llama.llama_provider.Llama3ModelProvider70B attribute) (bridge.models.llama.llama_provider.Llama3ModelProvider8B attribute) (bridge.models.llama.llama_provider.Llama4ModelProvider attribute) (bridge.models.mamba.mamba_provider.MambaProvider attribute) (bridge.models.mamba.mamba_provider.MambaProvider130M attribute) (bridge.models.mamba.mamba_provider.MambaProvider1_3B attribute) (bridge.models.mamba.mamba_provider.MambaProvider2_7B attribute) (bridge.models.mamba.mamba_provider.MambaProvider370M attribute) (bridge.models.mamba.mamba_provider.MambaProvider780M attribute) (bridge.models.mamba.mamba_provider.NVIDIAMambaHybridProvider8B attribute) (bridge.models.mamba.mamba_provider.NVIDIAMambaProvider8B attribute) (bridge.models.mamba.nemotron_h_provider.NemotronHModel47BProvider attribute) (bridge.models.mamba.nemotron_h_provider.NemotronHModel4BProvider attribute) (bridge.models.mamba.nemotron_h_provider.NemotronHModel56BProvider attribute) (bridge.models.mamba.nemotron_h_provider.NemotronHModel8BProvider attribute) (bridge.models.mamba.nemotron_h_provider.NemotronNano12Bv2Provider attribute) (bridge.models.mamba.nemotron_h_provider.NemotronNano9Bv2Provider attribute) (bridge.models.qwen.qwen_provider.Qwen25ModelProvider14B attribute) (bridge.models.qwen.qwen_provider.Qwen25ModelProvider1P5B attribute) (bridge.models.qwen.qwen_provider.Qwen25ModelProvider32B attribute) (bridge.models.qwen.qwen_provider.Qwen25ModelProvider3B attribute) (bridge.models.qwen.qwen_provider.Qwen25ModelProvider500M attribute) (bridge.models.qwen.qwen_provider.Qwen25ModelProvider72B attribute) (bridge.models.qwen.qwen_provider.Qwen25ModelProvider7B attribute) (bridge.models.qwen.qwen_provider.Qwen2ModelProvider1P5B attribute) (bridge.models.qwen.qwen_provider.Qwen2ModelProvider500M attribute) (bridge.models.qwen.qwen_provider.Qwen2ModelProvider72B attribute) (bridge.models.qwen.qwen_provider.Qwen2ModelProvider7B attribute) (bridge.models.qwen.qwen_provider.Qwen3ModelProvider14B attribute) (bridge.models.qwen.qwen_provider.Qwen3ModelProvider1P7B attribute) (bridge.models.qwen.qwen_provider.Qwen3ModelProvider32B attribute) (bridge.models.qwen.qwen_provider.Qwen3ModelProvider4B attribute) (bridge.models.qwen.qwen_provider.Qwen3ModelProvider600M attribute) (bridge.models.qwen.qwen_provider.Qwen3ModelProvider8B attribute) (bridge.models.qwen.qwen_provider.Qwen3MoEModelProvider235B_A22B attribute) (bridge.models.qwen.qwen_provider.Qwen3MoEModelProvider30B_A3B attribute) num_layers_at_end_in_bf16 (bridge.training.mixed_precision.MixedPrecisionConfig attribute) num_layers_at_start_in_bf16 (bridge.training.mixed_precision.MixedPrecisionConfig attribute) num_moe_experts (bridge.models.gpt_provider.GPTModelProvider attribute) (bridge.models.llama.llama_provider.Llama4Experts128ModelProvider attribute) (bridge.models.llama.llama_provider.Llama4Experts16ModelProvider attribute) (bridge.models.qwen.qwen_provider.Qwen3MoEModelProvider attribute) (bridge.models.t5_provider.T5ModelProvider attribute) num_parameters (bridge.models.hf_pretrained.causal_lm.PreTrainedCausalLM property) num_parameters() (bridge.models.hf_pretrained.vlm.PreTrainedVLM method) num_query_groups (bridge.models.llama.llama_provider.CodeLlamaModelProvider34B attribute) (bridge.models.llama.llama_provider.Llama2ModelProvider13B attribute) (bridge.models.llama.llama_provider.Llama2ModelProvider70B attribute) (bridge.models.llama.llama_provider.Llama2ModelProvider7B attribute) (bridge.models.llama.llama_provider.Llama32ModelProvider1B attribute) (bridge.models.llama.llama_provider.Llama32ModelProvider3B attribute) (bridge.models.llama.llama_provider.Llama3ModelProvider attribute) (bridge.models.mamba.mamba_provider.NVIDIAMambaHybridProvider8B attribute) (bridge.models.mamba.nemotron_h_provider.NemotronHModelProvider attribute) (bridge.models.qwen.qwen_provider.Qwen25ModelProvider14B attribute) (bridge.models.qwen.qwen_provider.Qwen25ModelProvider1P5B attribute) (bridge.models.qwen.qwen_provider.Qwen25ModelProvider32B attribute) (bridge.models.qwen.qwen_provider.Qwen25ModelProvider3B attribute) (bridge.models.qwen.qwen_provider.Qwen25ModelProvider500M attribute) (bridge.models.qwen.qwen_provider.Qwen25ModelProvider72B attribute) (bridge.models.qwen.qwen_provider.Qwen25ModelProvider7B attribute) (bridge.models.qwen.qwen_provider.Qwen2ModelProvider1P5B attribute) (bridge.models.qwen.qwen_provider.Qwen2ModelProvider500M attribute) (bridge.models.qwen.qwen_provider.Qwen2ModelProvider72B attribute) (bridge.models.qwen.qwen_provider.Qwen2ModelProvider7B attribute) (bridge.models.qwen.qwen_provider.Qwen3ModelProvider attribute) (bridge.models.qwen.qwen_provider.Qwen3MoEModelProvider attribute) (bridge.models.qwen.qwen_provider.Qwen3MoEModelProvider235B_A22B attribute) (bridge.models.qwen.qwen_provider.Qwen3MoEModelProvider30B_A3B attribute) num_return_sequences (bridge.models.hf_pretrained.causal_lm.GenerateKwargs attribute) num_sm (bridge.training.comm_overlap.BulkOverlapCfg attribute) (bridge.training.comm_overlap.PipelineOverlapCfg attribute) (bridge.training.comm_overlap.RingExchangeOverlapCfg attribute) num_splits (bridge.training.comm_overlap.PipelineOverlapCfg attribute) num_workers (bridge.training.config.DataloaderConfig attribute) NVIDIAMambaHybridProvider8B (class in bridge.models.mamba.mamba_provider) NVIDIAMambaProvider8B (class in bridge.models.mamba.mamba_provider) nvlm_yi_34b_template (in module bridge.training.tokenizers.multimodal_tokenizer) nvrx_straggler (bridge.training.config.ConfigContainer attribute) nvrx_straggler_manager (bridge.training.state.GlobalState property) NVRxStragglerDetectionConfig (class in bridge.training.config) NVRxStragglerDetectionManager (class in bridge.training.nvrx_straggler) O offsets() (bridge.training.tokenizers.tokenizer._HuggingFaceTokenizer method) (bridge.training.tokenizers.tokenizer._NullTokenizer method) (bridge.training.tokenizers.tokenizer._SentencePieceTokenizer method) (bridge.training.tokenizers.tokenizer.CustomTikTokenizer method) old_context_len (bridge.models.llama.llama_provider.Llama31ModelProvider attribute) on_checkpoint_loaded() (in module bridge.training.fault_tolerance) on_checkpointing_end() (in module bridge.training.fault_tolerance) on_checkpointing_start() (in module bridge.training.fault_tolerance) on_eval_step_end() (in module bridge.training.fault_tolerance) on_eval_step_start() (in module bridge.training.fault_tolerance) on_load_checkpoint_success() (in module bridge.training.utils.wandb_utils) on_save_checkpoint_success() (in module bridge.training.utils.wandb_utils) on_training_step_end() (in module bridge.training.fault_tolerance) on_training_step_start() (in module bridge.training.fault_tolerance) optimizer (bridge.training.config.ConfigContainer attribute) (bridge.training.setup.SetupOutput attribute) OPTIONAL_ARTIFACTS (bridge.models.hf_pretrained.base.PreTrainedBase attribute) (bridge.models.hf_pretrained.causal_lm.PreTrainedCausalLM attribute) (bridge.models.hf_pretrained.vlm.PreTrainedVLM attribute) original_answers (bridge.data.builders.hf_dataset.ProcessExampleOutput attribute) output (bridge.data.builders.hf_dataset.ProcessExampleOutput attribute) output_attentions (bridge.models.hf_pretrained.causal_lm.GenerateKwargs attribute) output_scores (bridge.models.hf_pretrained.causal_lm.GenerateKwargs attribute) overlap_grad_reduce (bridge.training.comm_overlap._CommOverlapConfig attribute) (bridge.training.comm_overlap.CommOverlapConfig attribute) overlap_p2p_comm (bridge.training.comm_overlap._CommOverlapConfig attribute) (bridge.training.comm_overlap.CommOverlapConfig attribute) overlap_param_gather (bridge.training.comm_overlap._CommOverlapConfig attribute) (bridge.training.comm_overlap.CommOverlapConfig attribute) overlap_param_gather_with_optimizer_step (bridge.models.model_provider.GetModelKwargs attribute) (bridge.training.comm_overlap._CommOverlapConfig attribute) (bridge.training.comm_overlap.CommOverlapConfig attribute) override_opt_param_scheduler (bridge.training.config.SchedulerConfig attribute) OverridesError P pack_metadata (bridge.data.builders.finetuning_dataset.FinetuningDatasetBuilder property) packed_metadata_path (bridge.data.datasets.packed_sequence.PackedSequenceSpecs attribute) packed_sequence_size (bridge.data.datasets.packed_sequence.PackedSequenceSpecs attribute) packed_sequence_specs (bridge.training.config.FinetuningDatasetConfig attribute) packed_train_data_path (bridge.data.datasets.packed_sequence.PackedSequenceSpecs attribute) packed_val_data_path (bridge.data.datasets.packed_sequence.PackedSequenceSpecs attribute) PackedSequenceSpecs (class in bridge.data.datasets.packed_sequence) PACKING_ALGOS (in module bridge.data.datasets.packing_utils) pad (bridge.training.tokenizers.multimodal_tokenizer.MultimodalTokenizer property) (bridge.training.tokenizers.tokenizer._BertWordPieceTokenizer property) (bridge.training.tokenizers.tokenizer._SentencePieceTokenizer property) pad_cu_seqlens (bridge.data.datasets.packed_sequence.PackedSequenceSpecs attribute) pad_seq_to_mult() (in module bridge.peft.utils) pad_token_id (bridge.models.hf_pretrained.causal_lm.GenerateKwargs attribute) (bridge.training.tokenizers.multimodal_tokenizer.PromptConfig attribute) padded_vocab_size (bridge.training.tokenizers.config.TokenizerConfig attribute) padding (bridge.models.hf_pretrained.causal_lm.EncodeKwargs attribute) parallel_output (bridge.models.gpt_provider.GPTModelProvider attribute) (bridge.models.mamba.mamba_provider.MambaProvider attribute) (bridge.models.t5_provider.T5ModelProvider attribute) ParallelLinearAdapter (class in bridge.peft.utils) ParallelLinearDoRAAdapter (class in bridge.peft.dora_layers) param_is_not_shared() (in module bridge.training.utils.train_utils) param_name (bridge.models.conversion.model_bridge.HFWeightTuple attribute) (bridge.models.conversion.model_bridge.MegatronWeightTuple attribute) (bridge.models.conversion.model_bridge.WeightConversionTask attribute) param_weight (bridge.models.conversion.model_bridge.WeightConversionTask attribute) params_dtype (bridge.models.mamba.mamba_provider.MambaProvider attribute) (bridge.models.qwen.qwen_provider.Qwen2ModelProvider attribute) (bridge.models.qwen.qwen_provider.Qwen3ModelProvider attribute) (bridge.models.qwen.qwen_provider.Qwen3MoEModelProvider attribute) (bridge.training.mixed_precision.MixedPrecisionConfig attribute) params_to_save (bridge.peft.base.PEFT attribute) parse_hydra_overrides() (in module bridge.training.utils.omegaconf_utils) PARTIAL (bridge.utils.instantiate_utils._Keys attribute) PATCH (in module bridge.package_info) patch_linear_module() (in module bridge.peft.lora_layers) path (bridge.models.hf_pretrained.state.SafeTensorsStateSource property) peft (bridge.training.config.ConfigContainer attribute) PEFT (class in bridge.peft.base) persist_layer_norm (bridge.models.gpt_provider.GPTModelProvider attribute) (bridge.models.mamba.nemotron_h_provider.NemotronHModelProvider attribute) (bridge.models.t5_provider.T5ModelProvider attribute) persistent_workers (bridge.training.config.DataloaderConfig attribute) pin_memory (bridge.training.config.DataloaderConfig attribute) pipeline_dtype (bridge.training.mixed_precision.MixedPrecisionConfig attribute) PipelineOverlapCfg (class in bridge.training.comm_overlap) position_embedding_type (bridge.models.gpt_provider.GPTModelProvider attribute) (bridge.models.llama.llama_provider.Llama3ModelProvider attribute) (bridge.models.llama.llama_provider.LlamaModelProvider attribute) (bridge.models.mamba.mamba_provider.MambaProvider attribute) (bridge.models.qwen.qwen_provider.Qwen2ModelProvider attribute) (bridge.models.qwen.qwen_provider.Qwen3ModelProvider attribute) (bridge.models.qwen.qwen_provider.Qwen3MoEModelProvider attribute) (bridge.models.t5_provider.T5ModelProvider attribute) post_training_step_callbacks() (in module bridge.training.train) post_wrap_hook (bridge.models.model_provider.GetModelKwargs attribute) (bridge.models.model_provider.ModelProviderMixin property) pp_rank (bridge.models.conversion.model_bridge.WeightConversionTask attribute) (bridge.models.conversion.param_mapping.MegatronParamMapping property) pp_size (bridge.models.conversion.param_mapping.MegatronParamMapping property) PRE_RELEASE (in module bridge.package_info) pre_wrap_hook (bridge.models.model_provider.GetModelKwargs attribute) (bridge.models.model_provider.ModelProviderMixin property) PREFIX_STR (in module bridge.data.datasets.sft) (in module bridge.data.datasets.utils) prepare_data() (bridge.data.builders.finetuning_dataset.FinetuningDatasetBuilder method) (bridge.data.builders.hf_dataset.HFDatasetBuilder method) prepare_packed_data() (bridge.data.builders.finetuning_dataset.FinetuningDatasetBuilder method) prepare_packed_sequence_data() (in module bridge.data.datasets.packed_sequence) preprocess_and_split_data() (in module bridge.data.builders.hf_dataset) pretrain() (in module bridge.training.pretrain) pretrain_train_valid_test_datasets_provider() (in module bridge.data.utils) pretrained_checkpoint (bridge.training.config.CheckpointConfig attribute) PRETRAINED_MERGES_ARCHIVE_MAP (in module bridge.training.tokenizers.gpt2_tokenization) PRETRAINED_VOCAB_ARCHIVE_MAP (in module bridge.training.tokenizers.gpt2_tokenization) PRETRAINED_VOCAB_POSITIONAL_EMBEDDINGS_SIZE_MAP (in module bridge.training.tokenizers.gpt2_tokenization) PreTrainedBase (class in bridge.models.hf_pretrained.base) PreTrainedCausalLM (class in bridge.models.hf_pretrained.causal_lm) PreTrainedVLM (class in bridge.models.hf_pretrained.vlm) print_rank_0() (in module bridge.utils.common_utils) print_rank_last() (in module bridge.utils.common_utils) printable_text() (in module bridge.training.tokenizers.bert_tokenization) process_example_fn (bridge.data.builders.hf_dataset.HFDatasetConfig attribute) process_images_and_text() (bridge.models.hf_pretrained.vlm.PreTrainedVLM method) process_squad_example() (in module bridge.data.hf_processors.squad) ProcessExampleFn (class in bridge.data.builders.hf_dataset) ProcessExampleOutput (class in bridge.data.builders.hf_dataset) processor (bridge.models.hf_pretrained.vlm.PreTrainedVLM property) profile_ranks (bridge.training.config.ProfilingConfig attribute) profile_step_end (bridge.training.config.ProfilingConfig attribute) profile_step_start (bridge.training.config.ProfilingConfig attribute) profiling (bridge.training.config.ConfigContainer attribute) profiling_interval (bridge.training.config.NVRxStragglerDetectionConfig attribute) ProfilingConfig (class in bridge.training.config) proj_dgrad (bridge.training.comm_overlap.TransformerLayerTPOverlapCfg attribute) proj_fprop (bridge.training.comm_overlap.TransformerLayerTPOverlapCfg attribute) PromptConfig (class in bridge.training.tokenizers.multimodal_tokenizer) provide() (bridge.models.gpt_provider.GPTModelProvider method) (bridge.models.llama.llama_provider.Llama31ModelProvider method) (bridge.models.mamba.mamba_provider.MambaProvider method) (bridge.models.model_provider.ModelProviderMixin method) (bridge.models.t5_provider.T5ModelProvider method) provide_distributed_model() (bridge.models.model_provider.ModelProviderMixin method) provider_bridge() (bridge.models.conversion.model_bridge.MegatronModelBridge method) (bridge.models.llama.llama_bridge.LlamaBridge method) (bridge.models.qwen.qwen2_bridge.Qwen2Bridge method) (bridge.models.qwen.qwen3_bridge.Qwen3Bridge method) (bridge.models.qwen.qwen3_moe_bridge.Qwen3MoEBridge method) push_to_hub() (bridge.models.conversion.auto_bridge.AutoBridge method) Q qk_l2_norm (bridge.models.llama.llama_provider.Llama4Experts128ModelProvider attribute) (bridge.models.llama.llama_provider.Llama4Experts16ModelProvider attribute) (bridge.models.llama.llama_provider.Llama4ModelProvider attribute) qk_layernorm (bridge.models.gpt_provider.GPTModelProvider attribute) (bridge.models.qwen.qwen_provider.Qwen3ModelProvider attribute) (bridge.models.qwen.qwen_provider.Qwen3MoEModelProvider attribute) qkv_dgrad (bridge.training.comm_overlap.TransformerLayerTPOverlapCfg attribute) qkv_fprop (bridge.training.comm_overlap.TransformerLayerTPOverlapCfg attribute) qkv_wgrad (bridge.training.comm_overlap.TransformerLayerTPOverlapCfg attribute) QKVMapping (class in bridge.models.conversion.param_mapping) Qwen25ModelProvider14B (class in bridge.models.qwen.qwen_provider) Qwen25ModelProvider1P5B (class in bridge.models.qwen.qwen_provider) Qwen25ModelProvider32B (class in bridge.models.qwen.qwen_provider) Qwen25ModelProvider3B (class in bridge.models.qwen.qwen_provider) Qwen25ModelProvider500M (class in bridge.models.qwen.qwen_provider) Qwen25ModelProvider72B (class in bridge.models.qwen.qwen_provider) Qwen25ModelProvider7B (class in bridge.models.qwen.qwen_provider) Qwen2Bridge (class in bridge.models.qwen.qwen2_bridge) Qwen2ModelProvider (class in bridge.models.qwen.qwen_provider) Qwen2ModelProvider1P5B (class in bridge.models.qwen.qwen_provider) Qwen2ModelProvider500M (class in bridge.models.qwen.qwen_provider) Qwen2ModelProvider72B (class in bridge.models.qwen.qwen_provider) Qwen2ModelProvider7B (class in bridge.models.qwen.qwen_provider) qwen2p0_custom_template (in module bridge.training.tokenizers.multimodal_tokenizer) Qwen3Bridge (class in bridge.models.qwen.qwen3_bridge) Qwen3ModelProvider (class in bridge.models.qwen.qwen_provider) Qwen3ModelProvider14B (class in bridge.models.qwen.qwen_provider) Qwen3ModelProvider1P7B (class in bridge.models.qwen.qwen_provider) Qwen3ModelProvider32B (class in bridge.models.qwen.qwen_provider) Qwen3ModelProvider4B (class in bridge.models.qwen.qwen_provider) Qwen3ModelProvider600M (class in bridge.models.qwen.qwen_provider) Qwen3ModelProvider8B (class in bridge.models.qwen.qwen_provider) Qwen3MoEBridge (class in bridge.models.qwen.qwen3_moe_bridge) Qwen3MoEModelProvider (class in bridge.models.qwen.qwen_provider) Qwen3MoEModelProvider235B_A22B (class in bridge.models.qwen.qwen_provider) Qwen3MoEModelProvider30B_A3B (class in bridge.models.qwen.qwen_provider) R R (in module bridge.utils.decorators) rampup_batch_size (bridge.training.config.TrainingConfig attribute) RandomSeedDataset (class in bridge.data.samplers) rank_0_prepare_data() (in module bridge.data.datasets.utils) read_metadata() (in module bridge.training.checkpointing) read_run_config() (in module bridge.training.utils.checkpoint_utils) read_train_state() (in module bridge.training.utils.checkpoint_utils) recompute_num_layers (bridge.models.t5_provider.T5ModelProvider attribute) record_memory_history (bridge.training.config.ProfilingConfig attribute) record_shapes (bridge.training.config.ProfilingConfig attribute) reduce_aux_losses_tracker_across_ranks() (in module bridge.training.utils.train_utils) reduce_max_stat_across_model_parallel_group() (in module bridge.training.utils.train_utils) regex() (bridge.models.hf_pretrained.state.StateDict method) register() (in module bridge.training.mixed_precision) register_bridge() (bridge.models.conversion.model_bridge.MegatronModelBridge class method) register_bridge_implementation() (in module bridge.models.conversion.model_bridge) register_module_type() (bridge.models.conversion.param_mapping.AutoMapping class method) register_post_wrap_hook() (bridge.models.model_provider.ModelProviderMixin method) register_pre_wrap_hook() (bridge.models.model_provider.ModelProviderMixin method) relative_attention_max_distance (bridge.models.t5_provider.T5ModelProvider attribute) relative_attention_num_buckets (bridge.models.t5_provider.T5ModelProvider attribute) release() (bridge.training.utils.sig_utils.DistributedSignalHandler method) reload_mergeable_ranks() (in module bridge.training.tokenizers.tokenizer) remove_non_pickleables() (in module bridge.models.conversion.utils) repetition_penalty (bridge.models.hf_pretrained.causal_lm.GenerateKwargs attribute) ReplicatedMapping (class in bridge.models.conversion.param_mapping) replication (bridge.training.config.CheckpointConfig attribute) replication_factor (bridge.training.config.CheckpointConfig attribute) replication_jump (bridge.training.config.CheckpointConfig attribute) report_memory() (in module bridge.training.utils.train_utils) report_theoretical_memory() (in module bridge.training.utils.theoretical_memory_utils) report_time_interval (bridge.training.config.NVRxStragglerDetectionConfig attribute) rerun_mode (bridge.training.config.RerunStateMachineConfig attribute) rerun_state_machine (bridge.training.config.ConfigContainer attribute) RerunStateMachineConfig (class in bridge.training.config) resolve() (bridge.models.conversion.param_mapping.GatedMLPMapping method) (bridge.models.conversion.param_mapping.MegatronParamMapping method) (bridge.models.conversion.param_mapping.QKVMapping method) return_attention_mask (bridge.models.hf_pretrained.causal_lm.EncodeKwargs attribute) return_dict_in_generate (bridge.models.hf_pretrained.causal_lm.GenerateKwargs attribute) return_tensors (bridge.models.hf_pretrained.causal_lm.EncodeKwargs attribute) return_token_type_ids (bridge.models.hf_pretrained.causal_lm.EncodeKwargs attribute) rewrite (bridge.data.builders.hf_dataset.HFDatasetConfig attribute) RingExchangeOverlapCfg (class in bridge.training.comm_overlap) rng (bridge.training.config.ConfigContainer attribute) RNGConfig (class in bridge.training.config) rope_scaling (bridge.models.llama.llama_provider.Llama4Experts128ModelProvider attribute) (bridge.models.llama.llama_provider.Llama4Experts16ModelProvider attribute) (bridge.models.llama.llama_provider.Llama4ModelProvider attribute) rope_scaling_factor (bridge.models.llama.llama_provider.Llama4Experts16ModelProvider attribute) (bridge.models.llama.llama_provider.Llama4ModelProvider attribute) rotary_base (bridge.models.gpt_provider.GPTModelProvider attribute) (bridge.models.llama.llama_provider.CodeLlamaModelProvider13B attribute) (bridge.models.llama.llama_provider.CodeLlamaModelProvider34B attribute) (bridge.models.llama.llama_provider.CodeLlamaModelProvider7B attribute) (bridge.models.llama.llama_provider.Llama31ModelProvider405B attribute) (bridge.models.llama.llama_provider.Llama31ModelProvider70B attribute) (bridge.models.llama.llama_provider.Llama31ModelProvider8B attribute) (bridge.models.llama.llama_provider.Llama32ModelProvider1B attribute) (bridge.models.llama.llama_provider.Llama32ModelProvider3B attribute) (bridge.models.llama.llama_provider.Llama3ModelProvider70B attribute) (bridge.models.llama.llama_provider.Llama3ModelProvider8B attribute) (bridge.models.llama.llama_provider.Llama4ModelProvider attribute) (bridge.models.mamba.mamba_provider.MambaProvider attribute) (bridge.models.qwen.qwen_provider.Qwen2ModelProvider attribute) (bridge.models.qwen.qwen_provider.Qwen3ModelProvider attribute) (bridge.models.qwen.qwen_provider.Qwen3MoEModelProvider attribute) rotary_interleaved (bridge.models.llama.llama_provider.Llama4ModelProvider attribute) rotary_percent (bridge.models.gpt_provider.GPTModelProvider attribute) (bridge.models.llama.llama_provider.Llama3ModelProvider attribute) (bridge.models.mamba.mamba_provider.MambaProvider attribute) (bridge.models.t5_provider.T5ModelProvider attribute) RowParallelMapping (class in bridge.models.conversion.param_mapping) S safe_import() (in module bridge.utils.import_utils) safe_import_from() (in module bridge.utils.import_utils) safe_shutdown_nvrx_straggler_manager() (in module bridge.training.nvrx_straggler) safe_yaml_representers() (in module bridge.utils.yaml_utils) SafeTensorsStateSource (class in bridge.models.hf_pretrained.state) save (bridge.training.config.CheckpointConfig attribute) save_artifacts() (bridge.models.hf_pretrained.base.PreTrainedBase method) save_checkpoint() (in module bridge.training.checkpointing) save_checkpoint_and_time() (in module bridge.training.train) save_generator() (bridge.models.hf_pretrained.state.SafeTensorsStateSource method) save_hf_pretrained() (bridge.models.config.ConfigProtocol method) (bridge.models.conversion.auto_bridge.AutoBridge method) (bridge.models.model_provider.ModelProviderMixin method) (in module bridge.models.config) save_hf_weights() (bridge.models.conversion.auto_bridge.AutoBridge method) save_interval (bridge.training.config.CheckpointConfig attribute) save_megatron_model() (bridge.models.conversion.auto_bridge.AutoBridge method) (in module bridge.training.model_load_save) save_optim (bridge.training.config.CheckpointConfig attribute) save_pretrained() (bridge.models.hf_pretrained.causal_lm.PreTrainedCausalLM method) (bridge.models.hf_pretrained.vlm.PreTrainedVLM method) save_rng (bridge.training.config.CheckpointConfig attribute) save_vocabulary() (bridge.training.tokenizers.gpt2_tokenization.GPT2Tokenizer method) scale_factor (bridge.models.llama.llama_provider.Llama31ModelProvider attribute) (bridge.models.llama.llama_provider.Llama32ModelProvider1B attribute) (bridge.models.llama.llama_provider.Llama32ModelProvider3B attribute) scatter_embedding_sequence_parallel (bridge.models.gpt_provider.GPTModelProvider attribute) scatter_to_tp_ranks() (bridge.models.conversion.param_mapping.MegatronParamMapping method) schedule_async_save() (in module bridge.training.checkpointing) scheduler (bridge.training.config.ConfigContainer attribute) (bridge.training.setup.SetupOutput attribute) SchedulerConfig (class in bridge.training.config) seed (bridge.training.config.FinetuningDatasetConfig attribute) (bridge.training.config.RNGConfig attribute) seen_checkpoints_cnt (bridge.training.state.FaultToleranceState attribute) seen_tr_iters_cnt (bridge.training.state.FaultToleranceState attribute) sep (bridge.training.tokenizers.tokenizer._BertWordPieceTokenizer property) (bridge.training.tokenizers.tokenizer._GPTSentencePieceTokenizer property) (bridge.training.tokenizers.tokenizer._Llama2Tokenizer property) (bridge.training.tokenizers.tokenizer._NullTokenizer property) (bridge.training.tokenizers.tokenizer._SentencePieceTokenizer property) seq_len_interpolation_factor (bridge.models.gpt_provider.GPTModelProvider attribute) (bridge.models.mamba.mamba_provider.MambaProvider attribute) (bridge.models.t5_provider.T5ModelProvider attribute) seq_length (bridge.models.gpt_provider.GPTModelProvider attribute) (bridge.models.gpt_provider.GPTProvider126M attribute) (bridge.models.gpt_provider.GPTProvider175B attribute) (bridge.models.gpt_provider.GPTProvider20B attribute) (bridge.models.gpt_provider.GPTProvider40B attribute) (bridge.models.gpt_provider.GPTProvider5B attribute) (bridge.models.gpt_provider.GPTProvider7B attribute) (bridge.models.llama.llama_provider.CodeLlamaModelProvider13B attribute) (bridge.models.llama.llama_provider.CodeLlamaModelProvider34B attribute) (bridge.models.llama.llama_provider.CodeLlamaModelProvider7B attribute) (bridge.models.llama.llama_provider.Llama31ModelProvider405B attribute) (bridge.models.llama.llama_provider.Llama31ModelProvider70B attribute) (bridge.models.llama.llama_provider.Llama31ModelProvider8B attribute) (bridge.models.llama.llama_provider.Llama3ModelProvider70B attribute) (bridge.models.llama.llama_provider.Llama3ModelProvider8B attribute) (bridge.models.llama.llama_provider.Llama4ModelProvider attribute) (bridge.models.llama.llama_provider.LlamaModelProvider attribute) (bridge.models.mamba.mamba_provider.MambaProvider attribute) (bridge.models.mamba.mamba_provider.MambaProvider130M attribute) (bridge.models.mamba.mamba_provider.MambaProvider1_3B attribute) (bridge.models.mamba.mamba_provider.MambaProvider2_7B attribute) (bridge.models.mamba.mamba_provider.MambaProvider370M attribute) (bridge.models.mamba.mamba_provider.MambaProvider780M attribute) (bridge.models.mamba.mamba_provider.NVIDIAMambaHybridProvider8B attribute) (bridge.models.mamba.mamba_provider.NVIDIAMambaProvider8B attribute) (bridge.models.mamba.nemotron_h_provider.NemotronHModelProvider attribute) (bridge.models.qwen.qwen_provider.Qwen25ModelProvider14B attribute) (bridge.models.qwen.qwen_provider.Qwen25ModelProvider1P5B attribute) (bridge.models.qwen.qwen_provider.Qwen25ModelProvider32B attribute) (bridge.models.qwen.qwen_provider.Qwen25ModelProvider3B attribute) (bridge.models.qwen.qwen_provider.Qwen25ModelProvider500M attribute) (bridge.models.qwen.qwen_provider.Qwen25ModelProvider72B attribute) (bridge.models.qwen.qwen_provider.Qwen25ModelProvider7B attribute) (bridge.models.qwen.qwen_provider.Qwen2ModelProvider attribute) (bridge.models.qwen.qwen_provider.Qwen2ModelProvider1P5B attribute) (bridge.models.qwen.qwen_provider.Qwen2ModelProvider500M attribute) (bridge.models.qwen.qwen_provider.Qwen2ModelProvider72B attribute) (bridge.models.qwen.qwen_provider.Qwen2ModelProvider7B attribute) (bridge.models.qwen.qwen_provider.Qwen3ModelProvider attribute) (bridge.models.qwen.qwen_provider.Qwen3MoEModelProvider attribute) (bridge.models.t5_provider.T5ModelProvider attribute) (bridge.training.config.FinetuningDatasetConfig attribute) seq_length_dec (bridge.models.t5_provider.T5ModelProvider attribute) set_checkpoint_version() (in module bridge.training.checkpointing) set_epoch() (bridge.data.samplers.RandomSeedDataset method) set_jit_fusion_options() (in module bridge.training.initialize) set_level_for_all_loggers (bridge.training.config.LoggerConfig attribute) set_params_to_save() (bridge.peft.base.PEFT method) set_sm_margin (bridge.training.comm_overlap.BulkOverlapCfg attribute) (bridge.training.comm_overlap.PipelineOverlapCfg attribute) (bridge.training.comm_overlap.RingExchangeOverlapCfg attribute) set_special_tokens() (bridge.training.tokenizers.gpt2_tokenization.GPT2Tokenizer method) setup() (bridge.training.comm_overlap.CommOverlapConfig method) (bridge.training.mixed_precision.MixedPrecisionConfig method) (in module bridge.training.fault_tolerance) (in module bridge.training.setup) setup_data_iterators() (in module bridge.data.loaders) setup_logging() (in module bridge.training.utils.log_utils) setup_optimizer() (in module bridge.training.optim) SetupOutput (class in bridge.training.setup) sharded_state_dict() (bridge.models.gpt_full_te_layer_autocast_spec.TETransformerLayerAutocast method) (bridge.peft.adapter_wrapper.AdapterWrapper method) (bridge.peft.canonical_lora.ModuleDict method) (bridge.peft.dora_layers.ParallelLinearDoRAAdapter method) (bridge.peft.utils.ParallelLinearAdapter method) share_embeddings_and_output_weights (bridge.models.gpt_provider.GPTModelProvider attribute) (bridge.models.llama.llama_provider.Llama32ModelProvider1B attribute) (bridge.models.llama.llama_provider.Llama32ModelProvider3B attribute) (bridge.models.llama.llama_provider.Llama3ModelProvider attribute) (bridge.models.llama.llama_provider.LlamaModelProvider attribute) (bridge.models.mamba.mamba_provider.MambaProvider attribute) (bridge.models.qwen.qwen_provider.Qwen25ModelProvider1P5B attribute) (bridge.models.qwen.qwen_provider.Qwen25ModelProvider3B attribute) (bridge.models.qwen.qwen_provider.Qwen25ModelProvider500M attribute) (bridge.models.qwen.qwen_provider.Qwen2ModelProvider attribute) (bridge.models.qwen.qwen_provider.Qwen2ModelProvider1P5B attribute) (bridge.models.qwen.qwen_provider.Qwen2ModelProvider500M attribute) (bridge.models.qwen.qwen_provider.Qwen3ModelProvider attribute) (bridge.models.qwen.qwen_provider.Qwen3ModelProvider1P7B attribute) (bridge.models.qwen.qwen_provider.Qwen3ModelProvider4B attribute) (bridge.models.qwen.qwen_provider.Qwen3ModelProvider600M attribute) (bridge.models.qwen.qwen_provider.Qwen3MoEModelProvider attribute) (bridge.models.t5_provider.T5ModelProvider attribute) shutdown() (bridge.training.nvrx_straggler.NVRxStragglerDetectionManager method) (in module bridge.training.fault_tolerance) signal_handler (bridge.training.state.GlobalState property) signals_received() (bridge.training.utils.sig_utils.DistributedSignalHandler method) simulate_fault (bridge.training.config.FaultToleranceConfig attribute) simulated_fault_base_delay (bridge.training.config.FaultToleranceConfig attribute) simulated_fault_rank (bridge.training.config.FaultToleranceConfig attribute) simulated_fault_type (bridge.training.config.FaultToleranceConfig attribute) skip_special_tokens (bridge.models.hf_pretrained.causal_lm.DecodeKwargs attribute) skip_train (bridge.training.config.TrainingConfig attribute) skipped_train_samples (bridge.training.state.TrainState attribute) source (bridge.models.hf_pretrained.state.StateDict attribute) special_tokens (bridge.training.tokenizers.config.TokenizerConfig attribute) SPECIAL_TOKENS_NAME (in module bridge.training.tokenizers.gpt2_tokenization) SPIKY_LOSS_FACTOR (in module bridge.training.losses) split (bridge.data.builders.hf_dataset.HFDatasetConfig attribute) split_qkv_biases() (in module bridge.models.conversion.param_mapping) split_qkv_weights() (in module bridge.models.conversion.param_mapping) split_val_from_train (bridge.data.builders.hf_dataset.HFDatasetConfig attribute) squared_relu() (in module bridge.training.mlm_compat.activations) start_weight_decay (bridge.training.config.SchedulerConfig attribute) state (bridge.models.hf_pretrained.base.PreTrainedBase property) (bridge.training.setup.SetupOutput attribute) state_dict() (bridge.peft.adapter_wrapper.AdapterWrapper method) (bridge.training.state.TrainState method) StateDict (class in bridge.models.hf_pretrained.state) StateSource (class in bridge.models.hf_pretrained.state) step (bridge.training.state.TrainState attribute) stop_if_detected (bridge.training.config.NVRxStragglerDetectionConfig attribute) straggler (bridge.training.config.ConfigContainer attribute) straggler_ctrlr_port (bridge.training.config.StragglerDetectionConfig attribute) straggler_minmax_count (bridge.training.config.StragglerDetectionConfig attribute) straggler_timer (bridge.training.state.GlobalState property) StragglerDetectionConfig (class in bridge.training.config) stream_weights_hf_to_megatron() (bridge.models.conversion.model_bridge.MegatronModelBridge method) stream_weights_megatron_to_hf() (bridge.models.conversion.model_bridge.MegatronModelBridge method) (in module bridge.models.conversion.model_bridge) STRICT (bridge.utils.instantiate_utils.InstantiationMode attribute) supports() (bridge.models.conversion.auto_bridge.AutoBridge class method) SYSTEM_TOKEN (in module bridge.data.datasets.utils) T T (in module bridge.models.config) (in module bridge.training.utils.config_utils) T5ModelProvider (class in bridge.models.t5_provider) TARGET (bridge.utils.instantiate_utils._Keys attribute) target_modules (bridge.peft.canonical_lora.CanonicalLoRA attribute) (bridge.peft.dora.DoRA attribute) (bridge.peft.lora.LoRA attribute) (bridge.peft.module_matcher.ModuleMatcher attribute) te_rng_tracker (bridge.training.config.RNGConfig attribute) TECL (in module bridge.peft.utils) TELinearAdapter (class in bridge.peft.lora_layers) temperature (bridge.models.hf_pretrained.causal_lm.GenerateKwargs attribute) temporary_distributed_context() (in module bridge.training.model_load_save) tensorboard_dir (bridge.training.config.LoggerConfig attribute) tensorboard_log_interval (bridge.training.config.LoggerConfig attribute) tensorboard_logger (bridge.training.state.GlobalState property) tensorboard_queue_size (bridge.training.config.LoggerConfig attribute) TERL (in module bridge.peft.utils) test_data_iterator (bridge.training.setup.SetupOutput attribute) test_path (bridge.data.builders.finetuning_dataset.FinetuningDatasetBuilder property) TETransformerLayerAutocast (class in bridge.models.gpt_full_te_layer_autocast_spec) text_to_ids() (bridge.training.tokenizers.tokenizer.MegatronTokenizer method) tiktoken_num_special_tokens (bridge.training.tokenizers.config.TokenizerConfig attribute) tiktoken_pattern (bridge.training.tokenizers.config.TokenizerConfig attribute) tiktoken_special_tokens (bridge.training.tokenizers.config.TokenizerConfig attribute) timers (bridge.training.state.GlobalState property) timing_log_level (bridge.training.config.LoggerConfig attribute) timing_log_option (bridge.training.config.LoggerConfig attribute) to() (bridge.models.hf_pretrained.causal_lm.PreTrainedCausalLM method) (bridge.models.hf_pretrained.vlm.PreTrainedVLM method) to_dict() (bridge.training.utils.config_utils._ConfigContainerBase method) to_megatron_model() (bridge.models.conversion.auto_bridge.AutoBridge method) to_megatron_provider() (bridge.models.conversion.auto_bridge.AutoBridge method) to_yaml() (bridge.training.utils.config_utils._ConfigContainerBase method) tokenize() (bridge.training.tokenizers.bert_tokenization.BasicTokenizer method) (bridge.training.tokenizers.bert_tokenization.FullTokenizer method) (bridge.training.tokenizers.bert_tokenization.WordpieceTokenizer method) (bridge.training.tokenizers.gpt2_tokenization.GPT2Tokenizer method) (bridge.training.tokenizers.multimodal_tokenizer.MultimodalTokenizer method) (bridge.training.tokenizers.tokenizer._BertWordPieceTokenizer method) (bridge.training.tokenizers.tokenizer._GPT2BPETokenizer method) (bridge.training.tokenizers.tokenizer._GPTSentencePieceTokenizer method) (bridge.training.tokenizers.tokenizer._HuggingFaceTokenizer method) (bridge.training.tokenizers.tokenizer._Llama2Tokenizer method) (bridge.training.tokenizers.tokenizer._NullTokenizer method) (bridge.training.tokenizers.tokenizer._SentencePieceTokenizer method) (bridge.training.tokenizers.tokenizer.CustomTikTokenizer method) tokenize_conversation() (bridge.training.tokenizers.multimodal_tokenizer.MultimodalTokenizer method) tokenize_dataset() (in module bridge.data.datasets.packed_sequence) tokenizer (bridge.models.hf_pretrained.causal_lm.PreTrainedCausalLM property) (bridge.models.hf_pretrained.vlm.PreTrainedVLM property) (bridge.training.config.ConfigContainer attribute) (bridge.training.state.GlobalState property) tokenizer_model (bridge.training.tokenizers.config.TokenizerConfig attribute) tokenizer_model_name (bridge.data.datasets.packed_sequence.PackedSequenceSpecs attribute) tokenizer_prompt_format (bridge.training.tokenizers.config.TokenizerConfig attribute) tokenizer_type (bridge.training.tokenizers.config.TokenizerConfig attribute) TokenizerConfig (class in bridge.training.tokenizers.config) top_k (bridge.models.hf_pretrained.causal_lm.GenerateKwargs attribute) top_p (bridge.models.hf_pretrained.causal_lm.GenerateKwargs attribute) torch_dist_init() (in module bridge.training.initialize) torch_dtype_from_mcore_config() (in module bridge.training.model_load_save) torch_dtype_from_precision() (in module bridge.models.gpt_full_te_layer_autocast_spec) torchrun_main() (in module bridge.models.decorators.torchrun) tp_comm_bootstrap_backend (bridge.training.comm_overlap._CommOverlapConfig attribute) (bridge.training.comm_overlap.CommOverlapConfig attribute) tp_comm_overlap (bridge.training.comm_overlap._CommOverlapConfig attribute) (bridge.training.comm_overlap.CommOverlapConfig attribute) tp_comm_overlap_cfg (bridge.models.gpt_provider.GPTModelProvider attribute) (bridge.models.t5_provider.T5ModelProvider attribute) (bridge.training.comm_overlap._CommOverlapConfig attribute) (bridge.training.comm_overlap.CommOverlapConfig attribute) tp_group (bridge.models.conversion.param_mapping.MegatronParamMapping property) tp_only_amax_red (bridge.models.gpt_provider.GPTModelProvider attribute) tp_rank (bridge.models.conversion.param_mapping.MegatronParamMapping property) tp_size (bridge.models.conversion.param_mapping.MegatronParamMapping property) TPOverlapCfg (class in bridge.training.comm_overlap) track_moe_metrics() (in module bridge.training.utils.train_utils) TRACKER_PREFIX (in module bridge.training.checkpointing) (in module bridge.training.utils.checkpoint_utils) train (bridge.training.config.ConfigContainer attribute) train() (in module bridge.training.train) train_data_iterator (bridge.training.setup.SetupOutput attribute) train_iters (bridge.training.config.TrainingConfig attribute) train_path (bridge.data.builders.finetuning_dataset.FinetuningDatasetBuilder property) train_path_packed (bridge.data.builders.finetuning_dataset.FinetuningDatasetBuilder property) train_state (bridge.training.state.GlobalState property) TRAIN_STATE_FILE (in module bridge.training.utils.checkpoint_utils) train_step() (in module bridge.training.train) train_sync_interval (bridge.training.config.TrainingConfig attribute) training_log() (in module bridge.training.utils.train_utils) TrainingConfig (class in bridge.training.config) TrainState (class in bridge.training.state) transform() (bridge.peft.base.PEFT method) (bridge.peft.canonical_lora.CanonicalLoRA method) (bridge.peft.dora.DoRA method) (bridge.peft.lora.LoRA method) (bridge.peft.lora.LoRAMerge method) transformer_config (bridge.models.conversion.auto_bridge.AutoBridge property) transformer_engine_full_layer_spec() (in module bridge.models.gpt_provider) transformer_engine_layer_spec() (in module bridge.models.gpt_provider) (in module bridge.models.t5_provider) transformer_layer_spec (bridge.models.gpt_provider.GPTModelProvider attribute) (bridge.models.llama.llama_provider.Llama4ModelProvider attribute) (bridge.models.t5_provider.T5ModelProvider attribute) TransformerLayerTPOverlapCfg (class in bridge.training.comm_overlap) truncation (bridge.models.hf_pretrained.causal_lm.EncodeKwargs attribute) TYPE_INSTRUCTION (in module bridge.data.datasets.utils) U UnavailableError UnavailableMeta (class in bridge.utils.import_utils) UnavailableNullContext (class in bridge.utils.import_utils) unk (bridge.training.tokenizers.tokenizer.CustomTikTokenizer property) unpad_seq_to_mult() (in module bridge.peft.utils) unwrap_model() (in module bridge.utils.common_utils) update_config_with_precision_overrides() (in module bridge.training.mixed_precision) use_cache (bridge.models.hf_pretrained.causal_lm.GenerateKwargs attribute) use_checkpoint_args (bridge.training.config.CheckpointConfig attribute) use_checkpoint_opt_param_scheduler (bridge.training.config.SchedulerConfig attribute) use_cpu_initialization (bridge.models.model_provider.GetModelKwargs attribute) use_dist_ckpt() (in module bridge.utils.common_utils) use_gloo_process_groups (bridge.training.config.DistributedInitConfig attribute) use_mamba_mem_eff_path (bridge.models.mamba.nemotron_h_provider.NemotronHModel4BProvider attribute) use_nsys_profiler (bridge.training.config.ProfilingConfig attribute) use_persistent_ckpt_worker (bridge.training.config.CheckpointConfig attribute) use_pytorch_profiler (bridge.training.config.ProfilingConfig attribute) use_sharp (bridge.training.config.DistributedInitConfig attribute) use_te_rng_tracker (bridge.models.gpt_provider.GPTModelProvider attribute) use_torch_fsdp2 (bridge.models.model_provider.GetModelKwargs attribute) (bridge.training.config.DistributedInitConfig attribute) use_tp_pp_dp_mapping (bridge.training.config.DistributedInitConfig attribute) use_transformer_engine_full_layer_spec (bridge.models.gpt_provider.GPTModelProvider attribute) use_transformer_engine_op_fuser (bridge.models.llama.llama_provider.LlamaModelProvider attribute) userbuffers_bf16_b200_h12288_tp4_mbs1_seqlen2048 (in module bridge.training.comm_overlap) userbuffers_bf16_b200_h16384_tp4_cp2_mbs1_seqlen8192 (in module bridge.training.comm_overlap) userbuffers_bf16_b200_h18432_tp8_mbs1_seqlen4096 (in module bridge.training.comm_overlap) userbuffers_bf16_b200_h6144_tp2_mbs1_seqlen4096 (in module bridge.training.comm_overlap) userbuffers_bf16_b200_h8192_tp2_mbs1_seqlen8192 (in module bridge.training.comm_overlap) userbuffers_bf16_h100_h12288_tp4_mbs1_seqlen2048 (in module bridge.training.comm_overlap) userbuffers_bf16_h100_h16384_tp8_cp2_mbs1_seqlen8192 (in module bridge.training.comm_overlap) userbuffers_bf16_h100_h6144_tp2_mbs2_seqlen2048 (in module bridge.training.comm_overlap) userbuffers_bf16_h100_h8192_tp4_mbs1_seqlen8192 (in module bridge.training.comm_overlap) userbuffers_fp8_b200_h12288_tp4_mbs1_seqlen2048 (in module bridge.training.comm_overlap) userbuffers_fp8_b200_h16384_tp4_cp2_mbs1_seqlen8192 (in module bridge.training.comm_overlap) userbuffers_fp8_b200_h18432_tp8_mbs1_seqlen4096 (in module bridge.training.comm_overlap) userbuffers_fp8_b200_h8192_tp2_mbs1_seqlen8192 (in module bridge.training.comm_overlap) userbuffers_fp8_h100_h12288_tp4_mbs1_seqlen2048 (in module bridge.training.comm_overlap) userbuffers_fp8_h100_h16384_tp4_mbs1_seqlen2048_lora (in module bridge.training.comm_overlap) userbuffers_fp8_h100_h16384_tp8_cp2_mbs1_seqlen8192 (in module bridge.training.comm_overlap) userbuffers_fp8_h100_h6144_tp2_mbs2_seqlen2048 (in module bridge.training.comm_overlap) userbuffers_fp8_h100_h8192_tp2_mbs1_seqlen4096_lora (in module bridge.training.comm_overlap) userbuffers_fp8_h100_h8192_tp4_mbs1_seqlen8192 (in module bridge.training.comm_overlap) V val_proportion (bridge.data.builders.hf_dataset.HFDatasetConfig attribute) valid_data_iterator (bridge.training.setup.SetupOutput attribute) validate() (bridge.training.config.ConfigContainer method) validate_case_matches_checkpoint() (in module bridge.training.tokenizers.bert_tokenization) validate_deepep() (in module bridge.training.deepep) validate_rope_fusion_compatibility() (in module bridge.utils.fusions) validation_path (bridge.data.builders.finetuning_dataset.FinetuningDatasetBuilder property) validation_path_packed (bridge.data.builders.finetuning_dataset.FinetuningDatasetBuilder property) VERSION (in module bridge.package_info) virtual_pipeline_model_parallel_size (bridge.models.gpt_provider.GPTModelProvider attribute) VLMType (in module bridge.models.hf_pretrained.vlm) vocab (bridge.training.tokenizers.multimodal_tokenizer.MultimodalTokenizer property) (bridge.training.tokenizers.tokenizer._BertWordPieceTokenizer property) (bridge.training.tokenizers.tokenizer._GPT2BPETokenizer property) (bridge.training.tokenizers.tokenizer._HuggingFaceTokenizer property) (bridge.training.tokenizers.tokenizer._NullTokenizer property) (bridge.training.tokenizers.tokenizer._SentencePieceTokenizer property) (bridge.training.tokenizers.tokenizer.CustomTikTokenizer property) vocab_extra_ids (bridge.training.tokenizers.config.TokenizerConfig attribute) vocab_file (bridge.training.tokenizers.config.TokenizerConfig attribute) VOCAB_NAME (in module bridge.training.tokenizers.gpt2_tokenization) vocab_size (bridge.models.gpt_provider.GPTModelProvider attribute) (bridge.models.llama.llama_provider.Llama4ModelProvider attribute) (bridge.models.mamba.mamba_provider.MambaProvider attribute) (bridge.models.qwen.qwen_provider.Qwen25ModelProvider14B attribute) (bridge.models.qwen.qwen_provider.Qwen25ModelProvider32B attribute) (bridge.models.qwen.qwen_provider.Qwen25ModelProvider3B attribute) (bridge.models.qwen.qwen_provider.Qwen25ModelProvider72B attribute) (bridge.models.qwen.qwen_provider.Qwen25ModelProvider7B attribute) (bridge.models.qwen.qwen_provider.Qwen2ModelProvider attribute) (bridge.models.qwen.qwen_provider.Qwen2ModelProvider72B attribute) (bridge.models.qwen.qwen_provider.Qwen2ModelProvider7B attribute) (bridge.models.qwen.qwen_provider.Qwen3ModelProvider attribute) (bridge.models.qwen.qwen_provider.Qwen3MoEModelProvider attribute) (bridge.models.t5_provider.T5ModelProvider attribute) (bridge.training.tokenizers.config.TokenizerConfig attribute) (bridge.training.tokenizers.multimodal_tokenizer.MultimodalTokenizer property) (bridge.training.tokenizers.tokenizer._BertWordPieceTokenizer property) (bridge.training.tokenizers.tokenizer._GPT2BPETokenizer property) (bridge.training.tokenizers.tokenizer._HuggingFaceTokenizer property) (bridge.training.tokenizers.tokenizer._NullTokenizer property) (bridge.training.tokenizers.tokenizer._SentencePieceTokenizer property) (bridge.training.tokenizers.tokenizer.CustomTikTokenizer property) vocab_size() (bridge.training.tokenizers.bert_tokenization.FullTokenizer method) vp_stage (bridge.models.conversion.model_bridge.MegatronWeightTuple attribute) (bridge.models.conversion.model_bridge.WeightConversionTask attribute) W walk() (in module bridge.peft.walk_utils) wandb_entity (bridge.training.config.LoggerConfig attribute) wandb_exp_name (bridge.training.config.LoggerConfig attribute) wandb_logger (bridge.training.state.GlobalState property) wandb_project (bridge.training.config.LoggerConfig attribute) wandb_save_dir (bridge.training.config.LoggerConfig attribute) warning_filter() (in module bridge.training.utils.log_utils) wd_incr_steps (bridge.training.config.SchedulerConfig attribute) weight (bridge.models.conversion.model_bridge.HFWeightTuple attribute) (bridge.models.conversion.model_bridge.MegatronWeightTuple attribute) weight_decay_incr_style (bridge.training.config.SchedulerConfig attribute) WeightConversionTask (class in bridge.models.conversion.model_bridge) weights_verification_table() (in module bridge.models.conversion.utils) WeightType (in module bridge.models.conversion.param_mapping) wgrad_deferral_limit (bridge.training.comm_overlap._CommOverlapConfig attribute) (bridge.training.comm_overlap.CommOverlapConfig attribute) whitespace_tokenize() (in module bridge.training.tokenizers.bert_tokenization) wildcard_match() (in module bridge.peft.utils) WordpieceTokenizer (class in bridge.training.tokenizers.bert_tokenization) wrap_train_step_function() (bridge.training.nvrx_straggler.NVRxStragglerDetectionManager method) wrap_with_ddp (bridge.models.model_provider.GetModelKwargs attribute) wsd_decay_steps (bridge.training.config.SchedulerConfig attribute)