bridge.models.conversion.peft_bridge#

Module Contents#

Classes#

AdapterWeightConversionTask

Task describing an adapter’s LoRA weights for conversion or merging.

AdapterWeight

Materialized adapter weights ready for merge.

MegatronPeftBridge

Mixin providing adapter-aware utilities for Megatron model bridges.

Functions#

_select_hf_base_param_name

Return the HF base parameter name associated with this adapter.

Data#

API#

bridge.models.conversion.peft_bridge.MegatronModel#

‘TypeVar(…)’

bridge.models.conversion.peft_bridge.ADAPTER_NAME_MAP#

None

bridge.models.conversion.peft_bridge.ADAPTER_KEY_TO_SUFFIX#

None

bridge.models.conversion.peft_bridge.MEGATRON_TO_HF_LORA_SUFFIX#

None

class bridge.models.conversion.peft_bridge.AdapterWeightConversionTask#

Task describing an adapter’s LoRA weights for conversion or merging.

global_base_prefix: str#

None

adapter_key: Optional[str]#

None

alpha: int#

None

dim: int#

None

linear_in_task: megatron.bridge.models.conversion.model_bridge.WeightConversionTask#

None

linear_out_task: megatron.bridge.models.conversion.model_bridge.WeightConversionTask#

None

class bridge.models.conversion.peft_bridge.AdapterWeight#

Materialized adapter weights ready for merge.

global_base_prefix: str#

None

adapter_key: Optional[str]#

None

alpha: int#

None

dim: int#

None

linear_in_weight: megatron.bridge.models.conversion.model_bridge.MegatronWeightTuple#

None

linear_out_weight: megatron.bridge.models.conversion.model_bridge.MegatronWeightTuple#

None

bridge.models.conversion.peft_bridge._select_hf_base_param_name(
base_mapping,
adapter_key: Optional[str],
expected_suffix: str,
) → Optional[str]#

Return the HF base parameter name associated with this adapter.

class bridge.models.conversion.peft_bridge.MegatronPeftBridge#

Mixin providing adapter-aware utilities for Megatron model bridges.

_get_lora_unwrapped_name(megatron_param: str) → str#

Remove .to_wrap from LoRA parameter names.
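For intuition, the transformation amounts to dropping the .to_wrap segment that the LoRA wrapper inserts into parameter paths; a hypothetical equivalent:

```python
def get_lora_unwrapped_name(megatron_param: str) -> str:
    # Illustrative sketch only; the real method may guard extra edge cases.
    # "decoder.layers.0.mlp.linear_fc1.to_wrap.weight"
    #   -> "decoder.layers.0.mlp.linear_fc1.weight"
    return megatron_param.replace(".to_wrap", "")
```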

_is_adapter_param_name(param_name: str) → bool#

Return True if the parameter only belongs to a PEFT adapter.
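A hedged sketch of what such a check could look like, assuming adapter-only parameters are the ones nested under an adapter submodule (the exact marker string is an assumption):

```python
def is_adapter_param_name(param_name: str) -> bool:
    # Assumption: LoRA parameters are nested under an "adapter" submodule,
    # e.g. "decoder.layers.0.mlp.linear_fc1.adapter.linear_in.weight".
    return ".adapter." in param_name
```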

_get_adapter_wrap_module(
local_base_prefix: str,
megatron_model: Union[bridge.models.conversion.peft_bridge.MegatronModel, List[bridge.models.conversion.peft_bridge.MegatronModel]],
vp_stage: int,
) → tuple[Optional[torch.nn.Module], Optional[torch.nn.Module]]#

Locate the adapter wrapper and its underlying module.

_resolve_hf_adapter_param_name(
mapping_registry: megatron.bridge.models.conversion.mapping_registry.MegatronMappingRegistry,
global_base_prefix: str,
megatron_adapter_suffix: str,
base_suffix: str,
adapter_key: Optional[str],
) → Optional[str]#

Resolve the HuggingFace adapter parameter name by translating the base Megatron name.

Note: LoRA adapters never register bias tensors for linear_in / linear_out, so callers only pass weight suffixes here. The bias fallback in this method exists solely for robustness, in case a future adapter type introduces biased projections.

_get_base_hf_param_names_for_adapter(
mapping_registry: megatron.bridge.models.conversion.mapping_registry.MegatronMappingRegistry,
global_base_prefix: str,
adapter_key: Optional[str],
base_suffix: str,
) → List[str]#

Return all HF base parameter names associated with this adapter.

_make_lora_param_name(
base_name: str,
megatron_adapter_suffix: str,
) → Optional[str]#

Translate a base HF weight name into its LoRA-specific counterpart.
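As a sketch of the translation, assuming PEFT-style lora_A/lora_B target names (the actual mapping lives in MEGATRON_TO_HF_LORA_SUFFIX and may differ):

```python
from typing import Optional

def make_lora_param_name(base_name: str, megatron_adapter_suffix: str) -> Optional[str]:
    # Assumed suffix mapping; the real one lives in MEGATRON_TO_HF_LORA_SUFFIX.
    suffix_map = {
        "adapter.linear_in.weight": "lora_A.weight",
        "adapter.linear_out.weight": "lora_B.weight",
    }
    hf_suffix = suffix_map.get(megatron_adapter_suffix)
    if hf_suffix is None or not base_name.endswith(".weight"):
        return None
    # "model.layers.0.self_attn.q_proj.weight"
    #   -> "model.layers.0.self_attn.q_proj.lora_A.weight"
    return base_name[: -len("weight")] + hf_suffix
```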

_is_fused_qkv(hf_weight_names: Iterable[str]) → bool#

Check whether the provided HF names correspond to a fused QKV weight.

_is_fused_fc1_gate_up(
base_hf_weight_names: Iterable[str],
linear_out_tensor: torch.Tensor,
base_weight_shape: Optional[torch.Size] = None,
) → bool#

Detect fused FC1 (gate/up) adapters based on names and tensor shape.

_infer_qkv_projection_from_name(hf_name: str) → Optional[str]#

Return q_proj/k_proj/v_proj identifier based on the HF name.

_infer_hf_expert_idx(hf_name: str) → Optional[int]#

Return the expert index embedded in an HF MoE weight name.
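Hypothetical equivalents of the two name-inspection helpers above, assuming conventional HuggingFace layer naming:

```python
import re
from typing import Optional

def infer_qkv_projection_from_name(hf_name: str) -> Optional[str]:
    # e.g. "model.layers.0.self_attn.k_proj.weight" -> "k_proj"
    for proj in ("q_proj", "k_proj", "v_proj"):
        if f".{proj}." in hf_name:
            return proj
    return None

def infer_hf_expert_idx(hf_name: str) -> Optional[int]:
    # e.g. "model.layers.0.mlp.experts.3.gate_proj.weight" -> 3 (assumed layout)
    match = re.search(r"\.experts\.(\d+)\.", hf_name)
    return int(match.group(1)) if match else None
```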

_split_qkv_linear_out_weight(
megatron_model: Union[bridge.models.conversion.peft_bridge.MegatronModel, List[bridge.models.conversion.peft_bridge.MegatronModel]],
linear_out_weight: torch.Tensor,
) → Dict[str, torch.Tensor]#

Split a fused LoRA linear_out tensor for QKV adapters.
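For intuition, a simplified single-rank sketch of the split, assuming Megatron's interleaved fused-QKV row layout (one [q…q, k, v] block per KV-head group) and taking the head counts as explicit arguments rather than reading them from the model config, as the real method does:

```python
from typing import Dict

import torch

def split_qkv_linear_out(
    linear_out_weight: torch.Tensor,  # [(num_heads + 2 * num_kv_heads) * head_dim, rank]
    num_heads: int,
    num_kv_heads: int,
    head_dim: int,
) -> Dict[str, torch.Tensor]:
    q_per_group = num_heads // num_kv_heads
    group_rows = (q_per_group + 2) * head_dim
    qs, ks, vs = [], [], []
    # One [q...q, k, v] block per KV-head group.
    for group in linear_out_weight.split(group_rows, dim=0):
        q, k, v = group.split([q_per_group * head_dim, head_dim, head_dim], dim=0)
        qs.append(q)
        ks.append(k)
        vs.append(v)
    return {
        "q_proj": torch.cat(qs, dim=0),
        "k_proj": torch.cat(ks, dim=0),
        "v_proj": torch.cat(vs, dim=0),
    }
```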

_split_fused_fc1_linear_out_weight(
linear_out_weight: torch.Tensor,
*,
is_expert: bool,
) → tuple[torch.Tensor, torch.Tensor]#

Split fused FC1 LoRA linear_out into gate/up with TP-aware ordering.
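In the simplest case (no tensor parallelism) the fused FC1 output just stacks gate rows on top of up rows; a minimal sketch under that assumption (the real method also reorders interleaved tensor-parallel shards):

```python
import torch

def split_fused_fc1(linear_out_weight: torch.Tensor):
    # Single-rank assumption: rows are [gate; up], stacked along dim 0.
    gate, up = torch.chunk(linear_out_weight, 2, dim=0)
    return gate, up
```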

_gather_expert_adapter_weight(
weight: torch.Tensor,
) → Optional[List[torch.Tensor]]#

Gather expert-sharded adapter weights across EP ranks when needed.

_select_expert_adapter_weight(
weight: torch.Tensor,
gathered: List[torch.Tensor],
expert_idx: int,
num_experts: int,
) → torch.Tensor#

Select the per-expert adapter weight slice if present.

_megatron_global_adapters_info_all_pp_ranks(
megatron_model: Union[bridge.models.conversion.peft_bridge.MegatronModel, List[bridge.models.conversion.peft_bridge.MegatronModel]],
) → List[tuple[str, str, bool, bool, int, int, int, int]]#

Collect one information tuple per adapter across all pipeline-parallel ranks: (global_base_name, local_base_prefix, input_is_parallel, base_linear_is_parallel, alpha, dim, pp_rank, vp_stage).

_construct_adapters_names(
prefix: str,
adapter_key: Optional[str],
) → tuple[str, str]#

Build linear_in/linear_out parameter names for an adapter.

Parameters:
  • prefix – Base module prefix without any adapter suffix (global or local, depending on caller).

  • adapter_key – Optional adapter identifier used by CanonicalLoRA (e.g. adapter_q). None for standard single-adapter LoRA modules.

Returns:

Tuple (linear_in_name, linear_out_name) containing the parameter names for the adapter’s input and output projection weights.
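A hedged sketch of the construction, assuming the adapter.linear_in / adapter.linear_out submodule layout implied by the names elsewhere on this page (the exact CanonicalLoRA path segment for adapter_key is an assumption):

```python
from typing import Optional

def construct_adapters_names(prefix: str, adapter_key: Optional[str]):
    # Assumed layout: regular LoRA nests weights under "adapter.";
    # CanonicalLoRA adds a per-projection key such as "adapter_q".
    base = f"{prefix}.adapter.{adapter_key}" if adapter_key else f"{prefix}.adapter"
    return f"{base}.linear_in.weight", f"{base}.linear_out.weight"
```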

build_adapter_conversion_tasks(
megatron_model: Union[bridge.models.conversion.peft_bridge.MegatronModel, List[bridge.models.conversion.peft_bridge.MegatronModel]],
) → Dict[str, List[bridge.models.conversion.peft_bridge.AdapterWeightConversionTask]]#

Construct adapter merge tasks keyed by their base parameter.

The returned dict is keyed by the global LoRA-wrapped parameter name (e.g., decoder.layers.0.mlp.linear_fc1.to_wrap.weight). Each value contains the adapter tasks (canonical or regular) that should be merged into that base weight.
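A usage sketch combining this with materialize_adapter_weights; the surrounding setup is assumed:

```python
# `bridge` is assumed to be a model bridge that mixes in MegatronPeftBridge;
# `megatron_model` is the (possibly virtual-pipelined) Megatron model or list.
tasks_by_base = bridge.build_adapter_conversion_tasks(megatron_model)
for base_param_name, tasks in tasks_by_base.items():
    # e.g. base_param_name == "decoder.layers.0.mlp.linear_fc1.to_wrap.weight"
    adapter_weights = bridge.materialize_adapter_weights(tasks)
```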

materialize_adapter_weights(
adapter_tasks: List[bridge.models.conversion.peft_bridge.AdapterWeightConversionTask],
) → List[bridge.models.conversion.peft_bridge.AdapterWeight]#

Run adapter merge tasks to gather full adapter weights.

stream_adapter_weights_megatron_to_hf(
megatron_model: Union[bridge.models.conversion.peft_bridge.MegatronModel, List[bridge.models.conversion.peft_bridge.MegatronModel]],
cpu: bool = True,
show_progress: bool = True,
) → Iterable[megatron.bridge.models.conversion.model_bridge.HFWeightTuple]#

Stream only adapter weights without merging them into base tensors.
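Illustrative consumption of the stream; HFWeightTuple's field layout is not shown on this page, so the (name, tensor) unpacking below is an assumption:

```python
for hf_name, tensor in bridge.stream_adapter_weights_megatron_to_hf(
    megatron_model, cpu=True, show_progress=False
):
    # Only adapter (LoRA) tensors are yielded; base weights are not merged.
    print(hf_name, tuple(tensor.shape))
```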

_get_fused_adapter_linear_out_slices(
megatron_model: List[bridge.models.conversion.peft_bridge.MegatronModel],
base_hf_weight_names: List[str],
linear_out_tensor: torch.Tensor,
is_expert: bool = False,
) → Optional[Dict[str, torch.Tensor]]#

Return per-base-name linear_out slices for fused adapters, else None.

This supports fused QKV adapters (split into q/k/v) and fused FC1 adapters (split into gate/up along dim=0). The returned dict is keyed by the HF base weight name (e.g. ...q_proj.weight or ...gate_proj.weight).

_merge_lora_adapter_weights(
megatron_model: List[bridge.models.conversion.peft_bridge.MegatronModel],
converted_weights_dict: Dict[str, torch.Tensor],
adapter_weights: List[bridge.models.conversion.peft_bridge.AdapterWeight],
) → Dict[str, torch.Tensor]#

Merge LoRA adapter weights back into the base tensor for HF export.

_merge_single_adapter_weight(
base_weight: torch.Tensor,
alpha: int,
dim: int,
linear_in_weight: torch.Tensor,
linear_out_weight: torch.Tensor,
) → torch.Tensor#

Merge a single adapter’s weights into the base weight.
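The merge follows the standard LoRA update W' = W + (alpha / dim) · (linear_out @ linear_in); a minimal sketch under that assumption:

```python
import torch

def merge_single_adapter_weight(
    base_weight: torch.Tensor,        # W: [out_features, in_features]
    alpha: int,
    dim: int,                         # LoRA rank
    linear_in_weight: torch.Tensor,   # A: [dim, in_features]
    linear_out_weight: torch.Tensor,  # B: [out_features, dim]
) -> torch.Tensor:
    scaling = alpha / dim
    return base_weight + scaling * (linear_out_weight @ linear_in_weight)
```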

_merge_canonical_adapter_from_weights(
megatron_model: List[bridge.models.conversion.peft_bridge.MegatronModel],
converted_weights_dict: Dict[str, torch.Tensor],
adapter_weights: List[bridge.models.conversion.peft_bridge.AdapterWeight],
) → Dict[str, torch.Tensor]#

Merge CanonicalLoRA adapters using pre-materialized adapter weights.