bridge.models.conversion.peft_bridge#

Module Contents#

Classes#

AdapterWeightConversionTask

Task describing an adapter’s LoRA weights for conversion or merging.

AdapterWeight

Materialized adapter weights ready for merge.

MegatronPeftBridge

Mixin providing adapter-aware utilities for Megatron model bridges.

Functions#

_select_hf_base_param_name

Return the HF base parameter name associated with this adapter.

infer_target_modules_from_adapter_weights

Derive HF target_modules from the HF-format adapter weight names.

build_adapter_config_dict

Build an HF PEFT-compatible adapter_config.json dictionary.

Data#

API#

bridge.models.conversion.peft_bridge.MegatronModel#

'TypeVar(…)'

bridge.models.conversion.peft_bridge.ADAPTER_NAME_MAP#

None

bridge.models.conversion.peft_bridge.ADAPTER_KEY_TO_SUFFIX#

None

bridge.models.conversion.peft_bridge.MEGATRON_TO_HF_LORA_SUFFIX#

None

bridge.models.conversion.peft_bridge.GDN_IN_PROJ_KEYS#

('in_proj_qkv', 'in_proj_z', 'in_proj_b', 'in_proj_a')

class bridge.models.conversion.peft_bridge.AdapterWeightConversionTask#

Task describing an adapter’s LoRA weights for conversion or merging.

global_base_prefix: str#

None

adapter_key: Optional[str]#

None

alpha: int#

None

dim: int#

None

linear_in_task: megatron.bridge.models.conversion.model_bridge.WeightConversionTask#

None

linear_out_task: megatron.bridge.models.conversion.model_bridge.WeightConversionTask#

None

class bridge.models.conversion.peft_bridge.AdapterWeight#

Materialized adapter weights ready for merge.

global_base_prefix: str#

None

adapter_key: Optional[str]#

None

alpha: int#

None

dim: int#

None

linear_in_weight: megatron.bridge.models.conversion.model_bridge.MegatronWeightTuple#

None

linear_out_weight: megatron.bridge.models.conversion.model_bridge.MegatronWeightTuple#

None

bridge.models.conversion.peft_bridge._select_hf_base_param_name(
base_mapping,
adapter_key: Optional[str],
expected_suffix: str,
) → Optional[str]#

Return the HF base parameter name associated with this adapter.

class bridge.models.conversion.peft_bridge.MegatronPeftBridge#

Mixin providing adapter-aware utilities for Megatron model bridges.

_get_lora_unwrapped_name(megatron_param: str) → str#

Remove .to_wrap from LoRA parameter names.

_is_adapter_param_name(param_name: str) → bool#

Return True if the parameter only belongs to a PEFT adapter.

_get_adapter_wrap_module(
local_base_prefix: str,
megatron_model: Union[bridge.models.conversion.peft_bridge.MegatronModel, List[bridge.models.conversion.peft_bridge.MegatronModel]],
vp_stage: int,
) → tuple[Optional[torch.nn.Module], Optional[torch.nn.Module]]#

Locate the adapter wrapper and its underlying module.

_resolve_hf_adapter_param_name(
mapping_registry: megatron.bridge.models.conversion.mapping_registry.MegatronMappingRegistry,
global_base_prefix: str,
megatron_adapter_suffix: str,
base_suffix: str,
adapter_key: Optional[str],
) → Optional[str]#

Resolve the HuggingFace adapter parameter name by translating the base Megatron name.

.. note::

LoRA adapters never register bias tensors for linear_in / linear_out, so callers only pass weight suffixes here. The bias fallback below is solely for robustness in case a future adapter type introduces biased projections.

_get_base_hf_param_names_for_adapter(
mapping_registry: megatron.bridge.models.conversion.mapping_registry.MegatronMappingRegistry,
global_base_prefix: str,
adapter_key: Optional[str],
base_suffix: str,
) → List[str]#

Return all HF base parameter names associated with this adapter.

_make_lora_param_name(
base_name: str,
megatron_adapter_suffix: str,
) → Optional[str]#

Translate a base HF weight name into its LoRA-specific counterpart.

_is_fused_qkv(hf_weight_names: Iterable[str]) → bool#

Check whether the provided HF names correspond to a fused QKV weight.

_is_gdn_in_proj_split(hf_weight_names: Iterable[str]) → bool#

Check whether the provided HF names correspond to split GDN in_proj weights.

_is_fused_fc1_gate_up(
base_hf_weight_names: Iterable[str],
linear_out_tensor: torch.Tensor,
base_weight_shape: Optional[torch.Size] = None,
) → bool#

Detect fused FC1 (gate/up) adapters based on names and tensor shape.

_infer_qkv_projection_from_name(hf_name: str) → Optional[str]#

Return q_proj/k_proj/v_proj identifier based on the HF name.

_infer_gdn_in_proj_projection_from_name(
hf_name: str,
) → Optional[str]#

Return in_proj_qkv/z/b/a identifier based on the HF name.

_infer_hf_expert_idx(hf_name: str) → Optional[int]#

Return the expert index embedded in an HF MoE weight name.

_split_qkv_linear_out_weight(
megatron_model: Union[bridge.models.conversion.peft_bridge.MegatronModel, List[bridge.models.conversion.peft_bridge.MegatronModel]],
linear_out_weight: torch.Tensor,
) → Dict[str, torch.Tensor]#

Split a fused LoRA linear_out tensor for QKV adapters.

_split_gdn_in_proj_linear_out_weight(
megatron_model: Union[bridge.models.conversion.peft_bridge.MegatronModel, List[bridge.models.conversion.peft_bridge.MegatronModel]],
linear_out_weight: torch.Tensor,
) → Dict[str, torch.Tensor]#

Split a fused LoRA linear_out tensor for GDN in_proj adapters.

_build_lora_hf_names(
base_hf_weight_names: List[str],
) → tuple[List[str], List[str]]#

Build LoRA A/B names for a list of HF base parameter names.

_collect_packed_expert_adapter_tensors(
linear_in_tensor: torch.Tensor,
linear_out_tensor: torch.Tensor,
expert_linear_in_gathered: Optional[List[torch.Tensor]],
expert_linear_out_gathered: Optional[List[torch.Tensor]],
num_moe_experts: int,
) → tuple[List[torch.Tensor], List[torch.Tensor]]#

Collect one LoRA A/B tensor per expert for grouped expert exports.

_build_packed_expert_linear_out_by_base(
megatron_model: List[bridge.models.conversion.peft_bridge.MegatronModel],
base_hf_weight_names: List[str],
per_expert_linear_out: List[torch.Tensor],
is_expert: bool,
) → Dict[str, torch.Tensor]#

Build per-base stacked LoRA-B tensors for packed grouped-expert export.

_split_fused_fc1_linear_out_weight(
linear_out_weight: torch.Tensor,
*,
is_expert: bool,
) → tuple[torch.Tensor, torch.Tensor]#

Split fused FC1 LoRA linear_out into gate/up with TP-aware ordering.

_gather_expert_adapter_weight(
weight: torch.Tensor,
) → Optional[List[torch.Tensor]]#

Gather expert-sharded adapter weights across EP ranks when needed.

_select_expert_adapter_weight(
weight: torch.Tensor,
gathered: List[torch.Tensor],
expert_idx: int,
num_experts: int,
) → torch.Tensor#

Select the per-expert adapter weight slice if present.

_megatron_global_adapters_info_all_pp_ranks(
megatron_model: Union[bridge.models.conversion.peft_bridge.MegatronModel, List[bridge.models.conversion.peft_bridge.MegatronModel]],
) → List[tuple[str, str, bool, bool, int, int, int, int]]#

Get all adapters’ information tuple: (global_base_name, local_base_prefix, input_is_parallel, base_linear_is_parallel, alpha, dim, pp_rank, vp_stage) across all pipeline parallel ranks.

_construct_adapters_names(
prefix: str,
adapter_key: Optional[str],
) → tuple[str, str]#

Build linear_in/linear_out parameter names for an adapter.

Parameters:
  • prefix – Base module prefix without any adapter suffix (global or local, depending on caller).

  • adapter_key – Optional adapter identifier used by CanonicalLoRA (e.g. adapter_q). None for standard single-adapter LoRA modules.

Returns:

Tuple (linear_in_name, linear_out_name) containing the parameter names for the adapter’s input and output projection weights.

build_adapter_conversion_tasks(
megatron_model: Union[bridge.models.conversion.peft_bridge.MegatronModel, List[bridge.models.conversion.peft_bridge.MegatronModel]],
) → Dict[str, List[bridge.models.conversion.peft_bridge.AdapterWeightConversionTask]]#

Construct adapter merge tasks keyed by their base parameter.

The returned dict is keyed by the global LoRA-wrapped parameter name (e.g., decoder.layers.0.mlp.linear_fc1.to_wrap.weight). Each value contains the adapter tasks (canonical or regular) that should be merged into that base weight.

materialize_adapter_weights(
adapter_tasks: List[bridge.models.conversion.peft_bridge.AdapterWeightConversionTask],
) → List[bridge.models.conversion.peft_bridge.AdapterWeight]#

Run adapter merge tasks to gather full adapter weights.

stream_adapter_weights_megatron_to_hf(
megatron_model: Union[bridge.models.conversion.peft_bridge.MegatronModel, List[bridge.models.conversion.peft_bridge.MegatronModel]],
cpu: bool = True,
show_progress: bool = True,
) → Iterable[megatron.bridge.models.conversion.model_bridge.HFWeightTuple]#

Stream only adapter weights without merging them into base tensors.

_get_fused_adapter_linear_out_slices(
megatron_model: List[bridge.models.conversion.peft_bridge.MegatronModel],
base_hf_weight_names: List[str],
linear_out_tensor: torch.Tensor,
is_expert: bool = False,
) → Optional[Dict[str, torch.Tensor]]#

Return per-base-name linear_out slices for fused adapters, else None.

This supports fused QKV adapters (split into q/k/v) and fused FC1 adapters (split into gate/up along dim=0). The returned dict is keyed by the HF base weight name (e.g. ...q_proj.weight or ...gate_proj.weight).

_merge_lora_adapter_weights(
megatron_model: List[bridge.models.conversion.peft_bridge.MegatronModel],
converted_weights_dict: Dict[str, torch.Tensor],
adapter_weights: List[bridge.models.conversion.peft_bridge.AdapterWeight],
) → Dict[str, torch.Tensor]#

Merge LoRA adapter weights back into the base tensor for HF export.

_merge_single_adapter_weight(
base_weight: torch.Tensor,
alpha: int,
dim: int,
linear_in_weight: torch.Tensor,
linear_out_weight: torch.Tensor,
) → torch.Tensor#

Merge a single adapter’s weights with base weight.

The merge is performed in float32 to avoid precision loss from bfloat16 matmul (adapter weights are often stored in bf16). The result is cast back to the original base weight dtype.

_merge_canonical_adapter_from_weights(
megatron_model: List[bridge.models.conversion.peft_bridge.MegatronModel],
converted_weights_dict: Dict[str, torch.Tensor],
adapter_weights: List[bridge.models.conversion.peft_bridge.AdapterWeight],
) → Dict[str, torch.Tensor]#

Merge CanonicalLoRA adapters using pre-materialized adapter weights.

bridge.models.conversion.peft_bridge._HF_LORA_SUFFIXES#

('.lora_A.weight', '.lora_B.weight')

bridge.models.conversion.peft_bridge.infer_target_modules_from_adapter_weights(
adapter_weight_names: Iterable[str],
) → List[str]#

Derive HF target_modules from the HF-format adapter weight names.

Given names like model.layers.0.self_attn.q_proj.lora_A.weight, this extracts the unique module identifiers (q_proj, gate_proj, …) that the peft library expects in adapter_config.json.

bridge.models.conversion.peft_bridge.build_adapter_config_dict(
peft_config: megatron.bridge.peft.base.PEFT,
target_modules: List[str],
base_model_name_or_path: Optional[str] = None,
) → Dict[str, object]#

Build an HF PEFT-compatible adapter_config.json dictionary.

The returned dict can be serialised directly with json.dump and is loadable by peft.PeftModel.from_pretrained without any runtime dependency on the peft pip package.