bridge.diffusion.models.wan.wan_provider

Module Contents

Classes

WanModelProvider

Data

logger

API

bridge.diffusion.models.wan.wan_provider.logger

'getLogger(...)'

class bridge.diffusion.models.wan.wan_provider.WanModelProvider

Bases: megatron.bridge.models.transformer_config.TransformerConfig, megatron.bridge.models.model_provider.ModelProviderMixin[megatron.core.models.common.vision_module.vision_module.VisionModule]

crossattn_emb_size: int

1536

add_bias_linear: bool

True

gated_linear_unit: bool

False

num_layers: int

30

hidden_size: int

1536

ffn_hidden_size: int

8960

num_attention_heads: int

12

layernorm_epsilon: float

1e-06

normalization: str

'RMSNorm'

layernorm_zero_centered_gamma: bool

False

layernorm_across_heads: bool

True

add_qkv_bias: bool

True

rotary_interleaved: bool

True

activation_func: Callable

None

hidden_dropout: float

0

attention_dropout: float

0

fp16_lm_cross_entropy: bool

False

parallel_output: bool

True

bf16: bool

False

params_dtype: torch.dtype

None

qkv_format: str

'thd'

apply_rope_fusion: bool

False

bias_activation_fusion: bool

True

seq_length: int

1024

share_embeddings_and_output_weights: bool

False

vocab_size: int

None

make_vocab_size_divisible_by: int

128

in_channels: int

16

out_channels: int

16

patch_spatial: int

2

patch_temporal: int

1

freq_dim: int

256

text_len: int

512

text_dim: int

4096

provide(
pre_process=None,
post_process=None,
vp_stage=None,
) -> megatron.bridge.diffusion.models.wan.wan_model.WanModel
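
A minimal usage sketch (not part of the generated reference): it assumes WanModelProvider behaves as a dataclass-style config, as its TransformerConfig base suggests, and the override values below are illustrative.

    import torch

    from bridge.diffusion.models.wan.wan_provider import WanModelProvider

    # Every attribute documented above has a default, so only the fields
    # to be changed need to be passed at construction time.
    provider = WanModelProvider(
        params_dtype=torch.bfloat16,  # documented default is None
        seq_length=4096,              # documented default is 1024
    )

    # provide() returns a wan_model.WanModel. pre_process, post_process,
    # and vp_stage default to None; assumption: when left as None they are
    # inferred from the current pipeline-parallel configuration.
    model = provider.provide()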