Getting Started
Python API documentation
Format
DelayedScaling
Linear
Linear.forward()
Linear.set_tensor_parallel_group()
GroupedLinear
GroupedLinear.forward()
GroupedLinear.set_tensor_parallel_group()
LayerNorm
RMSNorm
LayerNormLinear
LayerNormLinear.forward()
LayerNormLinear.set_tensor_parallel_group()
LayerNormMLP
LayerNormMLP.forward()
LayerNormMLP.set_tensor_parallel_group()
DotProductAttention
DotProductAttention.forward()
DotProductAttention.set_context_parallel_group()
MultiheadAttention
MultiheadAttention.forward()
MultiheadAttention.set_context_parallel_group()
MultiheadAttention.set_tensor_parallel_group()
TransformerLayer
TransformerLayer.forward()
TransformerLayer.set_context_parallel_group()
TransformerLayer.set_tensor_parallel_group()
InferenceParams
CudaRNGStatesTracker
CudaRNGStatesTracker.add()
CudaRNGStatesTracker.fork()
CudaRNGStatesTracker.get_states()
CudaRNGStatesTracker.reset()
CudaRNGStatesTracker.set_states()
fp8_autocast()
fp8_model_init()
checkpoint()
onnx_export()
make_graphed_callables()
get_cpu_offload_context()
moe_permute()
moe_unpermute()
TransformerLayerType
MeshResource
update_collections()
DenseGeneral
LayerNormDenseGeneral
RelativePositionBiases
MultiHeadAttention
extend_logical_axis_rules()
FusedScaleMaskSoftmax
FusedScaleMaskSoftmax.forward()
MultiHeadAttention.forward()
recompute()
Examples and Tutorials
LlamaModel
LlamaDecoderLayer
BF16
FP8
Advanced
NVTE_Activation_Type
NVTE_Activation_Type::GELU
NVTE_Activation_Type::GEGLU
NVTE_Activation_Type::SILU
NVTE_Activation_Type::SWIGLU
NVTE_Activation_Type::RELU
NVTE_Activation_Type::REGLU
NVTE_Activation_Type::QGELU
NVTE_Activation_Type::QGEGLU
NVTE_Activation_Type::SRELU
NVTE_Activation_Type::SREGLU
nvte_gelu()
nvte_silu()
nvte_relu()
nvte_qgelu()
nvte_srelu()
nvte_dgelu()
nvte_dsilu()
nvte_drelu()
nvte_dqgelu()
nvte_dsrelu()
nvte_geglu()
nvte_swiglu()
nvte_reglu()
nvte_qgeglu()
nvte_sreglu()
nvte_dgeglu()
nvte_dswiglu()
nvte_dreglu()
nvte_dqgeglu()
nvte_dsreglu()
nvte_fp8_quantize()
nvte_fp8_dequantize()
nvte_cublas_gemm()
nvte_cublas_atomic_gemm()
nvte_multi_stream_cublas_gemm()
transformer_engine
transformer_engine::num_streams
NVTE_QKV_Layout
NVTE_QKV_Layout::NVTE_SB3HD
NVTE_QKV_Layout::NVTE_SBH3D
NVTE_QKV_Layout::NVTE_SBHD_SB2HD
NVTE_QKV_Layout::NVTE_SBHD_SBH2D
NVTE_QKV_Layout::NVTE_SBHD_SBHD_SBHD
NVTE_QKV_Layout::NVTE_BS3HD
NVTE_QKV_Layout::NVTE_BSH3D
NVTE_QKV_Layout::NVTE_BSHD_BS2HD
NVTE_QKV_Layout::NVTE_BSHD_BSH2D
NVTE_QKV_Layout::NVTE_BSHD_BSHD_BSHD
NVTE_QKV_Layout::NVTE_T3HD
NVTE_QKV_Layout::NVTE_TH3D
NVTE_QKV_Layout::NVTE_THD_T2HD
NVTE_QKV_Layout::NVTE_THD_TH2D
NVTE_QKV_Layout::NVTE_THD_THD_THD
NVTE_QKV_Layout_Group
NVTE_QKV_Layout_Group::NVTE_3HD
NVTE_QKV_Layout_Group::NVTE_H3D
NVTE_QKV_Layout_Group::NVTE_HD_2HD
NVTE_QKV_Layout_Group::NVTE_HD_H2D
NVTE_QKV_Layout_Group::NVTE_HD_HD_HD
NVTE_QKV_Format
NVTE_QKV_Format::NVTE_SBHD
NVTE_QKV_Format::NVTE_BSHD
NVTE_QKV_Format::NVTE_THD
NVTE_Bias_Type
NVTE_Bias_Type::NVTE_NO_BIAS
NVTE_Bias_Type::NVTE_PRE_SCALE_BIAS
NVTE_Bias_Type::NVTE_POST_SCALE_BIAS
NVTE_Bias_Type::NVTE_ALIBI
NVTE_Mask_Type
NVTE_Mask_Type::NVTE_NO_MASK
NVTE_Mask_Type::NVTE_PADDING_MASK
NVTE_Mask_Type::NVTE_CAUSAL_MASK
NVTE_Mask_Type::NVTE_PADDING_CAUSAL_MASK
NVTE_Mask_Type::NVTE_CAUSAL_BOTTOM_RIGHT_MASK
NVTE_Mask_Type::NVTE_PADDING_CAUSAL_BOTTOM_RIGHT_MASK
NVTE_Fused_Attn_Backend
NVTE_Fused_Attn_Backend::NVTE_No_Backend
NVTE_Fused_Attn_Backend::NVTE_F16_max512_seqlen
NVTE_Fused_Attn_Backend::NVTE_F16_arbitrary_seqlen
NVTE_Fused_Attn_Backend::NVTE_FP8
nvte_get_qkv_layout_group()
nvte_get_qkv_format()
nvte_get_fused_attn_backend()
nvte_fused_attn_fwd_qkvpacked()
nvte_fused_attn_bwd_qkvpacked()
nvte_fused_attn_fwd_kvpacked()
nvte_fused_attn_bwd_kvpacked()
nvte_fused_attn_fwd()
nvte_fused_attn_bwd()
nvte_layernorm_fwd()
nvte_layernorm1p_fwd()
nvte_layernorm_bwd()
nvte_layernorm1p_bwd()
nvte_rmsnorm_fwd()
nvte_rmsnorm1p_fwd()
nvte_rmsnorm_bwd()
nvte_rmsnorm1p_bwd()
nvte_scaled_softmax_forward()
nvte_scaled_softmax_backward()
nvte_scaled_masked_softmax_forward()
nvte_scaled_masked_softmax_backward()
nvte_scaled_upper_triang_masked_softmax_forward()
nvte_scaled_upper_triang_masked_softmax_backward()
nvte_scaled_aligned_causal_masked_softmax_forward()
nvte_scaled_aligned_causal_masked_softmax_backward()
NVTETensor
NVTEDType
NVTEDType::kNVTEByte
NVTEDType::kNVTEInt32
NVTEDType::kNVTEInt64
NVTEDType::kNVTEFloat32
NVTEDType::kNVTEFloat16
NVTEDType::kNVTEBFloat16
NVTEDType::kNVTEFloat8E4M3
NVTEDType::kNVTEFloat8E5M2
NVTEDType::kNVTENumTypes
nvte_create_tensor()
nvte_destroy_tensor()
nvte_tensor_type()
nvte_tensor_shape()
nvte_tensor_data()
nvte_tensor_amax()
nvte_tensor_scale()
nvte_tensor_scale_inv()
nvte_tensor_pack_create()
nvte_tensor_pack_destroy()
NVTEShape
NVTEShape::data
NVTEShape::ndim
NVTETensorPack
NVTETensorPack::tensors
NVTETensorPack::size
NVTETensorPack::MAX_SIZE
transformer_engine::DType
transformer_engine::TensorWrapper
nvte_cast_transpose()
nvte_transpose()
nvte_cast_transpose_dbias()
nvte_fp8_transpose_dbias()
nvte_multi_cast_transpose()
nvte_cast_transpose_dbias_dgelu()
nvte_cast_transpose_dbias_dsilu()
nvte_cast_transpose_dbias_drelu()
nvte_cast_transpose_dbias_dqgelu()
nvte_cast_transpose_dbias_dsrelu()
nvte_dgeglu_cast_transpose()
nvte_dswiglu_cast_transpose()
nvte_dreglu_cast_transpose()
nvte_dqgeglu_cast_transpose()
nvte_dsreglu_cast_transpose()
Please activate JavaScript to enable the search functionality.