C/C++ API
The C/C++ API allows you to access the custom kernels defined in the libtransformer_engine.so library directly from C/C++, without using Python.
- activation.h
- cast.h
- gemm.h
- fused_attn.h
NVTE_QKV_Layout
NVTE_QKV_Layout::NVTE_SB3HD
NVTE_QKV_Layout::NVTE_SBH3D
NVTE_QKV_Layout::NVTE_SBHD_SB2HD
NVTE_QKV_Layout::NVTE_SBHD_SBH2D
NVTE_QKV_Layout::NVTE_SBHD_SBHD_SBHD
NVTE_QKV_Layout::NVTE_BS3HD
NVTE_QKV_Layout::NVTE_BSH3D
NVTE_QKV_Layout::NVTE_BSHD_BS2HD
NVTE_QKV_Layout::NVTE_BSHD_BSH2D
NVTE_QKV_Layout::NVTE_BSHD_BSHD_BSHD
NVTE_QKV_Layout::NVTE_T3HD
NVTE_QKV_Layout::NVTE_TH3D
NVTE_QKV_Layout::NVTE_THD_T2HD
NVTE_QKV_Layout::NVTE_THD_TH2D
NVTE_QKV_Layout::NVTE_THD_THD_THD
NVTE_QKV_Layout_Group
NVTE_QKV_Format
NVTE_Bias_Type
NVTE_Mask_Type
NVTE_Fused_Attn_Backend
nvte_get_qkv_layout_group()
nvte_get_qkv_format()
nvte_get_fused_attn_backend()
nvte_fused_attn_fwd_qkvpacked()
nvte_fused_attn_bwd_qkvpacked()
nvte_fused_attn_fwd_kvpacked()
nvte_fused_attn_bwd_kvpacked()
nvte_fused_attn_fwd()
nvte_fused_attn_bwd()
- layer_norm.h
- rmsnorm.h
- softmax.h
nvte_scaled_softmax_forward()
nvte_scaled_softmax_backward()
nvte_scaled_masked_softmax_forward()
nvte_scaled_masked_softmax_backward()
nvte_scaled_upper_triang_masked_softmax_forward()
nvte_scaled_upper_triang_masked_softmax_backward()
nvte_scaled_aligned_causal_masked_softmax_forward()
nvte_scaled_aligned_causal_masked_softmax_backward()
- transformer_engine.h
NVTETensor
NVTEDType
nvte_create_tensor()
nvte_destroy_tensor()
nvte_tensor_data()
nvte_tensor_shape()
nvte_tensor_ndims()
nvte_tensor_size()
nvte_tensor_numel()
nvte_tensor_element_size()
nvte_tensor_type()
nvte_tensor_amax()
nvte_tensor_scale()
nvte_tensor_scale_inv()
nvte_tensor_pack_create()
nvte_tensor_pack_destroy()
NVTEShape
NVTETensorPack
transformer_engine
transformer_engine::DType
transformer_engine::DType::kByte
transformer_engine::DType::kInt32
transformer_engine::DType::kInt64
transformer_engine::DType::kFloat32
transformer_engine::DType::kFloat16
transformer_engine::DType::kBFloat16
transformer_engine::DType::kFloat8E4M3
transformer_engine::DType::kFloat8E5M2
transformer_engine::DType::kNumTypes
transformer_engine::TensorWrapper
transformer_engine::TensorWrapper::TensorWrapper()
transformer_engine::TensorWrapper::TensorWrapper()
transformer_engine::TensorWrapper::TensorWrapper()
transformer_engine::TensorWrapper::~TensorWrapper()
transformer_engine::TensorWrapper::operator=()
transformer_engine::TensorWrapper::TensorWrapper()
transformer_engine::TensorWrapper::TensorWrapper()
transformer_engine::TensorWrapper::operator=()
transformer_engine::TensorWrapper::data()
transformer_engine::TensorWrapper::shape()
transformer_engine::TensorWrapper::size()
transformer_engine::TensorWrapper::ndim()
transformer_engine::TensorWrapper::numel()
transformer_engine::TensorWrapper::element_size()
transformer_engine::TensorWrapper::bytes()
transformer_engine::TensorWrapper::dtype()
transformer_engine::TensorWrapper::dptr()
transformer_engine::TensorWrapper::amax()
transformer_engine::TensorWrapper::scale()
transformer_engine::TensorWrapper::scale_inv()
transformer_engine::TensorWrapper::tensor_
- transpose.h
nvte_cast_transpose()
nvte_transpose()
nvte_cast_transpose_dbias()
nvte_fp8_transpose_dbias()
nvte_multi_cast_transpose()
nvte_cast_transpose_dbias_dgelu()
nvte_cast_transpose_dbias_dsilu()
nvte_cast_transpose_dbias_drelu()
nvte_cast_transpose_dbias_dqgelu()
nvte_cast_transpose_dbias_dsrelu()
nvte_dgeglu_cast_transpose()
nvte_dswiglu_cast_transpose()
nvte_dreglu_cast_transpose()
nvte_dqgeglu_cast_transpose()
nvte_dsreglu_cast_transpose()