C/C++ API
The C/C++ API allows you to access the custom kernels defined in the libtransformer_engine.so library directly from C/C++, without Python.
Headers
- activation.h
- cast.h
- gemm.h
- fused_attn.h
  - NVTE_QKV_Layout
    - NVTE_QKV_Layout::NVTE_SB3HD
    - NVTE_QKV_Layout::NVTE_SBH3D
    - NVTE_QKV_Layout::NVTE_SBHD_SB2HD
    - NVTE_QKV_Layout::NVTE_SBHD_SBH2D
    - NVTE_QKV_Layout::NVTE_SBHD_SBHD_SBHD
    - NVTE_QKV_Layout::NVTE_BS3HD
    - NVTE_QKV_Layout::NVTE_BSH3D
    - NVTE_QKV_Layout::NVTE_BSHD_BS2HD
    - NVTE_QKV_Layout::NVTE_BSHD_BSH2D
    - NVTE_QKV_Layout::NVTE_BSHD_BSHD_BSHD
    - NVTE_QKV_Layout::NVTE_T3HD
    - NVTE_QKV_Layout::NVTE_TH3D
    - NVTE_QKV_Layout::NVTE_THD_T2HD
    - NVTE_QKV_Layout::NVTE_THD_TH2D
    - NVTE_QKV_Layout::NVTE_THD_THD_THD
  - NVTE_QKV_Layout_Group
  - NVTE_QKV_Format
  - NVTE_Bias_Type
  - NVTE_Mask_Type
  - NVTE_Fused_Attn_Backend
  - nvte_get_qkv_layout_group()
  - nvte_get_qkv_format()
  - nvte_get_fused_attn_backend()
  - nvte_fused_attn_fwd_qkvpacked()
  - nvte_fused_attn_bwd_qkvpacked()
  - nvte_fused_attn_fwd_kvpacked()
  - nvte_fused_attn_bwd_kvpacked()
  - nvte_fused_attn_fwd()
  - nvte_fused_attn_bwd()
- layer_norm.h
- rmsnorm.h
- softmax.h
  - nvte_scaled_softmax_forward()
  - nvte_scaled_softmax_backward()
  - nvte_scaled_masked_softmax_forward()
  - nvte_scaled_masked_softmax_backward()
  - nvte_scaled_upper_triang_masked_softmax_forward()
  - nvte_scaled_upper_triang_masked_softmax_backward()
  - nvte_scaled_aligned_causal_masked_softmax_forward()
  - nvte_scaled_aligned_causal_masked_softmax_backward()
- transformer_engine.h
  - NVTETensor
  - NVTEDType
  - nvte_create_tensor()
  - nvte_destroy_tensor()
  - nvte_tensor_data()
  - nvte_tensor_shape()
  - nvte_tensor_ndims()
  - nvte_tensor_size()
  - nvte_tensor_numel()
  - nvte_tensor_element_size()
  - nvte_tensor_type()
  - nvte_tensor_amax()
  - nvte_tensor_scale()
  - nvte_tensor_scale_inv()
  - nvte_tensor_pack_create()
  - nvte_tensor_pack_destroy()
  - NVTEShape
  - NVTETensorPack
  - transformer_engine
  - transformer_engine::DType
    - transformer_engine::DType::kByte
    - transformer_engine::DType::kInt32
    - transformer_engine::DType::kInt64
    - transformer_engine::DType::kFloat32
    - transformer_engine::DType::kFloat16
    - transformer_engine::DType::kBFloat16
    - transformer_engine::DType::kFloat8E4M3
    - transformer_engine::DType::kFloat8E5M2
    - transformer_engine::DType::kNumTypes
  - transformer_engine::TensorWrapper
    - transformer_engine::TensorWrapper::TensorWrapper()
    - transformer_engine::TensorWrapper::TensorWrapper()
    - transformer_engine::TensorWrapper::TensorWrapper()
    - transformer_engine::TensorWrapper::~TensorWrapper()
    - transformer_engine::TensorWrapper::operator=()
    - transformer_engine::TensorWrapper::TensorWrapper()
    - transformer_engine::TensorWrapper::TensorWrapper()
    - transformer_engine::TensorWrapper::operator=()
    - transformer_engine::TensorWrapper::data()
    - transformer_engine::TensorWrapper::shape()
    - transformer_engine::TensorWrapper::size()
    - transformer_engine::TensorWrapper::ndim()
    - transformer_engine::TensorWrapper::numel()
    - transformer_engine::TensorWrapper::element_size()
    - transformer_engine::TensorWrapper::bytes()
    - transformer_engine::TensorWrapper::dtype()
    - transformer_engine::TensorWrapper::dptr()
    - transformer_engine::TensorWrapper::amax()
    - transformer_engine::TensorWrapper::scale()
    - transformer_engine::TensorWrapper::scale_inv()
    - transformer_engine::TensorWrapper::tensor_
- transpose.h
  - nvte_cast_transpose()
  - nvte_transpose()
  - nvte_cast_transpose_dbias()
  - nvte_fp8_transpose_dbias()
  - nvte_multi_cast_transpose()
  - nvte_cast_transpose_dbias_dgelu()
  - nvte_cast_transpose_dbias_dsilu()
  - nvte_cast_transpose_dbias_drelu()
  - nvte_cast_transpose_dbias_dqgelu()
  - nvte_cast_transpose_dbias_dsrelu()
  - nvte_dgeglu_cast_transpose()
  - nvte_dswiglu_cast_transpose()
  - nvte_dreglu_cast_transpose()
  - nvte_dqgeglu_cast_transpose()
  - nvte_dsreglu_cast_transpose()