transformer_engine.h
Base classes and functions of Transformer Engine API.
Typedefs
-
typedef void *NVTETensor
TE Tensor type.
NVTETensor is a contiguous tensor type storing a pointer to data of a given shape and type. It does not own the memory it points to.
-
typedef void *NVTEQuantizationConfig
Configuration for tensor quantization.
Enums
-
enum NVTEDType
TE datatype.
Values:
-
enumerator kNVTEByte
Byte
-
enumerator kNVTEInt32
32-bit integer
-
enumerator kNVTEInt64
64-bit integer
-
enumerator kNVTEFloat32
32-bit float
-
enumerator kNVTEFloat16
16-bit float (E5M10)
-
enumerator kNVTEBFloat16
16-bit bfloat (E8M7)
-
enumerator kNVTEFloat8E4M3
8-bit float (E4M3)
-
enumerator kNVTEFloat8E5M2
8-bit float (E5M2)
-
enumerator kNVTEFloat8E8M0
8-bit float (E8M0)
-
enumerator kNVTENumTypes
Number of supported types
-
enumerator kNVTEByte
-
enum NVTETensorParam
Indicates the kind of the tensor parameter to set/get.
Values:
-
enumerator kNVTERowwiseData
Data usable in rowwise manner
-
enumerator kNVTEColumnwiseData
Data usable in columnwise manner
-
enumerator kNVTEScale
Scale tensor
-
enumerator kNVTEAmax
Amax tensor
-
enumerator kNVTERowwiseScaleInv
Scale inverse tensor for decoding Rowwise Data
-
enumerator kNVTEColumnwiseScaleInv
Scale inverse tensor for decoding Columnwise Data
-
enumerator kNVTENumTensorParams
-
enumerator kNVTERowwiseData
-
enum NVTEScalingMode
Tensor data format.
Values:
-
enumerator NVTE_DELAYED_TENSOR_SCALING
Either an unquantized tensor or an FP8 tensor with per-tensor scaling
Not necessarily used for delayed tensor scaling. The unintuitive name reflects legacy usage.
-
enumerator NVTE_MXFP8_1D_SCALING
Single scale per block of 32 elements consecutive in either rowwise or columnwise direction
-
enumerator NVTE_INVALID_SCALING
-
enumerator NVTE_DELAYED_TENSOR_SCALING
-
enum NVTEQuantizationConfigAttribute
Type of option for tensor quantization.
Values:
-
enumerator kNVTEQuantizationConfigForcePow2Scales
Whether to force power of 2 scales
-
enumerator kNVTEQuantizationConfigAmaxEpsilon
Small value to add to amax for numerical stability
-
enumerator kNVTEQuantizationConfigNumAttributes
-
enumerator kNVTEQuantizationConfigForcePow2Scales
Functions
-
NVTETensor nvte_create_tensor(NVTEScalingMode scaling_mode)
Create a new TE tensor.
Create a new TE tensor. Before use its parameters need to be set. TE tensors are just wrappers on top of raw data and do not own memory.
- Parameters:
scaling_mode – [in] Scaling mode of the tensor.
- Returns:
A new TE tensor.
-
void nvte_destroy_tensor(NVTETensor tensor)
Destroy a TE tensor.
Since the TE tensor does not own memory, the underlying data is not freed during this operation.
- Parameters:
tensor – [in] Tensor to be destroyed.
-
void *nvte_tensor_data(const NVTETensor tensor)
Get a raw pointer to the tensor’s rowwise data.
- Parameters:
tensor – [in] Tensor.
- Returns:
A raw pointer to tensor’s rowwise data.
-
void *nvte_tensor_columnwise_data(const NVTETensor tensor)
Get a raw pointer to the tensor’s columnwise data.
- Parameters:
tensor – [in] Tensor.
- Returns:
A raw pointer to tensor’s columnwise data.
-
NVTEShape nvte_tensor_shape(const NVTETensor tensor)
Get a tensor’s data shape.
- Parameters:
tensor – [in] Tensor.
- Returns:
A shape of the input tensor.
-
NVTEShape nvte_tensor_columnwise_shape(const NVTETensor tensor)
Get a tensor’s columnwise data shape.
- Parameters:
tensor – [in] Tensor.
- Returns:
A shape of the input tensor.
-
size_t nvte_tensor_ndims(const NVTETensor tensor)
Get a tensor’s number of dimensions.
- Parameters:
tensor – [in] Tensor.
- Returns:
Number of tensor dimensions.
-
size_t nvte_tensor_size(const NVTETensor tensor, const size_t dim)
Get the size of a specific tensor dimension.
- Parameters:
tensor – [in] Tensor.
dim – [in] Dimension index.
- Returns:
Size of the tensor at the specified dimension.
-
size_t nvte_tensor_numel(const NVTETensor tensor)
Get a tensor’s total number of elements.
- Parameters:
tensor – [in] Tensor.
- Returns:
Number of elements in the tensor.
-
size_t nvte_tensor_element_size(const NVTETensor tensor)
Get the byte size for the tensor’s data type.
- Parameters:
tensor – [in] Tensor.
- Returns:
Byte size of the tensor’s data type.
-
NVTEDType nvte_tensor_type(const NVTETensor tensor)
Get a tensor’s data type.
- Parameters:
tensor – [in] Tensor.
- Returns:
A data type of the input tensor.
-
float *nvte_tensor_amax(const NVTETensor tensor)
Get a pointer to the tensor’s amax data.
- Parameters:
tensor – [in] Tensor.
- Returns:
A pointer to tensor’s amax data.
-
float *nvte_tensor_scale(const NVTETensor tensor)
Get a pointer to the tensor’s scale data.
- Parameters:
tensor – [in] Tensor.
- Returns:
A pointer to tensor’s scale data.
-
float *nvte_tensor_scale_inv(const NVTETensor tensor)
Get a pointer to the tensor’s inverse of scale data.
- Parameters:
tensor – [in] Tensor.
- Returns:
A pointer to tensor’s inverse of scale data.
-
NVTEShape nvte_tensor_scale_inv_shape(const NVTETensor tensor)
Get a tensor’s scale_inv shape.
- Parameters:
tensor – [in] Tensor.
- Returns:
A scale_inv shape of the input tensor.
-
void nvte_zero_tensor(const NVTETensor tensor, cudaStream_t stream)
Reset tensor value to zero.
- Parameters:
tensor – [in] Tensor.
stream – [in] CUDA stream used for the operation.
-
void nvte_set_tensor_param(NVTETensor *tensor, NVTETensorParam param_name, const NVTEBasicTensor *param)
Set a parameter of the tensor.
- Parameters:
tensor – [in/out] Tensor.
param_name – [in] The parameter to be set.
param – [in] The value to be set.
-
NVTEBasicTensor nvte_get_tensor_param(const NVTETensor tensor, NVTETensorParam param_name)
Get a value of the parameter of the tensor.
- Parameters:
tensor – [in] Tensor.
param_name – [in] The parameter to be retrieved.
-
NVTEScalingMode nvte_tensor_scaling_mode(const NVTETensor tensor)
Get the granularity of scaling of this tensor.
- Parameters:
tensor – [in] Tensor.
- Returns:
The scaling mode (granularity of scaling) of the tensor.
-
void nvte_tensor_pack_create(NVTETensorPack *pack)
Create tensors in NVTETensorPack.
-
void nvte_tensor_pack_destroy(NVTETensorPack *pack)
Destroy tensors in NVTETensorPack.
-
NVTEQuantizationConfig nvte_create_quantization_config()
Create a new quantization config.
- Returns:
A new quantization config.
-
void nvte_get_quantization_config_attribute(NVTEQuantizationConfig config, NVTEQuantizationConfigAttribute attr, void *buf, size_t size_in_bytes, size_t *size_written)
Query an option in quantization config.
- Parameters:
config – [in] Quantization config.
attr – [in] Option type.
buf – [out] Memory address to write option value. Ignored if NULL.
size_in_bytes – [in] Size of buf.
size_written – [out] Number of bytes that have been written to buf. If buf is NULL, then the number of bytes that would have been written.
-
void nvte_set_quantization_config_attribute(NVTEQuantizationConfig config, NVTEQuantizationConfigAttribute attr, const void *buf, size_t size_in_bytes)
Set an option in quantization config.
- Parameters:
config – [in] Quantization config.
attr – [in] Option type.
buf – [in] Memory address to read option value.
size_in_bytes – [in] Size of buf.
-
void nvte_destroy_quantization_config(NVTEQuantizationConfig config)
Destroy a quantization config.
- Parameters:
config – [in] Config to be destroyed.
-
struct NVTEShape
- #include <transformer_engine.h>
Shape of the tensor.
-
struct NVTEBasicTensor
- #include <transformer_engine.h>
A basic tensor type used to populate parameters of NVTETensor. It does not own the memory it points to.
-
struct NVTETensorPack
- #include <transformer_engine.h>
Pack of tensors, generally used for auxiliary outputs.
Public Members
-
NVTETensor tensors[MAX_SIZE]
Wrappers of tensors. They do not hold the associated memory.
-
size_t size = 0
Actual number of tensors in the pack, 0 <= size <= MAX_SIZE.
Public Static Attributes
-
static const int MAX_SIZE = 10
Max number of tensors in the pack. Assumed <= 10.
-
NVTETensor tensors[MAX_SIZE]
-
namespace transformer_engine
Namespace containing C++ API of Transformer Engine.
Enums
-
struct QuantizationConfigWrapper
- #include <transformer_engine.h>
C++ wrapper for NVTEQuantizationConfig.
Public Functions
-
inline QuantizationConfigWrapper()
-
QuantizationConfigWrapper(const QuantizationConfigWrapper&) = delete
-
QuantizationConfigWrapper &operator=(const QuantizationConfigWrapper&) = delete
-
inline QuantizationConfigWrapper(QuantizationConfigWrapper &&other)
-
inline QuantizationConfigWrapper &operator=(QuantizationConfigWrapper &&other)
-
inline ~QuantizationConfigWrapper()
-
inline operator NVTEQuantizationConfig() const noexcept
Get the underlying NVTEQuantizationConfig.
- Returns:
NVTEQuantizationConfig held by this QuantizationConfigWrapper.
-
inline void set_force_pow_2_scales(bool force_pow_2_scales)
Set whether to force power of 2 scales.
-
inline void set_amax_epsilon(float amax_epsilon)
Set small value to add to amax.
Private Members
-
NVTEQuantizationConfig config_ = nullptr
Wrapped NVTEQuantizationConfig.
-
inline QuantizationConfigWrapper()
-
struct TensorWrapper
- #include <transformer_engine.h>
C++ wrapper for the NVTETensor class.
Public Functions
-
inline TensorWrapper(void *dptr, const NVTEShape &shape, const DType dtype, float *amax_dptr = nullptr, float *scale_dptr = nullptr, float *scale_inv_dptr = nullptr, const NVTEShape scale_inv_shape = defaultShape, const NVTEScalingMode scaling_mode = NVTE_DELAYED_TENSOR_SCALING)
Constructs new TensorWrapper.
Create a new TE tensor with a given shape, datatype and data. TE tensors are just wrappers on top of raw data and do not own memory.
- Parameters:
dptr – [in] Pointer to the tensor data.
shape – [in] Shape of the tensor.
dtype – [in] Data type of the tensor.
amax_dptr – [in] Pointer to the AMAX value.
scale_dptr – [in] Pointer to the scale value.
scale_inv_shape – [in] Shape of scale_inv
scale_inv_dptr – [in] Pointer to the inverse of scale value.
-
inline TensorWrapper(void *dptr, const std::vector<size_t> &shape, const DType dtype, float *amax_dptr = nullptr, float *scale_dptr = nullptr, float *scale_inv_dptr = nullptr, const std::vector<size_t> &scale_inv_shape = {1}, const NVTEScalingMode scaling_mode = NVTE_DELAYED_TENSOR_SCALING)
Constructs new TensorWrapper.
Create a new TE tensor with a given shape, datatype and data. TE tensors are just wrappers on top of raw data and do not own memory.
- Parameters:
dptr – [in] Pointer to the tensor data.
shape – [in] Shape of the tensor.
dtype – [in] Data type of the tensor.
amax_dptr – [in] Pointer to the AMAX value.
scale_dptr – [in] Pointer to the scale value.
scale_inv_shape – [in] Shape of scale_inv
scale_inv_dptr – [in] Pointer to the inverse of scale value.
-
inline explicit TensorWrapper(const NVTEScalingMode scaling_mode = NVTE_DELAYED_TENSOR_SCALING)
Constructs new empty TensorWrapper.
Create a new empty TE tensor which holds nothing.
-
inline ~TensorWrapper()
TensorWrapper destructor.
-
TensorWrapper &operator=(const TensorWrapper &other) = delete
-
TensorWrapper(const TensorWrapper &other) = delete
-
inline TensorWrapper(TensorWrapper &&other)
Constructs new TensorWrapper from existing TensorWrapper.
Pass an existing TE tensor to a new TensorWrapper.
- Parameters:
other – [inout] The source of the data.
-
inline TensorWrapper &operator=(TensorWrapper &&other)
Assign the data from existing TensorWrapper.
Change ownership of an existing TE tensor.
- Parameters:
other – [inout] The source of the data.
-
template<typename ShapeType>
inline TensorWrapper &set_parameter(const NVTETensorParam param, void *dptr, DType type, const ShapeType &shape) noexcept
-
template<typename ShapeType>
inline TensorWrapper &set_rowwise_data(void *dptr, DType type, const ShapeType &shape) noexcept
-
template<typename ShapeType>
inline TensorWrapper &set_columnwise_data(void *dptr, DType type, const ShapeType &shape) noexcept
-
template<typename ShapeType>
inline TensorWrapper &set_scale(void *dptr, DType type, const ShapeType &shape) noexcept
-
template<typename ShapeType>
inline TensorWrapper &set_amax(void *dptr, DType type, const ShapeType &shape) noexcept
-
template<typename ShapeType>
inline TensorWrapper &set_rowwise_scale_inv(void *dptr, DType type, const ShapeType &shape) noexcept
-
template<typename ShapeType>
inline TensorWrapper &set_columnwise_scale_inv(void *dptr, DType type, const ShapeType &shape) noexcept
-
inline NVTEBasicTensor get_parameter(const NVTETensorParam param) const noexcept
-
inline NVTEBasicTensor get_rowwise_data() const noexcept
-
inline NVTEBasicTensor get_columnwise_data() const noexcept
-
inline NVTEBasicTensor get_scale() const noexcept
-
inline NVTEBasicTensor get_amax() const noexcept
-
inline NVTEBasicTensor get_rowwise_scale_inv() const noexcept
-
inline NVTEBasicTensor get_columnwise_scale_inv() const noexcept
-
inline NVTETensor data() const noexcept
Get an underlying NVTETensor.
- Returns:
NVTETensor held by this TensorWrapper.
-
inline const NVTEShape shape() const noexcept
Get the shape of this TensorWrapper.
- Returns:
Shape of this TensorWrapper.
-
inline const NVTEShape columnwise_shape() const noexcept
Get the columnwise shape of this TensorWrapper.
- Returns:
Shape of this TensorWrapper.
-
inline size_t size(const size_t dim) const
Get the size of this TensorWrapper in the given dimension.
- Parameters:
dim – [in] Dimension index.
- Returns:
Size of this TensorWrapper in given dimension.
-
inline size_t ndim() const noexcept
Get the number of dimensions for this TensorWrapper.
- Returns:
Number of dimensions for this TensorWrapper.
-
inline size_t numel() const noexcept
Get the number of allocated elements in the tensor. This will return 0 for tensors with nullptr data even if the TensorWrapper has a non-zero shape.
- Returns:
Number of elements in the tensor.
-
inline size_t element_size() const noexcept
Get the tensor’s element size in bytes.
- Returns:
Element size in bytes.
-
inline size_t bytes() const noexcept
Get the tensor’s allocated size in bytes. This will return 0 for tensors with nullptr data even if the TensorWrapper has a non-zero shape and valid dtype.
- Returns:
Total tensor size in bytes.
-
inline DType dtype() const noexcept
Get the data type of this TensorWrapper.
- Returns:
Data type of this TensorWrapper.
-
inline void *dptr() const noexcept
Get a raw pointer to the tensor’s data.
- Returns:
A raw pointer to tensor’s data.
-
inline void *columnwise_dptr() const noexcept
Get a raw pointer to the tensor’s columnwise data.
- Returns:
A raw pointer to tensor’s data.
-
inline float *amax() const noexcept
Get a pointer to the tensor’s amax data.
- Returns:
A pointer to tensor’s amax data.
-
inline float *scale() const noexcept
Get a pointer to the tensor’s scale data.
- Returns:
A pointer to tensor’s scale data.
-
inline float *scale_inv() const noexcept
Get a pointer to the tensor’s inverse of scale data.
- Returns:
A pointer to tensor’s inverse of scale data.
-
inline const NVTEShape scale_inv_shape() const noexcept
Get the scale_inv_shape of this TensorWrapper.
- Returns:
scale_inv_shape of this TensorWrapper.
-
inline NVTEScalingMode scaling_mode() const noexcept
Get a scaling mode of the tensor.
- Returns:
Scaling mode of the tensor.
-
inline void zero_(cudaStream_t stream)
Public Static Attributes
-
static constexpr size_t defaultData = 1
-
static constexpr NVTEShape defaultShape = {&defaultData, 1}
Private Functions
Private Members
-
NVTETensor tensor_ = nullptr
Wrapped NVTETensor.
-
inline TensorWrapper(void *dptr, const NVTEShape &shape, const DType dtype, float *amax_dptr = nullptr, float *scale_dptr = nullptr, float *scale_inv_dptr = nullptr, const NVTEShape scale_inv_shape = defaultShape, const NVTEScalingMode scaling_mode = NVTE_DELAYED_TENSOR_SCALING)
-
struct QuantizationConfigWrapper