transformer_engine.h

Base classes and functions of Transformer Engine API.

Typedefs

typedef void *NVTETensor

TE Tensor type.

NVTETensor is a contiguous tensor type storing a pointer to data of a given shape and type. It does not own the memory it points to.

typedef void *NVTEQuantizationConfig

Configuration for tensor quantization.

Enums

enum NVTEDType

TE datatype.

Values:

enumerator kNVTEByte

Byte

enumerator kNVTEInt32

32-bit integer

enumerator kNVTEInt64

64-bit integer

enumerator kNVTEFloat32

32-bit float

enumerator kNVTEFloat16

16-bit float (E5M10)

enumerator kNVTEBFloat16

16-bit bfloat (E8M7)

enumerator kNVTEFloat8E4M3

8-bit float (E4M3)

enumerator kNVTEFloat8E5M2

8-bit float (E5M2)

enumerator kNVTEFloat8E8M0

8-bit float (E8M0)

enumerator kNVTENumTypes

Number of supported types

enum NVTETensorParam

Indicates the kind of the tensor parameter to set/get.

Values:

enumerator kNVTERowwiseData

Data usable in rowwise manner

enumerator kNVTEColumnwiseData

Data usable in columnwise manner

enumerator kNVTEScale

Scale tensor

enumerator kNVTEAmax

Amax tensor

enumerator kNVTERowwiseScaleInv

Scale inverse tensor for decoding Rowwise Data

enumerator kNVTEColumnwiseScaleInv

Scale inverse tensor for decoding Columnwise Data

enumerator kNVTENumTensorParams
enum NVTEScalingMode

Tensor data format.

Values:

enumerator NVTE_DELAYED_TENSOR_SCALING

Either an unquantized tensor or an FP8 tensor with per-tensor scaling

Not necessarily used for delayed tensor scaling. The unintuitive name reflects legacy usage.

enumerator NVTE_MXFP8_1D_SCALING

Single scale per block of 32 elements consecutive in either rowwise or columnwise direction

enumerator NVTE_INVALID_SCALING
enum NVTEQuantizationConfigAttribute

Type of option for tensor quantization.

Values:

enumerator kNVTEQuantizationConfigForcePow2Scales

Whether to force power of 2 scales

enumerator kNVTEQuantizationConfigAmaxEpsilon

Small value to add to amax for numerical stability

enumerator kNVTEQuantizationConfigNumAttributes

Functions

NVTETensor nvte_create_tensor(NVTEScalingMode scaling_mode)

Create a new TE tensor.

Create a new TE tensor. Before use its parameters need to be set. TE tensors are just wrappers on top of raw data and do not own memory.

Parameters:

scaling_mode[in] Scaling mode of the tensor.

Returns:

A new TE tensor.

void nvte_destroy_tensor(NVTETensor tensor)

Destroy a TE tensor.

Since the TE tensor does not own memory, the underlying data is not freed during this operation.

Parameters:

tensor[in] Tensor to be destroyed.

void *nvte_tensor_data(const NVTETensor tensor)

Get a raw pointer to the tensor’s rowwise data.

Parameters:

tensor[in] Tensor.

Returns:

A raw pointer to tensor’s rowwise data.

void *nvte_tensor_columnwise_data(const NVTETensor tensor)

Get a raw pointer to the tensor’s columnwise data.

Parameters:

tensor[in] Tensor.

Returns:

A raw pointer to tensor’s columnwise data.

NVTEShape nvte_tensor_shape(const NVTETensor tensor)

Get a tensor’s data shape.

Parameters:

tensor[in] Tensor.

Returns:

A shape of the input tensor.

NVTEShape nvte_tensor_columnwise_shape(const NVTETensor tensor)

Get a tensor’s columnwise data shape.

Parameters:

tensor[in] Tensor.

Returns:

A shape of the input tensor’s columnwise data.

size_t nvte_tensor_ndims(const NVTETensor tensor)

Get a tensor’s number of dimensions.

Parameters:

tensor[in] Tensor.

Returns:

Number of tensor dimensions.

size_t nvte_tensor_size(const NVTETensor tensor, const size_t dim)

Get the size of a specific tensor dimension.

Parameters:
  • tensor[in] Tensor.

  • dim[in] Dimension index.

Returns:

Size of the tensor at the specified dimension.

size_t nvte_tensor_numel(const NVTETensor tensor)

Get a tensor’s total number of elements.

Parameters:

tensor[in] Tensor.

Returns:

Number of elements in the tensor.

size_t nvte_tensor_element_size(const NVTETensor tensor)

Get the byte size for the tensor’s data type.

Parameters:

tensor[in] Tensor.

Returns:

Byte size of the tensor’s data type.

NVTEDType nvte_tensor_type(const NVTETensor tensor)

Get a tensor’s data type.

Parameters:

tensor[in] Tensor.

Returns:

A data type of the input tensor.

float *nvte_tensor_amax(const NVTETensor tensor)

Get a pointer to the tensor’s amax data.

Parameters:

tensor[in] Tensor.

Returns:

A pointer to tensor’s amax data.

float *nvte_tensor_scale(const NVTETensor tensor)

Get a pointer to the tensor’s scale data.

Parameters:

tensor[in] Tensor.

Returns:

A pointer to tensor’s scale data.

float *nvte_tensor_scale_inv(const NVTETensor tensor)

Get a pointer to the tensor’s inverse of scale data.

Parameters:

tensor[in] Tensor.

Returns:

A pointer to tensor’s inverse of scale data.

NVTEShape nvte_tensor_scale_inv_shape(const NVTETensor tensor)

Get a tensor’s scale_inv shape.

Parameters:

tensor[in] Tensor.

Returns:

A scale_inv shape of the input tensor.

void nvte_zero_tensor(const NVTETensor tensor, cudaStream_t stream)

Reset tensor value to zero.

Parameters:
  • tensor[in] Tensor.

  • stream[in] CUDA stream to use for the operation.

void nvte_set_tensor_param(NVTETensor *tensor, NVTETensorParam param_name, const NVTEBasicTensor *param)

Set a parameter of the tensor.

Parameters:
  • tensor[inout] Tensor.

  • param_name[in] The parameter to be set.

  • param[in] The value to be set.

NVTEBasicTensor nvte_get_tensor_param(const NVTETensor tensor, NVTETensorParam param_name)

Get a value of the parameter of the tensor.

Parameters:
  • tensor[in] Tensor.

  • param_name[in] The parameter to be retrieved.

NVTEScalingMode nvte_tensor_scaling_mode(const NVTETensor tensor)

Get the granularity of scaling of this tensor.

Parameters:

tensor[in] Tensor.

Returns:

The scaling mode of the tensor.

void nvte_tensor_pack_create(NVTETensorPack *pack)

Create tensors in NVTETensorPack.

void nvte_tensor_pack_destroy(NVTETensorPack *pack)

Destroy tensors in NVTETensorPack.

NVTEQuantizationConfig nvte_create_quantization_config()

Create a new quantization config.

Returns:

A new quantization config.

void nvte_get_quantization_config_attribute(NVTEQuantizationConfig config, NVTEQuantizationConfigAttribute attr, void *buf, size_t size_in_bytes, size_t *size_written)

Query an option in quantization config.

Parameters:
  • config[in] Quantization config.

  • attr[in] Option type.

  • buf[out] Memory address to write option value. Ignored if NULL.

  • size_in_bytes[in] Size of buf.

  • size_written[out] Number of bytes that have been written to buf. If buf is NULL, then the number of bytes that would have been written.

void nvte_set_quantization_config_attribute(NVTEQuantizationConfig config, NVTEQuantizationConfigAttribute attr, const void *buf, size_t size_in_bytes)

Set an option in quantization config.

Parameters:
  • config[in] Quantization config.

  • attr[in] Option type.

  • buf[in] Memory address to read option value.

  • size_in_bytes[in] Size of buf.

void nvte_destroy_quantization_config(NVTEQuantizationConfig config)

Destroy a quantization config.

Parameters:

config[in] Config to be destroyed.

struct NVTEShape
#include <transformer_engine.h>

Shape of the tensor.

Public Members

const size_t *data

Shape data, of size ndim.

size_t ndim

Number of dimensions.

struct NVTEBasicTensor
#include <transformer_engine.h>

A basic tensor type used to populate parameters of NVTETensor. It does not own the memory it points to.

Public Members

void *data_ptr
NVTEDType dtype
NVTEShape shape
struct NVTETensorPack
#include <transformer_engine.h>

Pack of tensors, generally used for auxiliary outputs.

Public Members

NVTETensor tensors[MAX_SIZE]

Wrappers of tensors. They do not hold the associated memory.

size_t size = 0

Actual number of tensors in the pack, 0 <= size <= MAX_SIZE.

Public Static Attributes

static const int MAX_SIZE = 10

Max number of tensors in the pack. Assumed <= 10.

namespace transformer_engine

Namespace containing C++ API of Transformer Engine.

Enums

enum class DType

TE datatype.

Values:

enumerator kByte
enumerator kInt32
enumerator kInt64
enumerator kFloat32
enumerator kFloat16
enumerator kBFloat16
enumerator kFloat8E4M3
enumerator kFloat8E5M2
enumerator kFloat8E8M0
enumerator kNumTypes
struct QuantizationConfigWrapper
#include <transformer_engine.h>

C++ wrapper for NVTEQuantizationConfig.

Public Functions

inline QuantizationConfigWrapper()
QuantizationConfigWrapper(const QuantizationConfigWrapper&) = delete
QuantizationConfigWrapper &operator=(const QuantizationConfigWrapper&) = delete
inline QuantizationConfigWrapper(QuantizationConfigWrapper &&other)
inline QuantizationConfigWrapper &operator=(QuantizationConfigWrapper &&other)
inline ~QuantizationConfigWrapper()
inline operator NVTEQuantizationConfig() const noexcept

Get the underlying NVTEQuantizationConfig.

Returns:

NVTEQuantizationConfig held by this QuantizationConfigWrapper.

inline void set_force_pow_2_scales(bool force_pow_2_scales)

Set whether to force power of 2 scales.

inline void set_amax_epsilon(float amax_epsilon)

Set small value to add to amax.

Private Members

NVTEQuantizationConfig config_ = nullptr

Wrapped NVTEQuantizationConfig.

struct TensorWrapper
#include <transformer_engine.h>

C++ wrapper for the NVTETensor class.

Public Functions

inline TensorWrapper(void *dptr, const NVTEShape &shape, const DType dtype, float *amax_dptr = nullptr, float *scale_dptr = nullptr, float *scale_inv_dptr = nullptr, const NVTEShape scale_inv_shape = defaultShape, const NVTEScalingMode scaling_mode = NVTE_DELAYED_TENSOR_SCALING)

Constructs new TensorWrapper.

Create a new TE tensor with a given shape, datatype and data. TE tensors are just wrappers on top of raw data and do not own memory.

Parameters:
  • dptr[in] Pointer to the tensor data.

  • shape[in] Shape of the tensor.

  • dtype[in] Data type of the tensor.

  • amax_dptr[in] Pointer to the AMAX value.

  • scale_dptr[in] Pointer to the scale value.

  • scale_inv_shape[in] Shape of scale_inv

  • scale_inv_dptr[in] Pointer to the inverse of scale value.

  • scaling_mode[in] Scaling mode of the tensor.

inline TensorWrapper(void *dptr, const std::vector<size_t> &shape, const DType dtype, float *amax_dptr = nullptr, float *scale_dptr = nullptr, float *scale_inv_dptr = nullptr, const std::vector<size_t> &scale_inv_shape = {1}, const NVTEScalingMode scaling_mode = NVTE_DELAYED_TENSOR_SCALING)

Constructs new TensorWrapper.

Create a new TE tensor with a given shape, datatype and data. TE tensors are just wrappers on top of raw data and do not own memory.

Parameters:
  • dptr[in] Pointer to the tensor data.

  • shape[in] Shape of the tensor.

  • dtype[in] Data type of the tensor.

  • amax_dptr[in] Pointer to the AMAX value.

  • scale_dptr[in] Pointer to the scale value.

  • scale_inv_shape[in] Shape of scale_inv

  • scale_inv_dptr[in] Pointer to the inverse of scale value.

  • scaling_mode[in] Scaling mode of the tensor.

inline explicit TensorWrapper(const NVTEScalingMode scaling_mode = NVTE_DELAYED_TENSOR_SCALING)

Constructs new empty TensorWrapper.

Create a new empty TE tensor which holds nothing.

inline ~TensorWrapper()

TensorWrapper destructor.

TensorWrapper &operator=(const TensorWrapper &other) = delete
TensorWrapper(const TensorWrapper &other) = delete
inline TensorWrapper(TensorWrapper &&other)

Constructs new TensorWrapper from existing TensorWrapper.

Pass an existing TE tensor to a new TensorWrapper.

Parameters:

other[inout] The source of the data.

inline TensorWrapper &operator=(TensorWrapper &&other)

Assign the data from existing TensorWrapper.

Change ownership of an existing TE tensor.

Parameters:

other[inout] The source of the data.

template<typename ShapeType>
inline TensorWrapper &set_parameter(const NVTETensorParam param, void *dptr, DType type, const ShapeType &shape) noexcept
template<typename ShapeType>
inline TensorWrapper &set_rowwise_data(void *dptr, DType type, const ShapeType &shape) noexcept
template<typename ShapeType>
inline TensorWrapper &set_columnwise_data(void *dptr, DType type, const ShapeType &shape) noexcept
template<typename ShapeType>
inline TensorWrapper &set_scale(void *dptr, DType type, const ShapeType &shape) noexcept
template<typename ShapeType>
inline TensorWrapper &set_amax(void *dptr, DType type, const ShapeType &shape) noexcept
template<typename ShapeType>
inline TensorWrapper &set_rowwise_scale_inv(void *dptr, DType type, const ShapeType &shape) noexcept
template<typename ShapeType>
inline TensorWrapper &set_columnwise_scale_inv(void *dptr, DType type, const ShapeType &shape) noexcept
inline NVTEBasicTensor get_parameter(const NVTETensorParam param) const noexcept
inline NVTEBasicTensor get_rowwise_data() const noexcept
inline NVTEBasicTensor get_columnwise_data() const noexcept
inline NVTEBasicTensor get_scale() const noexcept
inline NVTEBasicTensor get_amax() const noexcept
inline NVTEBasicTensor get_rowwise_scale_inv() const noexcept
inline NVTEBasicTensor get_columnwise_scale_inv() const noexcept
inline NVTETensor data() const noexcept

Get an underlying NVTETensor.

Returns:

NVTETensor held by this TensorWrapper.

inline const NVTEShape shape() const noexcept

Get the shape of this TensorWrapper.

Returns:

Shape of this TensorWrapper.

inline const NVTEShape columnwise_shape() const noexcept

Get the columnwise shape of this TensorWrapper.

Returns:

Columnwise shape of this TensorWrapper.

inline size_t size(const size_t dim) const

Get the size of this TensorWrapper in the given dimension.

Parameters:

dim[in] Dimension index.

Returns:

Size of this TensorWrapper in given dimension.

inline size_t ndim() const noexcept

Get the number of dimensions for this TensorWrapper.

Returns:

Number of dimensions for this TensorWrapper.

inline size_t numel() const noexcept

Get the number of allocated elements in the tensor. This will return 0 for tensors with nullptr data even if the TensorWrapper has a non-zero shape.

Returns:

Number of elements in the tensor.

inline size_t element_size() const noexcept

Get the tensor’s element size in bytes.

Returns:

Element size in bytes.

inline size_t bytes() const noexcept

Get the tensor’s allocated size in bytes. This will return 0 for tensors with nullptr data even if the TensorWrapper has a non-zero shape and valid dtype.

Returns:

Total tensor size in bytes.

inline DType dtype() const noexcept

Get the data type of this TensorWrapper.

Returns:

Data type of this TensorWrapper.

inline void *dptr() const noexcept

Get a raw pointer to the tensor’s data.

Returns:

A raw pointer to tensor’s data.

inline void *columnwise_dptr() const noexcept

Get a raw pointer to the tensor’s columnwise data.

Returns:

A raw pointer to tensor’s columnwise data.

inline float *amax() const noexcept

Get a pointer to the tensor’s amax data.

Returns:

A pointer to tensor’s amax data.

inline float *scale() const noexcept

Get a pointer to the tensor’s scale data.

Returns:

A pointer to tensor’s scale data.

inline float *scale_inv() const noexcept

Get a pointer to the tensor’s inverse of scale data.

Returns:

A pointer to tensor’s inverse of scale data.

inline const NVTEShape scale_inv_shape() const noexcept

Get the scale_inv_shape of this TensorWrapper.

Returns:

scale_inv_shape of this TensorWrapper.

inline NVTEScalingMode scaling_mode() const noexcept

Get a scaling mode of the tensor.

Returns:

Scaling mode of the tensor.

inline void zero_(cudaStream_t stream)

Public Static Attributes

static constexpr size_t defaultData = 1
static constexpr NVTEShape defaultShape = {&defaultData, 1}

Private Functions

inline NVTEShape convertShape(const NVTEShape &s)
inline NVTEShape convertShape(const std::vector<size_t> &s)

Private Members

NVTETensor tensor_ = nullptr

Wrapped NVTETensor.