1.3.2. Bfloat16 Arithmetic Functions

[Bfloat16 Precision Intrinsics]

To use these functions, include the header file cuda_bf16.h in your program.

Functions

__host__ __device__ __nv_bfloat162 __h2div ( const __nv_bfloat162 a, const __nv_bfloat162 b )
Performs nv_bfloat162 vector division in round-to-nearest-even mode.
__host__ __device__ __nv_bfloat16 __habs ( const __nv_bfloat16 a )
Calculates the absolute value of input nv_bfloat16 number and returns the result.
__host__ __device__ __nv_bfloat16 __hadd ( const __nv_bfloat16 a, const __nv_bfloat16 b )
Performs nv_bfloat16 addition in round-to-nearest-even mode.
__host__ __device__ __nv_bfloat16 __hadd_rn ( const __nv_bfloat16 a, const __nv_bfloat16 b )
Performs nv_bfloat16 addition in round-to-nearest-even mode.
__host__ __device__ __nv_bfloat16 __hadd_sat ( const __nv_bfloat16 a, const __nv_bfloat16 b )
Performs nv_bfloat16 addition in round-to-nearest-even mode, with saturation to [0.0, 1.0].
__host__ __device__ __nv_bfloat16 __hdiv ( const __nv_bfloat16 a, const __nv_bfloat16 b )
Performs nv_bfloat16 division in round-to-nearest-even mode.
__device__ __nv_bfloat16 __hfma ( const __nv_bfloat16 a, const __nv_bfloat16 b, const __nv_bfloat16 c )
Performs nv_bfloat16 fused multiply-add in round-to-nearest-even mode.
__device__ __nv_bfloat16 __hfma_relu ( const __nv_bfloat16 a, const __nv_bfloat16 b, const __nv_bfloat16 c )
Performs nv_bfloat16 fused multiply-add in round-to-nearest-even mode with relu saturation.
__device__ __nv_bfloat16 __hfma_sat ( const __nv_bfloat16 a, const __nv_bfloat16 b, const __nv_bfloat16 c )
Performs nv_bfloat16 fused multiply-add in round-to-nearest-even mode, with saturation to [0.0, 1.0].
__host__ __device__ __nv_bfloat16 __hmul ( const __nv_bfloat16 a, const __nv_bfloat16 b )
Performs nv_bfloat16 multiplication in round-to-nearest-even mode.
__host__ __device__ __nv_bfloat16 __hmul_rn ( const __nv_bfloat16 a, const __nv_bfloat16 b )
Performs nv_bfloat16 multiplication in round-to-nearest-even mode.
__host__ __device__ __nv_bfloat16 __hmul_sat ( const __nv_bfloat16 a, const __nv_bfloat16 b )
Performs nv_bfloat16 multiplication in round-to-nearest-even mode, with saturation to [0.0, 1.0].
__host__ __device__ __nv_bfloat16 __hneg ( const __nv_bfloat16 a )
Negates input nv_bfloat16 number and returns the result.
__host__ __device__ __nv_bfloat16 __hsub ( const __nv_bfloat16 a, const __nv_bfloat16 b )
Performs nv_bfloat16 subtraction in round-to-nearest-even mode.
__host__ __device__ __nv_bfloat16 __hsub_rn ( const __nv_bfloat16 a, const __nv_bfloat16 b )
Performs nv_bfloat16 subtraction in round-to-nearest-even mode.
__host__ __device__ __nv_bfloat16 __hsub_sat ( const __nv_bfloat16 a, const __nv_bfloat16 b )
Performs nv_bfloat16 subtraction in round-to-nearest-even mode, with saturation to [0.0, 1.0].
__device__ __nv_bfloat16 atomicAdd ( __nv_bfloat16* const address, const __nv_bfloat16 val )
Adds val to the value stored at address in global or shared memory, and writes the result back to address. The read, add, and write are performed as one atomic operation.
__host__ __device__ __nv_bfloat16 operator* ( const __nv_bfloat16& lh, const __nv_bfloat16& rh )
__host__ __device__ __nv_bfloat16& operator*= ( __nv_bfloat16& lh, const __nv_bfloat16& rh )
__host__ __device__ __nv_bfloat16 operator+ ( const __nv_bfloat16& h )
__host__ __device__ __nv_bfloat16 operator+ ( const __nv_bfloat16& lh, const __nv_bfloat16& rh )
__host__ __device__ __nv_bfloat16 operator++ ( __nv_bfloat16& h, const int ignored )
__host__ __device__ __nv_bfloat16& operator++ ( __nv_bfloat16& h )
__host__ __device__ __nv_bfloat16& operator+= ( __nv_bfloat16& lh, const __nv_bfloat16& rh )
__host__ __device__ __nv_bfloat16 operator- ( const __nv_bfloat16& h )
__host__ __device__ __nv_bfloat16 operator- ( const __nv_bfloat16& lh, const __nv_bfloat16& rh )
__host__ __device__ __nv_bfloat16 operator-- ( __nv_bfloat16& h, const int ignored )
__host__ __device__ __nv_bfloat16& operator-- ( __nv_bfloat16& h )
__host__ __device__ __nv_bfloat16& operator-= ( __nv_bfloat16& lh, const __nv_bfloat16& rh )
__host__ __device__ __nv_bfloat16 operator/ ( const __nv_bfloat16& lh, const __nv_bfloat16& rh )
__host__ __device__ __nv_bfloat16& operator/= ( __nv_bfloat16& lh, const __nv_bfloat16& rh )

Functions

__host__ __device__ __nv_bfloat162 __h2div ( const __nv_bfloat162 a, const __nv_bfloat162 b )
Performs nv_bfloat162 vector division in round-to-nearest-even mode.
Description

Divides nv_bfloat162 input vector a by input vector b in round-to-nearest-even mode.

__host__ __device__ __nv_bfloat16 __habs ( const __nv_bfloat16 a )
Calculates the absolute value of input nv_bfloat16 number and returns the result.
Parameters
a
- nv_bfloat16. Is only being read.
Returns

nv_bfloat16

  • The absolute value of a.
Description

Calculates the absolute value of input nv_bfloat16 number and returns the result.

__host__ __device__ __nv_bfloat16 __hadd ( const __nv_bfloat16 a, const __nv_bfloat16 b )
Performs nv_bfloat16 addition in round-to-nearest-even mode.
Description

Performs nv_bfloat16 addition of inputs a and b, in round-to-nearest-even mode.

__host__ __device__ __nv_bfloat16 __hadd_rn ( const __nv_bfloat16 a, const __nv_bfloat16 b )
Performs nv_bfloat16 addition in round-to-nearest-even mode.
Description

Performs nv_bfloat16 addition of inputs a and b, in round-to-nearest-even mode. Prevents floating-point contractions of mul+add into fma.

__host__ __device__ __nv_bfloat16 __hadd_sat ( const __nv_bfloat16 a, const __nv_bfloat16 b )
Performs nv_bfloat16 addition in round-to-nearest-even mode, with saturation to [0.0, 1.0].
Parameters
a
- nv_bfloat16. Is only being read.
b
- nv_bfloat16. Is only being read.
Returns

nv_bfloat16

  • The sum of a and b, with respect to saturation.
Description

Performs nv_bfloat16 add of inputs a and b, in round-to-nearest-even mode, and clamps the result to range [0.0, 1.0]. NaN results are flushed to +0.0.

__host__ __device__ __nv_bfloat16 __hdiv ( const __nv_bfloat16 a, const __nv_bfloat16 b )
Performs nv_bfloat16 division in round-to-nearest-even mode.
Description

Divides nv_bfloat16 input a by input b in round-to-nearest-even mode.

__device__ __nv_bfloat16 __hfma ( const __nv_bfloat16 a, const __nv_bfloat16 b, const __nv_bfloat16 c )
Performs nv_bfloat16 fused multiply-add in round-to-nearest-even mode.
Description

Performs nv_bfloat16 multiply on inputs a and b, then performs a nv_bfloat16 add of the result with c, rounding the result once in round-to-nearest-even mode.

__device__ __nv_bfloat16 __hfma_relu ( const __nv_bfloat16 a, const __nv_bfloat16 b, const __nv_bfloat16 c )
Performs nv_bfloat16 fused multiply-add in round-to-nearest-even mode with relu saturation.
Parameters
a
- nv_bfloat16. Is only being read.
b
- nv_bfloat16. Is only being read.
c
- nv_bfloat16. Is only being read.
Returns

nv_bfloat16

  • The result of fused multiply-add operation on a, b, and c with relu saturation.
Description

Performs nv_bfloat16 multiply on inputs a and b, then performs a nv_bfloat16 add of the result with c, rounding the result once in round-to-nearest-even mode. A negative result is then clamped to 0. A NaN result is converted to canonical NaN.

__device__ __nv_bfloat16 __hfma_sat ( const __nv_bfloat16 a, const __nv_bfloat16 b, const __nv_bfloat16 c )
Performs nv_bfloat16 fused multiply-add in round-to-nearest-even mode, with saturation to [0.0, 1.0].
Parameters
a
- nv_bfloat16. Is only being read.
b
- nv_bfloat16. Is only being read.
c
- nv_bfloat16. Is only being read.
Returns

nv_bfloat16

  • The result of fused multiply-add operation on a, b, and c, with respect to saturation.
Description

Performs nv_bfloat16 multiply on inputs a and b, then performs a nv_bfloat16 add of the result with c, rounding the result once in round-to-nearest-even mode, and clamps the result to range [0.0, 1.0]. NaN results are flushed to +0.0.

__host__ __device__ __nv_bfloat16 __hmul ( const __nv_bfloat16 a, const __nv_bfloat16 b )
Performs nv_bfloat16 multiplication in round-to-nearest-even mode.
Description

Performs nv_bfloat16 multiplication of inputs a and b, in round-to-nearest-even mode.

__host__ __device__ __nv_bfloat16 __hmul_rn ( const __nv_bfloat16 a, const __nv_bfloat16 b )
Performs nv_bfloat16 multiplication in round-to-nearest-even mode.
Description

Performs nv_bfloat16 multiplication of inputs a and b, in round-to-nearest-even mode. Prevents floating-point contractions of mul+add or sub into fma.

__host__ __device__ __nv_bfloat16 __hmul_sat ( const __nv_bfloat16 a, const __nv_bfloat16 b )
Performs nv_bfloat16 multiplication in round-to-nearest-even mode, with saturation to [0.0, 1.0].
Parameters
a
- nv_bfloat16. Is only being read.
b
- nv_bfloat16. Is only being read.
Returns

nv_bfloat16

  • The result of multiplying a and b, with respect to saturation.
Description

Performs nv_bfloat16 multiplication of inputs a and b, in round-to-nearest-even mode, and clamps the result to range [0.0, 1.0]. NaN results are flushed to +0.0.

__host__ __device__ __nv_bfloat16 __hneg ( const __nv_bfloat16 a )
Negates input nv_bfloat16 number and returns the result.
Description

Negates input nv_bfloat16 number and returns the result.

__host__ __device__ __nv_bfloat16 __hsub ( const __nv_bfloat16 a, const __nv_bfloat16 b )
Performs nv_bfloat16 subtraction in round-to-nearest-even mode.
Description

Subtracts nv_bfloat16 input b from input a in round-to-nearest-even mode.

__host__ __device__ __nv_bfloat16 __hsub_rn ( const __nv_bfloat16 a, const __nv_bfloat16 b )
Performs nv_bfloat16 subtraction in round-to-nearest-even mode.
Description

Subtracts nv_bfloat16 input b from input a in round-to-nearest-even mode. Prevents floating-point contractions of mul+sub into fma.

__host__ __device__ __nv_bfloat16 __hsub_sat ( const __nv_bfloat16 a, const __nv_bfloat16 b )
Performs nv_bfloat16 subtraction in round-to-nearest-even mode, with saturation to [0.0, 1.0].
Parameters
a
- nv_bfloat16. Is only being read.
b
- nv_bfloat16. Is only being read.
Returns

nv_bfloat16

  • The result of subtraction of b from a, with respect to saturation.
Description

Subtracts nv_bfloat16 input b from input a in round-to-nearest-even mode, and clamps the result to range [0.0, 1.0]. NaN results are flushed to +0.0.

__device__ __nv_bfloat16 atomicAdd ( __nv_bfloat16* const address, const __nv_bfloat16 val )
Adds val to the value stored at address in global or shared memory, and writes the result back to address. The read, add, and write are performed as one atomic operation.
Parameters
address
- __nv_bfloat16*. An address in global or shared memory.
val
- __nv_bfloat16. The value to be added.
Returns

__nv_bfloat16

  • The old value read from address.
Description

The location of address must be in global or shared memory. This operation has undefined behavior otherwise. This operation is natively supported by devices of compute capability 9.x and higher; older devices of compute capability 7.x and 8.x use an emulation path.

Note:

For more details on this function, see the Atomic Functions section in the CUDA C++ Programming Guide.

__host__ __device__ __nv_bfloat16 operator* ( const __nv_bfloat16& lh, const __nv_bfloat16& rh )
Description

Performs nv_bfloat16 multiplication operation. See also __hmul(__nv_bfloat16, __nv_bfloat16).

__host__ __device__ __nv_bfloat16& operator*= ( __nv_bfloat16& lh, const __nv_bfloat16& rh )
Description

Performs nv_bfloat16 compound assignment with multiplication operation.

__host__ __device__ __nv_bfloat16 operator+ ( const __nv_bfloat16& h )
Description

Implements nv_bfloat16 unary plus operator, returns input value.

__host__ __device__ __nv_bfloat16 operator+ ( const __nv_bfloat16& lh, const __nv_bfloat16& rh )
Description

Performs nv_bfloat16 addition operation. See also __hadd(__nv_bfloat16, __nv_bfloat16).

__host__ __device__ __nv_bfloat16 operator++ ( __nv_bfloat16& h, const int ignored )
Description

Performs nv_bfloat16 postfix increment operation.

__host__ __device__ __nv_bfloat16& operator++ ( __nv_bfloat16& h )
Description

Performs nv_bfloat16 prefix increment operation.

__host__ __device__ __nv_bfloat16& operator+= ( __nv_bfloat16& lh, const __nv_bfloat16& rh )
Description

Performs nv_bfloat16 compound assignment with addition operation.

__host__ __device__ __nv_bfloat16 operator- ( const __nv_bfloat16& h )
Description

Implements nv_bfloat16 unary minus operator. See also __hneg(__nv_bfloat16).

__host__ __device__ __nv_bfloat16 operator- ( const __nv_bfloat16& lh, const __nv_bfloat16& rh )
Description

Performs nv_bfloat16 subtraction operation. See also __hsub(__nv_bfloat16, __nv_bfloat16).

__host__ __device__ __nv_bfloat16 operator-- ( __nv_bfloat16& h, const int ignored )
Description

Performs nv_bfloat16 postfix decrement operation.

__host__ __device__ __nv_bfloat16& operator-- ( __nv_bfloat16& h )
Description

Performs nv_bfloat16 prefix decrement operation.

__host__ __device__ __nv_bfloat16& operator-= ( __nv_bfloat16& lh, const __nv_bfloat16& rh )
Description

Performs nv_bfloat16 compound assignment with subtraction operation.

__host__ __device__ __nv_bfloat16 operator/ ( const __nv_bfloat16& lh, const __nv_bfloat16& rh )
Description

Performs nv_bfloat16 division operation. See also __hdiv(__nv_bfloat16, __nv_bfloat16).

__host__ __device__ __nv_bfloat16& operator/= ( __nv_bfloat16& lh, const __nv_bfloat16& rh )
Description

Performs nv_bfloat16 compound assignment with division operation.