warpgroup submodule#

class cutlass.cute.nvgpu.warpgroup.OperandMajorMode(value)#

Bases: Enum

An enumeration for the majorness of the input operands of the MMA.

class cutlass.cute.nvgpu.warpgroup.OperandSource(value)#

Bases: Enum

An enumeration for the source memory location of the A input operand of the MMA.

class cutlass.cute.nvgpu.warpgroup.Field(value)#

Bases: Enum

An enumeration for the fields of the MMA Atom that can be modified at runtime.

ACCUMULATE = 'accum_c'#
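
As a hedged sketch of how this field is typically used (assuming a `tiled_mma` built from one of the MMA Ops below; the `set` method name follows the CuTe DSL examples), the accumulate flag can be toggled so that the first MMA of a K-loop overwrites the accumulator instead of adding to it:

```python
# Illustrative sketch only: requires the cutlass Python DSL and an SM90 GPU.
# `tiled_mma` is assumed to be a tiled MMA built from an Op in this module.
from cutlass.cute.nvgpu import warpgroup

# Overwrite the accumulator on the first K-loop iteration (C = A * B) ...
tiled_mma.set(warpgroup.Field.ACCUMULATE, False)

# ... then accumulate on top of it for the remaining iterations (C += A * B).
tiled_mma.set(warpgroup.Field.ACCUMULATE, True)
```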
class cutlass.cute.nvgpu.warpgroup.MmaF16BF16Op(
ab_dtype: Type[cutlass.cute.typing.Numeric],
acc_dtype: Type[cutlass.cute.typing.Numeric],
instruction_shape: cutlass.cute.typing.Shape,
a_src: OperandSource,
a_major_mode: OperandMajorMode,
b_major_mode: OperandMajorMode,
)#

Bases: MmaOp

F16/BF16 warpgroup MMA Operation.

See the PTX documentation. This Operation covers the instructions using the .f16 or .bf16 qualifiers for the input operands.

descriptive_name = 'warpgroup F16/BF16 MMA Operation'#
__init__(
ab_dtype: Type[cutlass.cute.typing.Numeric],
acc_dtype: Type[cutlass.cute.typing.Numeric],
instruction_shape: cutlass.cute.typing.Shape,
a_src: OperandSource,
a_major_mode: OperandMajorMode,
b_major_mode: OperandMajorMode,
) None#
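
A hedged construction sketch (assuming the enum member names `SMEM` and `K` from the DSL; the instruction shape is an illustrative choice, not the only legal one):

```python
# Illustrative sketch only: requires the cutlass Python DSL (nvidia-cutlass-dsl)
# and targets SM90 (Hopper).
import cutlass
from cutlass.cute.nvgpu import warpgroup

# A 64x128x16 F16 MMA accumulating in F32, with the A operand sourced from
# shared memory and both operands K-major.
op = warpgroup.MmaF16BF16Op(
    cutlass.Float16,               # ab_dtype: A and B are both F16
    cutlass.Float32,               # acc_dtype: accumulate in F32
    (64, 128, 16),                 # instruction_shape (M, N, K)
    warpgroup.OperandSource.SMEM,  # A operand read from shared memory
    warpgroup.OperandMajorMode.K,  # A is K-major
    warpgroup.OperandMajorMode.K,  # B is K-major
)
```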
class cutlass.cute.nvgpu.warpgroup.MmaF8Op(
a_dtype: Type[cutlass.cute.typing.Numeric],
b_dtype: Type[cutlass.cute.typing.Numeric],
acc_dtype: Type[cutlass.cute.typing.Numeric],
instruction_shape: cutlass.cute.typing.Shape,
a_src: OperandSource,
a_major_mode: OperandMajorMode,
b_major_mode: OperandMajorMode,
)#

Bases: MmaOp

F8 warpgroup MMA Operation.

See the PTX documentation. This Operation covers the instructions using the .e4m3 or .e5m2 qualifiers for the input operands.

descriptive_name = 'warpgroup F8 MMA Operation'#
__init__(
a_dtype: Type[cutlass.cute.typing.Numeric],
b_dtype: Type[cutlass.cute.typing.Numeric],
acc_dtype: Type[cutlass.cute.typing.Numeric],
instruction_shape: cutlass.cute.typing.Shape,
a_src: OperandSource,
a_major_mode: OperandMajorMode,
b_major_mode: OperandMajorMode,
) None#
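
Unlike MmaF16BF16Op, this Op takes separate `a_dtype` and `b_dtype` parameters, so the two FP8 operands may use different qualifiers. A hedged sketch (the `Float8E4M3FN`/`Float8E5M2` type names are assumed from the DSL's numeric types; the instruction shape is illustrative):

```python
# Illustrative sketch only: requires the cutlass Python DSL and an SM90 GPU.
import cutlass
from cutlass.cute.nvgpu import warpgroup

# Mixed FP8 inputs: A in e4m3, B in e5m2, accumulating in F32.
op = warpgroup.MmaF8Op(
    cutlass.Float8E4M3FN,          # a_dtype
    cutlass.Float8E5M2,            # b_dtype
    cutlass.Float32,               # acc_dtype
    (64, 128, 32),                 # instruction_shape (M, N, K)
    warpgroup.OperandSource.SMEM,  # A operand read from shared memory
    warpgroup.OperandMajorMode.K,  # A is K-major
    warpgroup.OperandMajorMode.K,  # B is K-major
)
```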
class cutlass.cute.nvgpu.warpgroup.SmemLayoutAtomKind(value)#

Bases: Enum

Enum class for the kinds of SMEM layout atoms for SM90.

Given a swizzle kind, an SMEM layout atom is the smallest compact layout that can be used to construct an SMEM layout for operand A or B via blocked product, such that the resulting layout is legal for both TMA and UMMA.

Note that there are other ways of creating legal layouts for operand A and B.

MN_INTER = 1#
MN_SW32 = 2#
MN_SW64 = 3#
MN_SW128 = 4#
K_INTER = 5#
K_SW32 = 6#
K_SW64 = 7#
K_SW128 = 8#
cutlass.cute.nvgpu.warpgroup.make_smem_layout_atom(
kind: SmemLayoutAtomKind,
element_type: Type[cutlass.cute.typing.Numeric],
*,
loc=None,
ip=None,
) core.ComposedLayout#

Makes an SMEM layout Atom.

This function creates a composed layout, in units of elements, consistent with the requested layout Atom kind and element data type.

Parameters:
  • kind (SmemLayoutAtomKind) – The kind of layout Atom

  • element_type (Type[Numeric]) – The element data type to construct the layout for

Returns:

The SMEM layout atom

Return type:

core.ComposedLayout
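
A hedged usage sketch (assuming `cute.tile_to_shape` for tiling the atom over a full operand buffer, as in the DSL examples; the buffer shape and stage count are illustrative):

```python
# Illustrative sketch only: requires the cutlass Python DSL and an SM90 GPU.
import cutlass
import cutlass.cute as cute
from cutlass.cute.nvgpu import warpgroup

# 128B-swizzled, K-major layout atom for F16 operands.
atom = warpgroup.make_smem_layout_atom(
    warpgroup.SmemLayoutAtomKind.K_SW128,
    cutlass.Float16,
)

# Tile the atom over a multi-stage (M, K, stages) operand-A buffer; by the
# definition above, the result is legal for both TMA and the MMA.
a_smem_layout = cute.tile_to_shape(atom, (128, 64, 2), order=(0, 1, 2))
```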

cutlass.cute.nvgpu.warpgroup.fence(*, loc=None, ip=None) None#

Issues a wgmma.fence, which orders prior register accesses by the warpgroup with respect to subsequent wgmma.mma_async instructions. See the PTX documentation.

cutlass.cute.nvgpu.warpgroup.commit_group(*, loc=None, ip=None) None#

Issues a wgmma.commit_group, which commits all prior uncommitted wgmma.mma_async instructions into a wgmma-group. See the PTX documentation.

cutlass.cute.nvgpu.warpgroup.wait_group(group, *, loc=None, ip=None) None#

Issues a wgmma.wait_group, which makes the executing thread wait until at most group of the most recently committed wgmma-groups are still pending. See the PTX documentation.
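
Taken together, these three functions implement the standard wgmma synchronization protocol: fence before issuing, commit after issuing, then wait for completion. A hedged sketch of the sequence inside a kernel (assuming `tiled_mma` and fragments `acc`, `a_frag`, `b_frag` set up elsewhere, and the `cute.gemm` issue helper from the DSL):

```python
# Illustrative sketch only: requires the cutlass Python DSL and an SM90 GPU.
import cutlass.cute as cute
from cutlass.cute.nvgpu import warpgroup

# Order prior register accesses before the asynchronous MMAs.
warpgroup.fence()

# Issue the wgmma.mma_async instructions for this tile.
cute.gemm(tiled_mma, acc, a_frag, b_frag, acc)

# Close the wgmma-group containing the MMAs issued above ...
warpgroup.commit_group()

# ... and wait until no committed groups remain pending.
warpgroup.wait_group(0)
```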