api/c_api/_nv_infer_runtime_plugin_8h_source.html

/*

 * SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.

 * SPDX-License-Identifier: Apache-2.0

 *

 * Licensed under the Apache License, Version 2.0 (the "License");

 * you may not use this file except in compliance with the License.

 * You may obtain a copy of the License at

 *

 * http://www.apache.org/licenses/LICENSE-2.0

 *

 * Unless required by applicable law or agreed to in writing, software

 * distributed under the License is distributed on an "AS IS" BASIS,

 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.

 * See the License for the specific language governing permissions and

 * limitations under the License.

 */


#ifndef NV_INFER_RUNTIME_PLUGIN_H

#define NV_INFER_RUNTIME_PLUGIN_H


#define NV_INFER_INTERNAL_INCLUDE_RUNTIME_BASE 1

#include "NvInferRuntimeBase.h"

#undef NV_INFER_INTERNAL_INCLUDE_RUNTIME_BASE


namespace nvinfer1

{


//! \brief PluginFormat is reserved for backward compatibility; it is an alias of TensorFormat.
using PluginFormat = TensorFormat;


//! \brief Bit that is OR-ed into a version enumerator to form its Python variant.
//! Used by PluginVersion::kV2_DYNAMICEXT_PYTHON and PluginCreatorVersion::kV1_PYTHON.
static constexpr int32_t kPLUGIN_VERSION_PYTHON_BIT = 0x40;


//! \brief Fields that a plugin might see for an input or output.
struct PluginTensorDesc

{

    //! Dimensions.
    Dims dims;

    //! Data type of the tensor, expressed as a DataType value.
    DataType type;

    //! Tensor format.
    TensorFormat format;

    //! Scale for INT8 data type.
    float scale;

};


//! \brief Definition of plugin versions.
//!
//! The underlying type is uint8_t. IPluginV2Ext and IPluginV2IOExt shift their enumerator
//! into bits 24..31 of the value returned by getTensorRTVersion().
enum class PluginVersion : uint8_t

{

    //! IPluginV2.
    kV2 = 0,

    //! IPluginV2Ext.
    kV2_EXT = 1,

    //! IPluginV2IOExt.
    kV2_IOEXT = 2,

    //! IPluginV2DynamicExt.
    kV2_DYNAMICEXT = 3,

    //! IPluginV2DynamicExt-based Python plugins (kV2_DYNAMICEXT with the Python bit set, i.e. 0x43).
    kV2_DYNAMICEXT_PYTHON = kPLUGIN_VERSION_PYTHON_BIT | 3

};


//! \brief Enum to identify version of the plugin creator.
enum class PluginCreatorVersion : int32_t

{

    //! IPluginCreator.
    kV1 = 0,

    //! IPluginCreator-based Python plugin creators (value equals kPLUGIN_VERSION_PYTHON_BIT).
    kV1_PYTHON = kPLUGIN_VERSION_PYTHON_BIT

};


//!
//! \class IPluginV2
//!
//! \brief Plugin class for user-implemented layers.
//!
//! The class is marked TRT_DEPRECATED. Every member function except getTensorRTVersion() is pure
//! virtual and must be provided by the implementer.
//!
//! \note Member descriptions that start with "Presumably" are inferred from names and signatures
//! only; confirm them against the TensorRT API reference before relying on them.
//!
//! \warning The declaration order of the virtual members determines the vtable layout; do not reorder.
//!
class TRT_DEPRECATED IPluginV2

{

public:

    //! \brief Return the API version with which this plugin was built.
    //! This base implementation returns NV_TENSORRT_VERSION unchanged.
    virtual int32_t getTensorRTVersion() const noexcept

    {

        return NV_TENSORRT_VERSION;

    }


    //! \brief Return the plugin type. Should match the plugin name returned by the corresponding
    //! plugin creator.
    virtual AsciiChar const* getPluginType() const noexcept = 0;


    //! \brief Return the plugin version as a character string.
    //! Presumably this should match the version reported by the corresponding plugin creator.
    virtual AsciiChar const* getPluginVersion() const noexcept = 0;


    //! \brief Presumably the number of outputs produced by the layer.
    virtual int32_t getNbOutputs() const noexcept = 0;


    //! \brief Presumably the dimensions of the output tensor at \p index.
    //! \param index Output index being queried.
    //! \param inputs Array of input dimensions (presumably \p nbInputDims entries).
    //! \param nbInputDims Number of entries in \p inputs.
    virtual Dims getOutputDimensions(int32_t index, Dims const* inputs, int32_t nbInputDims) noexcept = 0;


    //! \brief Presumably reports whether the plugin supports the given data type / format pair.
    //! IPluginV2IOExt overrides this as a private final function that always returns false.
    virtual bool supportsFormat(DataType type, PluginFormat format) const noexcept = 0;


    //! \brief Presumably configures the layer with its input/output dimensions, data type, format
    //! and maximum batch size. IPluginV2Ext overrides this with an empty body.
    virtual void configureWithFormat(Dims const* inputDims, int32_t nbInputs, Dims const* outputDims, int32_t nbOutputs,

        DataType type, PluginFormat format, int32_t maxBatchSize) noexcept

        = 0;


    //! \brief Presumably prepares the layer for execution; the meaning of the int32_t result is
    //! not visible here (TODO confirm: likely 0 on success).
    virtual int32_t initialize() noexcept = 0;


    //! \brief Presumably the counterpart of initialize(), releasing what it acquired.
    virtual void terminate() noexcept = 0;


    //! \brief Presumably the workspace size, in bytes, needed for batches of up to \p maxBatchSize.
    virtual size_t getWorkspaceSize(int32_t maxBatchSize) const noexcept = 0;


    //! \brief Presumably executes the layer on \p stream.
    //! \param batchSize Number of items in the batch (name-based; TODO confirm).
    //! \param inputs Array of read-only input buffer pointers.
    //! \param outputs Array of output buffer pointers.
    //! \param workspace Scratch buffer; presumably sized via getWorkspaceSize().
    //! \param stream CUDA stream handle passed through to the implementation.
    virtual int32_t enqueue(int32_t batchSize, void const* const* inputs, void* const* outputs, void* workspace,

        cudaStream_t stream) noexcept

        = 0;


    //! \brief Presumably the number of bytes serialize() writes.
    virtual size_t getSerializationSize() const noexcept = 0;


    //! \brief Presumably writes the plugin state into \p buffer (size per getSerializationSize()).
    virtual void serialize(void* buffer) const noexcept = 0;


    //! \brief Presumably destroys the plugin object; ownership rules are not visible here.
    virtual void destroy() noexcept = 0;


    //! \brief Return a new plugin object of type IPluginV2*. Presumably a copy of this one
    //! including its internal parameters; ownership of the result is not visible here.
    virtual IPluginV2* clone() const noexcept = 0;


    //! \brief Presumably sets the namespace this plugin object belongs to.
    virtual void setPluginNamespace(AsciiChar const* pluginNamespace) noexcept = 0;


    //! \brief Presumably returns the namespace previously set with setPluginNamespace().
    virtual AsciiChar const* getPluginNamespace() const noexcept = 0;


    // @cond SuppressDoxyWarnings

    IPluginV2() = default;

    virtual ~IPluginV2() noexcept = default;

// @endcond


protected:

// @cond SuppressDoxyWarnings

    // Copy/move operations are protected, so only derived classes can copy or move through this base.
    IPluginV2(IPluginV2 const&) = default;

    IPluginV2(IPluginV2&&) = default;

    IPluginV2& operator=(IPluginV2 const&) & = default;

    IPluginV2& operator=(IPluginV2&&) & = default;

// @endcond

};


//!
//! \class IPluginV2Ext
//!
//! \brief Plugin class for user-implemented layers.
//!
//! Extends IPluginV2 with per-tensor data type queries, broadcast-across-batch queries and
//! hooks for attaching to / detaching from an execution context. The class is marked TRT_DEPRECATED.
//!
//! \warning Members are kept in their original declaration order because that order fixes the vtable layout.
//!
class TRT_DEPRECATED IPluginV2Ext : public IPluginV2
{
public:
    //!
    //! \brief Return the DataType of the plugin output at the requested index.
    //!
    //! \param index Index of the output being queried.
    //! \param inputTypes Array of input data types (presumably \p nbInputs entries — TODO confirm).
    //! \param nbInputs Number of inputs.
    //!
    virtual nvinfer1::DataType getOutputDataType(
        int32_t index, nvinfer1::DataType const* inputTypes, int32_t nbInputs) const noexcept = 0;

    //!
    //! \brief Return true if the output tensor is broadcast across a batch.
    //!
    //! \param outputIndex Index of the output being queried.
    //! \param inputIsBroadcasted Per-input flags (presumably \p nbInputs entries — TODO confirm).
    //! \param nbInputs Number of inputs.
    //!
    TRT_DEPRECATED virtual bool isOutputBroadcastAcrossBatch(
        int32_t outputIndex, bool const* inputIsBroadcasted, int32_t nbInputs) const noexcept = 0;

    //!
    //! \brief Return true if the plugin can use an input tensor that is broadcast across batch
    //! without replication.
    //!
    //! \param inputIndex Index of the input being queried.
    //!
    TRT_DEPRECATED virtual bool canBroadcastInputAcrossBatch(int32_t inputIndex) const noexcept = 0;

    //!
    //! \brief Configure the layer with input and output data types.
    //!
    //! IPluginV2IOExt overrides this overload as a private, final, empty function.
    //!
    virtual void configurePlugin(Dims const* inputDims, int32_t nbInputs, Dims const* outputDims,
        int32_t nbOutputs, DataType const* inputTypes, DataType const* outputTypes,
        bool const* inputIsBroadcast, bool const* outputIsBroadcast, PluginFormat floatFormat,
        int32_t maxBatchSize) noexcept = 0;

    IPluginV2Ext() = default;
    ~IPluginV2Ext() override = default;

    //!
    //! \brief Attach the plugin object to an execution context and grant the plugin access to
    //! some context resources.
    //!
    //! The default implementation ignores all three arguments and does nothing.
    //!
    virtual void attachToContext(cudnnContext* /*cudnn*/, cublasContext* /*cublas*/,
        IGpuAllocator* /*allocator*/) noexcept
    {
    }

    //!
    //! \brief Detach the plugin object from its execution context.
    //!
    //! The default implementation does nothing.
    //!
    virtual void detachFromContext() noexcept
    {
    }

    //!
    //! \brief Clone the plugin object. This copies over internal plugin parameters as well and
    //! returns a new plugin object.
    //!
    //! Narrows the return type of IPluginV2::clone() to IPluginV2Ext*.
    //!
    IPluginV2Ext* clone() const noexcept override = 0;

protected:
    // @cond SuppressDoxyWarnings
    IPluginV2Ext(IPluginV2Ext const&) = default;
    IPluginV2Ext(IPluginV2Ext&&) = default;
    IPluginV2Ext& operator=(IPluginV2Ext const&) & = default;
    IPluginV2Ext& operator=(IPluginV2Ext&&) & = default;
    // @endcond

    //!
    //! \brief Return the API version with which this plugin was built.
    //!
    //! Bits 24..31 carry PluginVersion::kV2_EXT; bits 0..23 carry the low 24 bits of NV_TENSORRT_VERSION.
    //!
    int32_t getTensorRTVersion() const noexcept override
    {
        uint32_t const interfaceTag = static_cast<uint32_t>(PluginVersion::kV2_EXT) << 24U;
        uint32_t const libraryVersion = static_cast<uint32_t>(NV_TENSORRT_VERSION) & 0xFFFFFFU;
        return static_cast<int32_t>(interfaceTag | libraryVersion);
    }

    //!
    //! \brief Derived classes must not implement this. In a C++11 API it would be override final.
    //!
    void configureWithFormat(Dims const* /*inputDims*/, int32_t /*nbInputs*/, Dims const* /*outputDims*/,
        int32_t /*nbOutputs*/, DataType /*type*/, PluginFormat /*format*/,
        int32_t /*maxBatchSize*/) noexcept override
    {
    }
};


//!
//! \class IPluginV2IOExt
//!
//! \brief Plugin class for user-implemented layers.
//!
//! Extends IPluginV2Ext with PluginTensorDesc-based configuration and format queries.
//! The older configurePlugin() / supportsFormat() entry points are sealed as private final
//! overrides. The class is marked TRT_DEPRECATED.
//!
//! \warning Members are kept in their original declaration order because that order fixes the vtable layout.
//!
class TRT_DEPRECATED IPluginV2IOExt : public IPluginV2Ext
{
public:
    //!
    //! \brief Configure the layer.
    //!
    //! \param in Array of input tensor descriptions (presumably \p nbInput entries — TODO confirm).
    //! \param nbInput Number of inputs.
    //! \param out Array of output tensor descriptions (presumably \p nbOutput entries — TODO confirm).
    //! \param nbOutput Number of outputs.
    //!
    virtual void configurePlugin(PluginTensorDesc const* in, int32_t nbInput, PluginTensorDesc const* out,
        int32_t nbOutput) noexcept = 0;

    //!
    //! \brief Return true if plugin supports the format and datatype for the input/output indexed by pos.
    //!
    //! \param pos Index into \p inOut of the tensor being queried.
    //! \param inOut Tensor descriptions (presumably inputs followed by outputs — TODO confirm).
    //! \param nbInputs Number of inputs.
    //! \param nbOutputs Number of outputs.
    //!
    virtual bool supportsFormatCombination(int32_t pos, PluginTensorDesc const* inOut, int32_t nbInputs,
        int32_t nbOutputs) const noexcept = 0;

    // @cond SuppressDoxyWarnings
    IPluginV2IOExt() = default;
    ~IPluginV2IOExt() override = default;
    // @endcond

protected:
    // @cond SuppressDoxyWarnings
    IPluginV2IOExt(IPluginV2IOExt const&) = default;
    IPluginV2IOExt(IPluginV2IOExt&&) = default;
    IPluginV2IOExt& operator=(IPluginV2IOExt const&) & = default;
    IPluginV2IOExt& operator=(IPluginV2IOExt&&) & = default;
    // @endcond

    //!
    //! \brief Return the API version with which this plugin was built. The upper byte is reserved
    //! by TensorRT.
    //!
    //! Bits 24..31 carry PluginVersion::kV2_IOEXT; bits 0..23 carry the low 24 bits of NV_TENSORRT_VERSION.
    //!
    int32_t getTensorRTVersion() const noexcept override
    {
        uint32_t const interfaceTag = static_cast<uint32_t>(PluginVersion::kV2_IOEXT) << 24U;
        uint32_t const libraryVersion = static_cast<uint32_t>(NV_TENSORRT_VERSION) & 0xFFFFFFU;
        return static_cast<int32_t>(interfaceTag | libraryVersion);
    }

private:
    // The two overrides below seal obsolete base-class entry points; they must not be
    // implemented or used by derived classes.

    //! Obsolete IPluginV2Ext::configurePlugin overload: sealed, does nothing.
    void configurePlugin(Dims const*, int32_t, Dims const*, int32_t, DataType const*, DataType const*,
        bool const*, bool const*, PluginFormat, int32_t) noexcept final
    {
    }

    //! Obsolete IPluginV2::supportsFormat: sealed, always reports false.
    bool supportsFormat(DataType, PluginFormat) const noexcept final
    {
        return false;
    }
};


//! \brief The possible field types for custom layer.
enum class PluginFieldType : int32_t

{

    //! FP16 field type.
    kFLOAT16 = 0,

    //! FP32 field type.
    kFLOAT32 = 1,

    //! FP64 field type.
    kFLOAT64 = 2,

    //! INT8 field type.
    kINT8 = 3,

    //! INT16 field type.
    kINT16 = 4,

    //! INT32 field type.
    kINT32 = 5,

    //! char field type.
    kCHAR = 6,

    //! nvinfer1::Dims field type.
    kDIMS = 7,

    //! Unknown field type. This is the default used by the PluginField constructor.
    kUNKNOWN = 8,

    //! BF16 field type.
    kBF16 = 9,

    //! INT64 field type.
    kINT64 = 10,

    //! FP8 field type.
    kFP8 = 11,

    //! INT4 field type.
    kINT4 = 12,

};


class PluginField

{

public:

    AsciiChar const* name;

    void const* data;

    PluginFieldType type;

    int32_t length;


    PluginField(AsciiChar const* const name_ = nullptr, void const* const data_ = nullptr,

        PluginFieldType const type_ = PluginFieldType::kUNKNOWN, int32_t const length_ = 0) noexcept

        : name(name_)

        , data(data_)

        , type(type_)

        , length(length_)

    {

    }

};


//! \brief Plugin field collection struct.
struct PluginFieldCollection

{

    //! Number of PluginField entries.
    int32_t nbFields;

    //! Pointer to PluginField entries.
    PluginField const* fields;

};


//! \brief Enumerates the different capability types an IPluginV3 object may have.
enum class PluginCapabilityType : int32_t

{

    //! Core capability. Every IPluginV3 object must have this.
    kCORE = 0,

    //! Build capability. IPluginV3 objects provided to TensorRT build phase must have this.
    kBUILD = 1,

    //! Runtime capability. IPluginV3 objects provided to TensorRT build and execution phases must have this.
    kRUNTIME = 2

};


//! \brief Indicates a phase of operation of TensorRT.
enum class TensorRTPhase : int32_t

{

    //! Build phase (name-based description — confirm against the TensorRT API reference).
    kBUILD = 0,

    //! Runtime phase (name-based description — confirm against the TensorRT API reference).
    kRUNTIME = 1

};


namespace v_1_0

{

//!
//! \class IPluginCreatorInterface
//!
//! \brief Common base of plugin creator classes, derived from IVersionedInterface.
//!
//! Declares no members of its own beyond the special member functions. Construction, copy and
//! move are protected, so objects can only be created through a derived class; the destructor
//! is public and overrides the base-class destructor.
//!
class IPluginCreatorInterface : public IVersionedInterface

{

public:

    ~IPluginCreatorInterface() noexcept override = default;


protected:

    IPluginCreatorInterface() = default;

    IPluginCreatorInterface(IPluginCreatorInterface const&) = default;

    IPluginCreatorInterface(IPluginCreatorInterface&&) = default;

    // Assignment operators are lvalue-ref-qualified (&): assigning to a temporary is rejected.
    IPluginCreatorInterface& operator=(IPluginCreatorInterface const&) & = default;

    IPluginCreatorInterface& operator=(IPluginCreatorInterface&&) & = default;

};


//!
//! \class IPluginCreator
//!
//! \brief Creator interface for IPluginV2-based plugins; it creates and deserializes IPluginV2 objects.
//!
//! The class is marked TRT_DEPRECATED. PluginCreatorVersion::kV1 refers to this interface.
//!
//! \note Member descriptions that start with "Presumably" are inferred from names and signatures
//! only; confirm them against the TensorRT API reference before relying on them.
//!
//! \warning The declaration order of the virtual members determines the vtable layout; do not reorder.
//!
class TRT_DEPRECATED IPluginCreator : public IPluginCreatorInterface

{

public:

    //! \brief Return the plugin name.
    virtual AsciiChar const* getPluginName() const noexcept = 0;


    //! \brief Return the plugin version as a character string.
    virtual AsciiChar const* getPluginVersion() const noexcept = 0;


    //! \brief Presumably the list of fields that createPlugin() expects in its PluginFieldCollection.
    virtual PluginFieldCollection const* getFieldNames() noexcept = 0;


    //! \brief Presumably creates a plugin object from the supplied fields.
    //! \param name Name for the plugin (exact meaning not visible here — TODO confirm).
    //! \param fc Field collection carrying the plugin attributes.
    //! \return Pointer to an IPluginV2; ownership and failure value are not visible here.
    virtual IPluginV2* createPlugin(AsciiChar const* name, PluginFieldCollection const* fc) noexcept = 0;


    //! \brief Presumably recreates a plugin object from data produced by IPluginV2::serialize().
    //! \param name Name for the plugin (exact meaning not visible here — TODO confirm).
    //! \param serialData Pointer to the serialized bytes.
    //! \param serialLength Size of \p serialData (presumably in bytes).
    virtual IPluginV2* deserializePlugin(AsciiChar const* name, void const* serialData, size_t serialLength) noexcept

        = 0;


    //! \brief Presumably sets the namespace of this creator.
    virtual void setPluginNamespace(AsciiChar const* pluginNamespace) noexcept = 0;


    //! \brief Presumably returns the namespace previously set with setPluginNamespace().
    virtual AsciiChar const* getPluginNamespace() const noexcept = 0;


    IPluginCreator() = default;

    ~IPluginCreator() override = default;


protected:

    // @cond SuppressDoxyWarnings

    IPluginCreator(IPluginCreator const&) = default;

    IPluginCreator(IPluginCreator&&) = default;

    IPluginCreator& operator=(IPluginCreator const&) & = default;

    IPluginCreator& operator=(IPluginCreator&&) & = default;

    // @endcond

public:

    //! \brief Return the version information associated with this interface.
    //! The literal identifies the interface as "PLUGIN CREATOR_V1"; the trailing 1 and 0 are
    //! presumably the major and minor interface version (field order of InterfaceInfo is not
    //! visible here — TODO confirm). The string is an identity key: do not alter it.
    InterfaceInfo getInterfaceInfo() const noexcept override

    {

        return InterfaceInfo{"PLUGIN CREATOR_V1", 1, 0};

    }

};

} // namespace v_1_0


//! Expose v_1_0::IPluginCreatorInterface under the unversioned nvinfer1 name.
using IPluginCreatorInterface = v_1_0::IPluginCreatorInterface;


//! Expose v_1_0::IPluginCreator under the unversioned nvinfer1 name.
using IPluginCreator = v_1_0::IPluginCreator;


} // namespace nvinfer1


#endif // NV_INFER_RUNTIME_PLUGIN_H

NvInferRuntimeBase.h

NV_TENSORRT_VERSION
#define NV_TENSORRT_VERSION
Definition: NvInferRuntimeBase.h:93

TRT_DEPRECATED
#define TRT_DEPRECATED
Definition: NvInferRuntimeBase.h:45

nvinfer1::Dims64
Definition: NvInferRuntimeBase.h:202

nvinfer1::IPluginV2Ext
Plugin class for user-implemented layers.
Definition: NvInferRuntimePlugin.h:464

nvinfer1::IPluginV2Ext::canBroadcastInputAcrossBatch
virtual TRT_DEPRECATED bool canBroadcastInputAcrossBatch(int32_t inputIndex) const noexcept=0
Return true if the plugin can use an input tensor that is broadcast across batch without replication.

nvinfer1::IPluginV2Ext::~IPluginV2Ext
~IPluginV2Ext() override=default

nvinfer1::IPluginV2Ext::configureWithFormat
void configureWithFormat(Dims const *, int32_t, Dims const *, int32_t, DataType, PluginFormat, int32_t) noexcept override
Derived classes must not implement this. In a C++11 API it would be override final.
Definition: NvInferRuntimePlugin.h:694

nvinfer1::IPluginV2Ext::clone
IPluginV2Ext * clone() const noexcept override=0
Clone the plugin object. This copies over internal plugin parameters as well and returns a new plugin...

nvinfer1::IPluginV2Ext::configurePlugin
virtual void configurePlugin(Dims const *inputDims, int32_t nbInputs, Dims const *outputDims, int32_t nbOutputs, DataType const *inputTypes, DataType const *outputTypes, bool const *inputIsBroadcast, bool const *outputIsBroadcast, PluginFormat floatFormat, int32_t maxBatchSize) noexcept=0
Configure the layer with input and output data types.

nvinfer1::IPluginV2Ext::detachFromContext
virtual void detachFromContext() noexcept
Detach the plugin object from its execution context.
Definition: NvInferRuntimePlugin.h:641

nvinfer1::IPluginV2Ext::isOutputBroadcastAcrossBatch
virtual TRT_DEPRECATED bool isOutputBroadcastAcrossBatch(int32_t outputIndex, bool const *inputIsBroadcasted, int32_t nbInputs) const noexcept=0
Return true if the output tensor is broadcast across a batch.

nvinfer1::IPluginV2Ext::attachToContext
virtual void attachToContext(cudnnContext *, cublasContext *, IGpuAllocator *) noexcept
Attach the plugin object to an execution context and grant the plugin the access to some context reso...
Definition: NvInferRuntimePlugin.h:623

nvinfer1::IPluginV2Ext::IPluginV2Ext
IPluginV2Ext()=default

nvinfer1::IPluginV2Ext::getOutputDataType
virtual nvinfer1::DataType getOutputDataType(int32_t index, nvinfer1::DataType const *inputTypes, int32_t nbInputs) const noexcept=0
Return the DataType of the plugin output at the requested index.

nvinfer1::IPluginV2
Plugin class for user-implemented layers.
Definition: NvInferRuntimePlugin.h:128

nvinfer1::IPluginV2::getPluginType
virtual AsciiChar const * getPluginType() const noexcept=0
Return the plugin type. Should match the plugin name returned by the corresponding plugin creator.

nvinfer1::IPluginV2::getTensorRTVersion
virtual int32_t getTensorRTVersion() const noexcept
Return the API version with which this plugin was built.
Definition: NvInferRuntimePlugin.h:142

nvinfer1::IPluginV2IOExt
Plugin class for user-implemented layers.
Definition: NvInferRuntimePlugin.h:713

nvinfer1::IPluginV2IOExt::getTensorRTVersion
int32_t getTensorRTVersion() const noexcept override
Return the API version with which this plugin was built. The upper byte is reserved by TensorRT and i...
Definition: NvInferRuntimePlugin.h:801

nvinfer1::IPluginV2IOExt::configurePlugin
virtual void configurePlugin(PluginTensorDesc const *in, int32_t nbInput, PluginTensorDesc const *out, int32_t nbOutput) noexcept=0
Configure the layer.

nvinfer1::IPluginV2IOExt::supportsFormatCombination
virtual bool supportsFormatCombination(int32_t pos, PluginTensorDesc const *inOut, int32_t nbInputs, int32_t nbOutputs) const noexcept=0
Return true if plugin supports the format and datatype for the input/output indexed by pos.

nvinfer1::IVersionedInterface
An Interface class for version control.
Definition: NvInferRuntimeBase.h:400

nvinfer1::InterfaceInfo
Version information associated with a TRT interface.
Definition: NvInferRuntimeBase.h:365

nvinfer1::PluginField
Structure containing plugin attribute field names and associated data. This information can be parsed ...
Definition: NvInferRuntimePlugin.h:870

nvinfer1::PluginField::name
AsciiChar const  * name
Plugin field attribute name.
Definition: NvInferRuntimePlugin.h:873

nvinfer1::PluginField::PluginField
PluginField(AsciiChar const *const name_=nullptr, void const *const data_=nullptr, PluginFieldType const type_=PluginFieldType::kUNKNOWN, int32_t const length_=0) noexcept
Definition: NvInferRuntimePlugin.h:881

nvinfer1::PluginField::data
void const  * data
Plugin field attribute data.
Definition: NvInferRuntimePlugin.h:875

nvinfer1::PluginField::length
int32_t length
Number of data entries in the Plugin attribute.
Definition: NvInferRuntimePlugin.h:879

nvinfer1::PluginField::type
PluginFieldType type
Plugin field attribute type.
Definition: NvInferRuntimePlugin.h:877

nvinfer1::v_1_0::IGpuAllocator
Definition: NvInferRuntimeBase.h:469

nvinfer1::v_1_0::IPluginCreator
Definition: NvInferRuntimePlugin.h:948

nvinfer1::v_1_0::IPluginCreator::getPluginName
virtual AsciiChar const * getPluginName() const noexcept=0
Return the plugin name.

nvinfer1::v_1_0::IPluginCreatorInterface
Definition: NvInferRuntimePlugin.h:935

nvinfer1::v_1_0::IPluginCreatorInterface::~IPluginCreatorInterface
~IPluginCreatorInterface() noexcept override=default

nvinfer1
The TensorRT API version 1 namespace.

nvinfer1::PluginFieldType
PluginFieldType
The possible field types for custom layer.
Definition: NvInferRuntimePlugin.h:833

nvinfer1::PluginFieldType::kUNKNOWN
@ kUNKNOWN
Unknown field type.

nvinfer1::PluginFieldType::kFLOAT32
@ kFLOAT32
FP32 field type.

nvinfer1::PluginFieldType::kCHAR
@ kCHAR
char field type.

nvinfer1::PluginFieldType::kINT16
@ kINT16
INT16 field type.

nvinfer1::PluginFieldType::kDIMS
@ kDIMS
nvinfer1::Dims field type.

nvinfer1::PluginFieldType::kFLOAT64
@ kFLOAT64
FP64 field type.

nvinfer1::PluginFieldType::kFLOAT16
@ kFLOAT16
FP16 field type.

nvinfer1::PluginCreatorVersion
PluginCreatorVersion
Enum to identify version of the plugin creator.
Definition: NvInferRuntimePlugin.h:105

nvinfer1::PluginCreatorVersion::kV1
@ kV1
IPluginCreator.

nvinfer1::PluginCreatorVersion::kV1_PYTHON
@ kV1_PYTHON
IPluginCreator-based Python plugin creators.

nvinfer1::IPluginCreator
v_1_0::IPluginCreator IPluginCreator
Definition: NvInferRuntimePlugin.h:1093

nvinfer1::PluginCapabilityType
PluginCapabilityType
Enumerates the different capability types an IPluginV3 object may have.
Definition: NvInferRuntimePlugin.h:910

nvinfer1::PluginCapabilityType::kBUILD
@ kBUILD
Build capability. IPluginV3 objects provided to TensorRT build phase must have this.

nvinfer1::PluginCapabilityType::kRUNTIME
@ kRUNTIME
Runtime capability. IPluginV3 objects provided to TensorRT build and execution phases must have this.

nvinfer1::PluginCapabilityType::kCORE
@ kCORE
Core capability. Every IPluginV3 object must have this.

nvinfer1::AsciiChar
char_t AsciiChar
Definition: NvInferRuntimeBase.h:107

nvinfer1::TensorRTPhase
TensorRTPhase
Indicates a phase of operation of TensorRT.
Definition: NvInferRuntimePlugin.h:925

nvinfer1::PluginVersion::kV2_DYNAMICEXT
@ kV2_DYNAMICEXT
IPluginV2DynamicExt.

nvinfer1::PluginVersion::kV2
@ kV2
IPluginV2.

nvinfer1::PluginVersion::kV2_IOEXT
@ kV2_IOEXT
IPluginV2IOExt.

nvinfer1::PluginVersion::kV2_EXT
@ kV2_EXT
IPluginV2Ext.

nvinfer1::PluginVersion::kV2_DYNAMICEXT_PYTHON
@ kV2_DYNAMICEXT_PYTHON
IPluginV2DynamicExt-based Python plugins.

nvinfer1::DataType
DataType
The type of weights and tensors.
Definition: NvInferRuntimeBase.h:135

nvinfer1::DataType::kINT64
@ kINT64
Signed 64-bit integer type.

nvinfer1::DataType::kINT32
@ kINT32
Signed 32-bit integer format.

nvinfer1::PluginFormat
TensorFormat PluginFormat
PluginFormat is reserved for backward compatibility.
Definition: NvInferRuntimePlugin.h:48

nvinfer1::IPluginCreatorInterface
v_1_0::IPluginCreatorInterface IPluginCreatorInterface
Definition: NvInferRuntimePlugin.h:1081

nvinfer1::BuilderFlag::kINT8
@ kINT8
Enable Int8 layer selection, with FP32 fallback, and with FP16 fallback if kFP16 is also specified.

nvinfer1::BuilderFlag::kFP8
@ kFP8

nvinfer1::BuilderFlag::kBF16
@ kBF16

nvinfer1::BuilderFlag::kINT4
@ kINT4
Enable plugins with INT4 input/output.

nvinfer1::TensorFormat
TensorFormat
Format of the input/output tensors.
Definition: NvInferRuntimeBase.h:250

PluginVersion
Definition of plugin versions.

nvinfer1::PluginFieldCollection
Plugin field collection struct.
Definition: NvInferRuntimePlugin.h:897

nvinfer1::PluginFieldCollection::fields
PluginField const  * fields
Pointer to PluginField entries.
Definition: NvInferRuntimePlugin.h:901

nvinfer1::PluginFieldCollection::nbFields
int32_t nbFields
Number of PluginField entries.
Definition: NvInferRuntimePlugin.h:899

nvinfer1::PluginTensorDesc
Fields that a plugin might see for an input or output.
Definition: NvInferRuntimePlugin.h:67

nvinfer1::PluginTensorDesc::type
DataType type
Definition: NvInferRuntimePlugin.h:71

nvinfer1::PluginTensorDesc::dims
Dims dims
Dimensions.
Definition: NvInferRuntimePlugin.h:69

nvinfer1::PluginTensorDesc::format
TensorFormat format
Tensor format.
Definition: NvInferRuntimePlugin.h:73

nvinfer1::PluginTensorDesc::scale
float scale
Scale for INT8 data type.
Definition: NvInferRuntimePlugin.h:75