Go to the documentation of this file.
50 #ifndef NV_INFER_RUNTIME_H
51 #define NV_INFER_RUNTIME_H
64 class IExecutionContext;
131 virtual void*
data()
const noexcept = 0;
132 virtual std::size_t
size()
const noexcept = 0;
134 virtual void destroy() noexcept = 0;
189 virtual void configure(
const Dims* inputDims, int32_t nbInputs,
const Dims* outputDims, int32_t nbOutputs,
226 virtual int32_t
enqueue(int32_t batchSize,
const void*
const* inputs,
void** outputs,
void* workspace,
267 return NV_TENSORRT_VERSION;
588 return Dims{-1, {}, {}};
681 int32_t
enqueue(int32_t ,
const void*
const* ,
void** ,
void* ,
780 virtual void setDLACore(int32_t dlaCore) noexcept = 0;
788 virtual int32_t
getDLACore()
const noexcept = 0;
798 virtual void destroy() noexcept = 0;
1040 constexpr
inline int32_t EnumMax<OptProfileSelector>()
1128 const char* inputName,
OptProfileSelector select,
const int32_t* values, int32_t nbValues) noexcept
1137 virtual int32_t
getNbShapeValues(
const char* inputName)
const noexcept = 0;
1177 virtual bool isValid()
const noexcept = 0;
1239 virtual const char*
getBindingName(int32_t bindingIndex)
const noexcept = 0;
1249 virtual bool bindingIsInput(int32_t bindingIndex)
const noexcept = 0;
1343 virtual void destroy() noexcept = 0;
1447 virtual const char*
getName()
const noexcept = 0;
1539 virtual bool isShapeBinding(int32_t bindingIndex)
const noexcept = 0;
1630 virtual bool execute(int32_t batchSize,
void** bindings) noexcept = 0;
1690 virtual void destroy() noexcept = 0;
1703 virtual void setName(
const char* name) noexcept = 0;
1710 virtual const char*
getName()
const noexcept = 0;
1741 virtual Dims getStrides(int32_t bindingIndex)
const noexcept = 0;
1872 virtual bool getShapeBinding(int32_t bindingIndex, int32_t* data)
const noexcept = 0;
1935 virtual bool executeV2(
void** bindings) noexcept = 0;
2006 extern "C" TENSORRTAPI
void* createInferRuntime_INTERNAL(
void* logger, int32_t version);
2012 extern "C" TENSORRTAPI
void* createInferRefitter_INTERNAL(
void* engine,
void* logger, int32_t version);
2025 return static_cast<IRuntime*>(createInferRuntime_INTERNAL(&logger, NV_TENSORRT_VERSION));
2035 return static_cast<IRefitter*>(createInferRefitter_INTERNAL(&engine, &logger, NV_TENSORRT_VERSION));
2040 #endif // NV_INFER_RUNTIME_H
virtual IPlugin * createPlugin(const char *layerName, const void *serialData, size_t serialLength)=0
Create a plugin from serialized data.
virtual IProfiler * getProfiler() const noexcept=0
Get the profiler.
virtual float getDynamicRangeMin(const char *tensorName) const =0
Get minimum of dynamic range.
virtual bool getDebugSync() const noexcept=0
Get the debug sync flag.
virtual void reportLayerTime(const char *layerName, float ms)=0
Layer time reporting callback.
virtual int32_t getTensorsWithDynamicRange(int32_t size, const char **tensorNames) const =0
Get names of all tensors that have refittable dynamic ranges.
virtual void destroy() noexcept=0
Destroy this object.
virtual const ICudaEngine & getEngine() const noexcept=0
Get the associated engine.
Definition: NvInferRuntime.h:420
virtual bool setWeights(const char *layerName, WeightsRole role, Weights weights)=0
Specify new weights for a layer of given name. Returns true on success, or false if new weights are r...
DimensionOperation
An operation on two IDimensionExpr, which represent integer expressions used in dimension computation...
Definition: NvInferRuntime.h:328
virtual void configureWithFormat(const Dims *inputDims, int32_t nbInputs, const Dims *outputDims, int32_t nbOutputs, DataType type, PluginFormat format, int32_t maxBatchSize)=0
Configure the layer.
Minimum of the two elements.
virtual void destroy() noexcept=0
Destroy the allocated memory.
const IDimensionExpr * d[Dims::MAX_DIMS]
The extent of each dimension.
Definition: NvInferRuntime.h:412
virtual int32_t getNbOptimizationProfiles() const noexcept=0
Get the number of optimization profiles defined for this engine.
virtual bool setInputShapeBinding(int32_t bindingIndex, const int32_t *data) noexcept=0
Set values of input tensor required by shape calculations.
virtual std::size_t size() const noexcept=0
The size in bytes of the data that was allocated.
An array of weights used as a layer parameter.
Definition: NvInferRuntime.h:110
virtual void setDLACore(int32_t dlaCore) noexcept=0
Set the DLA core that the deserialized engine must execute on.
IRefitter * createInferRefitter(ICudaEngine &engine, ILogger &logger)
Create an instance of an IRefitter class.
Definition: NvInferRuntime.h:2033
Class to handle library allocated memory that is accessible to the user.
Definition: NvInferRuntime.h:128
virtual const char * getBindingName(int32_t bindingIndex) const noexcept=0
Retrieve the name corresponding to a binding index.
DeviceType
The device that this layer/network will execute on.
Definition: NvInferRuntime.h:739
virtual bool refitCudaEngine()=0
Updates associated engine. Return true if successful.
static constexpr int32_t kFORMAT_COMBINATION_LIMIT
Definition: NvInferRuntime.h:486
virtual Dims getStrides(int32_t bindingIndex) const noexcept=0
Return the strides of the buffer for the given binding.
virtual DataType type() const noexcept=0
The type of the memory that was allocated.
Updates weights in an engine.
Definition: NvInferRuntime.h:861
virtual int32_t enqueue(int32_t batchSize, const void *const *inputs, void **outputs, void *workspace, cudaStream_t stream)=0
Execute the layer.
virtual void setDeviceMemory(void *memory) noexcept=0
Set the device memory for use by this execution context.
virtual void setErrorRecorder(IErrorRecorder *recorder) noexcept=0
Set the ErrorRecorder for this interface.
kernel for IConvolutionLayer, IDeconvolutionLayer, or IFullyConnectedLayer
Fail with error when the coordinates are out of bounds. This is the default.
virtual IErrorRecorder * getErrorRecorder() const =0
Get the ErrorRecorder assigned to this interface.
virtual const char * getBindingFormatDesc(int32_t bindingIndex) const noexcept=0
Return the human readable description of the tensor format.
virtual bool supportsFormatCombination(int32_t pos, const PluginTensorDesc *inOut, int32_t nbInputs, int32_t nbOutputs)=0
Return true if plugin supports the format and datatype for the input/output indexed by pos.
virtual void setErrorRecorder(IErrorRecorder *recorder) noexcept=0
Set the ErrorRecorder for this interface.
virtual Dims getProfileDimensions(int32_t bindingIndex, int32_t profileIndex, OptProfileSelector select) const noexcept=0
Get the minimum / optimum / maximum dimensions for a particular binding under an optimization profile...
virtual void serialize(void *buffer)=0
Serialize the layer.
TensorFormat
Format of the input/output tensors.
Definition: NvInferRuntimeCommon.h:258
virtual bool hasImplicitBatchDimension() const =0
Query whether the engine was built with an implicit batch dimension.
int64_t count
The number of weights in the array.
Definition: NvInferRuntime.h:115
virtual int32_t getBindingVectorizedDim(int32_t bindingIndex) const noexcept=0
Return the dimension index that the buffer is vectorized.
Structure to define the dimensions of a tensor.
Definition: NvInferRuntimeCommon.h:227
virtual IErrorRecorder * getErrorRecorder() const noexcept=0
Get the ErrorRecorder assigned to this interface.
virtual void destroy() noexcept=0
Destroy this object.
virtual size_t getWorkspaceSize(const PluginTensorDesc *inputs, int32_t nbInputs, const PluginTensorDesc *outputs, int32_t nbOutputs) const =0
Find the workspace size required by the layer.
Plugin factory for deserialization.
Definition: NvInferRuntime.h:999
Application-implemented logging interface for the builder, engine and runtime.
Definition: NvInferRuntimeCommon.h:1078
constexpr int32_t EnumMax< DeviceType >()
Maximum number of elements in DeviceType enum.
Definition: NvInferRuntime.h:747
constexpr int32_t EnumMax< EngineCapability >()
Maximum number of elements in EngineCapability enum.
Definition: NvInferRuntime.h:93
virtual void setGpuAllocator(IGpuAllocator *allocator) noexcept=0
Set the GPU allocator.
TRT_DEPRECATED bool canBroadcastInputAcrossBatch(int32_t) const _TENSORRT_FINAL
Derived classes should not implement this. In a C++11 API it would be override final.
Definition: NvInferRuntime.h:613
An engine for executing inference on a built network, with functionally unsafe features.
Definition: NvInferRuntime.h:1190
virtual bool allInputShapesSpecified() const noexcept=0
Whether all input shape bindings have been specified.
virtual IExecutionContext * createExecutionContextWithoutDeviceMemory() noexcept=0
Create an execution context without any device memory allocated.
virtual void destroy() noexcept=0
Destroy this object.
#define TRTNOEXCEPT
Definition: NvInferRuntimeCommon.h:103
nvinfer1::ICudaEngine * deserializeCudaEngine(const void *blob, std::size_t size) noexcept
Deserialize an engine from a stream when plugin factory is not used.
Definition: NvInferRuntime.h:848
virtual Dims getDimensions(const char *inputName, OptProfileSelector select) const noexcept=0
Get the minimum / optimum / maximum dimensions for a dynamic input tensor.
virtual TRT_DEPRECATED bool setOptimizationProfile(int32_t profileIndex) noexcept=0
Select an optimization profile for the current context.
virtual const IDimensionExpr * operation(DimensionOperation op, const IDimensionExpr &first, const IDimensionExpr &second)=0
virtual size_t getWorkspaceSize(int32_t maxBatchSize) const =0
Find the workspace size required by the layer.
virtual IErrorRecorder * getErrorRecorder() const noexcept=0
Get the ErrorRecorder assigned to this interface.
virtual void setErrorRecorder(IErrorRecorder *recorder)=0
Set the ErrorRecorder for this interface.
TRT_DEPRECATED void configurePlugin(const Dims *, int32_t, const Dims *, int32_t, const DataType *, const DataType *, const bool *, const bool *, PluginFormat, int32_t) _TENSORRT_FINAL
Derived classes should not implement this. In a C++11 API it would be override final.
Definition: NvInferRuntime.h:646
int32_t nbDims
The number of dimensions.
Definition: NvInferRuntime.h:411
virtual void setDebugSync(bool sync) noexcept=0
Set the debug sync flag.
virtual const char * getName() const noexcept=0
Return the name of the execution context.
The TensorRT API version 1 namespace.
virtual int32_t getMissing(int32_t size, const char **layerNames, WeightsRole *roles)=0
Get description of missing weights.
virtual Dims getBindingDimensions(int32_t bindingIndex) const noexcept=0
Get the dynamic dimensions of a binding.
Definition: NvInferRuntime.h:450
virtual void configure(const Dims *inputDims, int32_t nbInputs, const Dims *outputDims, int32_t nbOutputs, int32_t maxBatchSize)=0
Configure the layer.
int32_t getTensorRTVersion() const _TENSORRT_OVERRIDE
Return the API version with which this plugin was built.
Definition: NvInferRuntime.h:567
IPluginV2DynamicExt * clone() const _TENSORRT_OVERRIDE=0
Clone the plugin object. This copies over internal plugin parameters as well and returns a new plugin...
This is used to set or get the value that is used in the optimization (kernel selection).
virtual int32_t getNbLayers() const noexcept=0
Get the number of layers in the network.
virtual float getExtraMemoryTarget() const noexcept=0
Get the extra memory target that has been defined for this profile.
virtual const int32_t * getShapeValues(const char *inputName, OptProfileSelector select) const noexcept=0
Get the minimum / optimum / maximum values for an input shape tensor.
Dims max
Upper bounds on tensor’s dimensions.
Definition: NvInferRuntime.h:429
TRT_DEPRECATED bool isOutputBroadcastAcrossBatch(int32_t, const bool *, int32_t) const _TENSORRT_FINAL
Derived classes should not implement this. In a C++11 API it would be override final.
Definition: NvInferRuntime.h:599
Check if two elements are equal.
virtual float getDynamicRangeMax(const char *tensorName) const =0
Get maximum of dynamic range.
struct CUstream_st * cudaStream_t
Forward declaration of cudaStream_t.
Definition: NvInferRuntimeCommon.h:115
virtual IErrorRecorder * getErrorRecorder() const noexcept=0
Get the ErrorRecorder assigned to this interface.
virtual void configurePlugin(const DynamicPluginTensorDesc *in, int32_t nbInputs, const DynamicPluginTensorDesc *out, int32_t nbOutputs)=0
Configure the layer.
TRT_DEPRECATED size_t getWorkspaceSize(int32_t) const _TENSORRT_FINAL
Derived classes should not implement this. In a C++11 API it would be override final.
Definition: NvInferRuntime.h:665
virtual TensorFormat getBindingFormat(int32_t bindingIndex) const noexcept=0
Return the binding format.
virtual bool executeV2(void **bindings) noexcept=0
Synchronously execute inference on a network.
virtual bool supportsFormat(DataType type, PluginFormat format) const =0
Check format support.
DataType type
The type of the weights.
Definition: NvInferRuntime.h:113
bias for IConvolutionLayer, IDeconvolutionLayer, or IFullyConnectedLayer
virtual bool execute(int32_t batchSize, void **bindings) noexcept=0
Synchronously execute inference on a batch.
#define _TENSORRT_OVERRIDE
Items that are marked as deprecated will be removed in a future release.
Definition: NvInferRuntimeCommon.h:62
struct CUevent_st * cudaEvent_t
Forward declaration of cudaEvent_t.
Definition: NvInferRuntimeCommon.h:116
Dims min
Lower bounds on tensor’s dimensions.
Definition: NvInferRuntime.h:426
void configure(const Dims *, int32_t, const Dims *, int32_t, int32_t) _TENSORRT_FINAL
Derived classes should not implement this. In a C++11 API it would be override final.
Definition: NvInferRuntime.h:312
virtual Dims getBindingDimensions(int32_t bindingIndex) const noexcept=0
Get the dimensions of a binding.
TRT_DEPRECATED bool supportsFormat(DataType, PluginFormat) const _TENSORRT_FINAL
Derived classes should not implement this. In a C++11 API it would be override final.
Definition: NvInferRuntime.h:629
virtual TensorLocation getLocation(int32_t bindingIndex) const noexcept=0
Get location of binding.
virtual nvinfer1::ICudaEngine * deserializeCudaEngine(const void *blob, std::size_t size, IPluginFactory *pluginFactory) noexcept=0
Deserialize an engine from a stream.
virtual int32_t getNbDLACores() const noexcept=0
Returns number of DLA hardware cores accessible.
shift part of IScaleLayer
Plugin class for user-implemented layers.
Definition: NvInferRuntime.h:256
virtual bool bindingIsInput(int32_t bindingIndex) const noexcept=0
Determine whether a binding is an input binding.
Optimization profile for dynamic input dimensions and shape tensors.
Definition: NvInferRuntime.h:1067
DataType
The type of weights and tensors.
Definition: NvInferRuntimeCommon.h:169
virtual int32_t getBindingIndex(const char *name) const noexcept=0
Retrieve the binding index for a named tensor.
virtual int32_t initialize()=0
Initialize the layer for execution. This is called when the engine is created.
virtual bool isValid() const noexcept=0
Check whether the optimization profile can be passed to an IBuilderConfig object.
virtual int32_t getAll(int32_t size, const char **layerNames, WeightsRole *roles)=0
Get description of all weights that could be refit.
Definition: NvInferRuntime.h:408
Floor division of the first element by the second.
PluginTensorDesc desc
Information required to interpret a pointer to tensor data, except that desc.dims has -1 in place of ...
Definition: NvInferRuntime.h:423
static const int32_t MAX_DIMS
The maximum number of dimensions supported for a tensor.
Definition: NvInferRuntimeCommon.h:230
virtual DataType getBindingDataType(int32_t bindingIndex) const noexcept=0
Determine the required data type for a buffer from its binding index.
Subtract the second element from the first.
virtual bool allInputDimensionsSpecified() const noexcept=0
Whether all dynamic dimensions of input tensors have been specified.
virtual bool isConstant() const =0
Return true if expression is a build-time constant.
Reference counted application-implemented error reporting interface for TensorRT objects.
Definition: NvInferRuntimeCommon.h:1235
virtual int32_t getNbOutputs() const =0
Get the number of outputs from the layer.
virtual int32_t getTensorRTVersion() const
Return the API version with which this plugin was built.
Definition: NvInferRuntime.h:265
virtual bool setDynamicRange(const char *tensorName, float min, float max)=0
virtual int32_t enqueue(const PluginTensorDesc *inputDesc, const PluginTensorDesc *outputDesc, const void *const *inputs, void *const *outputs, void *workspace, cudaStream_t stream)=0
Execute the layer.
OptProfileSelector
When setting or querying optimization profile parameters (such as shape tensor inputs or dynamic dime...
Definition: NvInferRuntime.h:1031
WeightsRole
How a layer uses particular Weights.
Definition: NvInferRuntime.h:718
virtual bool getShapeBinding(int32_t bindingIndex, int32_t *data) const noexcept=0
Get values of an input tensor required for shape calculations or an output tensor produced by shape c...
IRuntime * createInferRuntime(ILogger &logger)
Create an instance of an IRuntime class.
Definition: NvInferRuntime.h:2023
virtual int32_t getMaxBatchSize() const noexcept=0
Get the maximum batch size which can be used for inference.
Plugin class for user-implemented layers.
Definition: NvInferRuntimeCommon.h:583
TRT_DEPRECATED int32_t enqueue(int32_t, const void *const *, void **, void *, cudaStream_t) _TENSORRT_FINAL
Derived classes should not implement this. In a C++11 API it would be override final.
Definition: NvInferRuntime.h:681
Definition: NvInferRuntime.h:358
Product of the two elements.
virtual const int32_t * getProfileShapeValues(int32_t profileIndex, int32_t inputIndex, OptProfileSelector select) const noexcept=0
Get minimum / optimum / maximum values for an input shape binding under an optimization profile.
virtual bool enqueue(int32_t batchSize, void **bindings, cudaStream_t stream, cudaEvent_t *inputConsumed) noexcept=0
Asynchronously execute inference on a batch.
EngineCapability
Forward declaration of IPluginFactory for use by other interfaces.
Definition: NvInferRuntime.h:82
virtual void terminate()=0
Release resources acquired during plugin layer initialization. This is called when the engine is dest...
constexpr int32_t EnumMax< DimensionOperation >()
Maximum number of elements in DimensionOperation enum.
Definition: NvInferRuntime.h:343
virtual const char * getName() const noexcept=0
Returns the name of the network associated with the engine.
virtual const IDimensionExpr * constant(int32_t value)=0
Return pointer to IDimensionExp for given value.
Application-implemented interface for profiling.
Definition: NvInferRuntime.h:698
Check if element in first tensor is less than corresponding element in second tensor.
virtual DimsExprs getOutputDimensions(int32_t outputIndex, const DimsExprs *inputs, int32_t nbInputs, IExprBuilder &exprBuilder)=0
Get expressions for computing dimensions of an output tensor from dimensions of the input tensors.
virtual int32_t getDLACore() const noexcept=0
Get the DLA core that the engine executes on.
virtual bool setExtraMemoryTarget(float target) noexcept=0
Set a target for extra GPU memory that may be used by this profile.
virtual size_t getSerializationSize()=0
Find the size of the serialization buffer required.
Fields that a plugin might see for an input or output.
Definition: NvInferRuntimeCommon.h:369
const void * values
The weight values, in a contiguous array.
Definition: NvInferRuntime.h:114
virtual int32_t getOptimizationProfile() const noexcept=0
Get the index of the currently selected optimization profile.
virtual Dims getOutputDimensions(int32_t index, const Dims *inputs, int32_t nbInputDims)=0
Get the dimension of an output tensor.
virtual int32_t getNbShapeValues(const char *inputName) const noexcept=0
Get the number of values for an input shape tensor.
virtual bool setBindingDimensions(int32_t bindingIndex, Dims dimensions) noexcept=0
Set the dynamic dimensions of a binding.
virtual EngineCapability getEngineCapability() const noexcept=0
Determine the execution capability this engine has.
Plugin class for user-implemented layers.
Definition: NvInferRuntime.h:146
virtual bool setDimensions(const char *inputName, OptProfileSelector select, Dims dims) noexcept=0
Set the minimum / optimum / maximum dimensions for a dynamic input tensor.
virtual void setName(const char *name) noexcept=0
Set the name of the execution context.
Application-implemented class for controlling allocation on the GPU.
Definition: NvInferRuntimeCommon.h:1035
TRT_DEPRECATED Dims getOutputDimensions(int32_t, const Dims *, int32_t) _TENSORRT_FINAL
Derived classes should not implement this. In a C++11 API it would be override final.
Definition: NvInferRuntime.h:585
virtual bool isExecutionBinding(int32_t bindingIndex) const noexcept=0
True if pointer to tensor data is required for execution phase, false if nullptr can be supplied.
virtual int32_t getConstantValue() const =0
virtual bool setOptimizationProfileAsync(int32_t profileIndex, cudaStream_t stream) noexcept=0
Select an optimization profile for the current context with async semantics.
virtual void setErrorRecorder(IErrorRecorder *recorder) noexcept=0
Set the ErrorRecorder for this interface.
virtual TRT_DEPRECATED std::size_t getWorkspaceSize() const noexcept=0
Get the amount of workspace the engine uses.
Allows a serialized functionally unsafe engine to be deserialized.
Definition: NvInferRuntime.h:759
virtual int32_t getBindingBytesPerComponent(int32_t bindingIndex) const noexcept=0
Return the number of bytes per component of an element.
virtual int32_t getNbBindings() const noexcept=0
Get the number of binding indices.
TensorLocation
The location for tensor data storage, device or host.
Definition: NvInferRuntimeCommon.h:1017
Context for executing inference using an engine, with functionally unsafe features.
Definition: NvInferRuntime.h:1616
virtual void * data() const noexcept=0
A pointer to the raw data that is owned by the library.
virtual size_t getDeviceMemorySize() const noexcept=0
Return the amount of device memory required by an execution context.
virtual IExecutionContext * createExecutionContext() noexcept=0
Create an execution context.
virtual bool enqueueV2(void **bindings, cudaStream_t stream, cudaEvent_t *inputConsumed) noexcept=0
Asynchronously execute inference.
virtual IHostMemory * serialize() const noexcept=0
Serialize the network to a stream.
virtual void setProfiler(IProfiler *) noexcept=0
Set the profiler.
virtual bool setShapeValues(const char *inputName, OptProfileSelector select, const int32_t *values, int32_t nbValues) noexcept=0
Set the minimum / optimum / maximum values for an input shape tensor.
Definition: NvInferRuntime.h:389
virtual bool isShapeBinding(int32_t bindingIndex) const noexcept=0
True if tensor is required as input for shape calculations or output from them.
virtual bool isRefittable() const noexcept=0
Return true if engine can be refit.
virtual int32_t getBindingComponentsPerElement(int32_t bindingIndex) const noexcept=0
Return the number of components included in one element.
constexpr int32_t EnumMax< WeightsRole >()
Maximum number of elements in WeightsRole enum.
Definition: NvInferRuntime.h:729