Go to the documentation of this file.
50 #ifndef NV_INFER_RUNTIME_H
51 #define NV_INFER_RUNTIME_H
64 class IExecutionContext;
128 virtual void*
data()
const noexcept = 0;
129 virtual std::size_t
size()
const noexcept = 0;
131 virtual void destroy() noexcept = 0;
182 virtual void configure(
const Dims* inputDims,
int nbInputs,
const Dims* outputDims,
int nbOutputs,
int maxBatchSize) TRTNOEXCEPT = 0;
195 virtual void terminate() TRTNOEXCEPT = 0;
218 virtual int enqueue(
int batchSize,
const void*
const* inputs,
void** outputs,
void* workspace,
cudaStream_t stream) TRTNOEXCEPT = 0;
234 virtual void serialize(
void* buffer) TRTNOEXCEPT = 0;
257 return NV_TENSORRT_VERSION;
563 return Dims{-1, {}, {}};
610 const bool* ,
const bool* ,
PluginFormat ,
int ) _TENSORRT_FINAL TRTNOEXCEPT {}
657 virtual void reportLayerTime(
const char* layerName,
float ms) TRTNOEXCEPT = 0;
728 virtual void setDLACore(
int dlaCore) noexcept = 0;
746 virtual void destroy() noexcept = 0;
861 virtual int getAll(
int size,
const char** layerNames,
WeightsRole* roles) TRTNOEXCEPT = 0;
863 virtual void destroy() TRTNOEXCEPT = 0;
881 virtual bool setDynamicRange(
const char* tensorName,
float min,
float max) TRTNOEXCEPT = 0;
964 virtual IPlugin*
createPlugin(
const char* layerName,
const void* serialData,
size_t serialLength) TRTNOEXCEPT = 0;
1075 const char* inputName,
OptProfileSelector select,
const int32_t* values,
int nbValues) noexcept = 0;
1123 virtual bool isValid()
const noexcept = 0;
1184 virtual const char*
getBindingName(
int bindingIndex)
const noexcept = 0;
1194 virtual bool bindingIsInput(
int bindingIndex)
const noexcept = 0;
1282 virtual void destroy() noexcept = 0;
1386 virtual const char*
getName()
const noexcept = 0;
1476 virtual bool isShapeBinding(
int bindingIndex)
const noexcept = 0;
1566 virtual bool execute(
int batchSize,
void** bindings) noexcept = 0;
1623 virtual void destroy() noexcept = 0;
1636 virtual void setName(
const char* name) noexcept = 0;
1643 virtual const char*
getName()
const noexcept = 0;
1796 virtual bool getShapeBinding(
int bindingIndex, int32_t* data)
const noexcept = 0;
1859 virtual bool executeV2(
void** bindings) noexcept = 0;
1887 extern "C" TENSORRTAPI
void* createInferRuntime_INTERNAL(
void* logger,
int version);
1893 extern "C" TENSORRTAPI
void* createInferRefitter_INTERNAL(
void* engine,
void* logger,
int version);
1906 return static_cast<IRuntime*>(createInferRuntime_INTERNAL(&logger, NV_TENSORRT_VERSION));
1916 return static_cast<IRefitter*>(createInferRefitter_INTERNAL(&engine, &logger, NV_TENSORRT_VERSION));
1921 #endif // NV_INFER_RUNTIME_H
virtual IPlugin * createPlugin(const char *layerName, const void *serialData, size_t serialLength)=0
Create a plugin from serialized data.
virtual int getTensorsWithDynamicRange(int size, const char **tensorNames) const =0
Get names of all tensors that have refittable dynamic ranges.
virtual IProfiler * getProfiler() const noexcept=0
Get the profiler.
virtual float getDynamicRangeMin(const char *tensorName) const =0
Get minimum of dynamic range.
constexpr int EnumMax< OptProfileSelector >()
Number of different values of OptProfileSelector enum.
Definition: NvInferRuntime.h:987
virtual bool getDebugSync() const noexcept=0
Get the debug sync flag.
virtual void reportLayerTime(const char *layerName, float ms)=0
Layer time reporting callback.
virtual void destroy() noexcept=0
Destroy this object.
virtual const ICudaEngine & getEngine() const noexcept=0
Get the associated engine.
Definition: NvInferRuntime.h:404
virtual const IDimensionExpr * constant(int value)=0
Return pointer to IDimensionExpr for the given value.
virtual bool setOptimizationProfile(int profileIndex) noexcept=0
Select an optimization profile for the current context.
virtual bool setWeights(const char *layerName, WeightsRole role, Weights weights)=0
Specify new weights for a layer of given name. Returns true on success, or false if new weights are rejected.
virtual Dims getOutputDimensions(int index, const Dims *inputs, int nbInputDims)=0
Get the dimension of an output tensor.
virtual void destroy() noexcept=0
Destroy the allocated memory.
const IDimensionExpr * d[Dims::MAX_DIMS]
The extent of each dimension.
Definition: NvInferRuntime.h:396
virtual std::size_t size() const noexcept=0
The size in bytes of the data that was allocated.
An array of weights used as a layer parameter.
Definition: NvInferRuntime.h:107
IRefitter * createInferRefitter(ICudaEngine &engine, ILogger &logger)
Create an instance of an IRefitter class.
Definition: NvInferRuntime.h:1914
virtual bool bindingIsInput(int bindingIndex) const noexcept=0
Determine whether a binding is an input binding.
Class to handle library allocated memory that is accessible to the user.
Definition: NvInferRuntime.h:125
virtual size_t getWorkspaceSize(int maxBatchSize) const =0
Find the workspace size required by the layer.
virtual bool refitCudaEngine()=0
Updates associated engine. Return true if successful.
TRT_DEPRECATED bool canBroadcastInputAcrossBatch(int) const _TENSORRT_FINAL
Derived classes should not implement this. In a C++11 API it would be override final.
Definition: NvInferRuntime.h:583
virtual int initialize()=0
Initialize the layer for execution. This is called when the engine is created.
TRT_DEPRECATED size_t getWorkspaceSize(int) const _TENSORRT_FINAL
Derived classes should not implement this. In a C++11 API it would be override final.
Definition: NvInferRuntime.h:622
virtual int enqueue(const PluginTensorDesc *inputDesc, const PluginTensorDesc *outputDesc, const void *const *inputs, void *const *outputs, void *workspace, cudaStream_t stream)=0
Execute the layer.
virtual DataType type() const noexcept=0
The type of the memory that was allocated.
Updates weights in an engine.
Definition: NvInferRuntime.h:809
virtual int getTensorRTVersion() const
Return the API version with which this plugin was built.
Definition: NvInferRuntime.h:255
virtual int getDLACore() const noexcept=0
Get the DLA core that the engine executes on.
TensorLocation
The location for tensor data storage, device or host.
Definition: NvInferRuntimeCommon.h:960
bias for IConvolutionLayer, IDeconvolutionLayer, or IFullyConnectedLayer
Subtract the second element from the first.
constexpr int EnumMax< WeightsRole >()
Maximum number of elements in WeightsRole enum.
Definition: NvInferRuntime.h:678
virtual void setDeviceMemory(void *memory) noexcept=0
Set the device memory for use by this execution context.
virtual void setErrorRecorder(IErrorRecorder *recorder) noexcept=0
Set the ErrorRecorder for this interface.
TRT_DEPRECATED void configurePlugin(const Dims *, int, const Dims *, int, const DataType *, const DataType *, const bool *, const bool *, PluginFormat, int) _TENSORRT_FINAL
Derived classes should not implement this. In a C++11 API it would be override final.
Definition: NvInferRuntime.h:608
virtual const int32_t * getProfileShapeValues(int profileIndex, int inputIndex, OptProfileSelector select) const noexcept=0
Get minimum / optimum / maximum values for an input shape binding under an optimization profile.
virtual Dims getBindingDimensions(int bindingIndex) const noexcept=0
Get the dimensions of a binding.
virtual IErrorRecorder * getErrorRecorder() const =0
Get the ErrorRecorder assigned to this interface.
OptProfileSelector
When setting or querying optimization profile parameters (such as shape tensor inputs or dynamic dime...
Definition: NvInferRuntime.h:979
virtual bool setShapeValues(const char *inputName, OptProfileSelector select, const int32_t *values, int nbValues) noexcept=0
Set the minimum / optimum / maximum values for an input shape tensor.
int getTensorRTVersion() const _TENSORRT_OVERRIDE
Return the API version with which this plugin was built.
Definition: NvInferRuntime.h:545
virtual int getAll(int size, const char **layerNames, WeightsRole *roles)=0
Get description of all weights that could be refit.
virtual void setErrorRecorder(IErrorRecorder *recorder) noexcept=0
Set the ErrorRecorder for this interface.
virtual void serialize(void *buffer)=0
Serialize the layer.
virtual bool hasImplicitBatchDimension() const =0
Query whether the engine was built with an implicit batch dimension.
Floor division of the first element by the second.
int64_t count
The number of weights in the array.
Definition: NvInferRuntime.h:112
virtual void configure(const Dims *inputDims, int nbInputs, const Dims *outputDims, int nbOutputs, int maxBatchSize)=0
Configure the layer.
Structure to define the dimensions of a tensor.
Definition: NvInferRuntimeCommon.h:219
virtual IErrorRecorder * getErrorRecorder() const noexcept=0
Get the ErrorRecorder assigned to this interface.
void configure(const Dims *, int, const Dims *, int, int) _TENSORRT_FINAL
Derived classes should not implement this. In a C++11 API it would be override final.
Definition: NvInferRuntime.h:300
TRT_DEPRECATED Dims getOutputDimensions(int, const Dims *, int) _TENSORRT_FINAL
Derived classes should not implement this. In a C++11 API it would be override final.
Definition: NvInferRuntime.h:561
virtual void destroy() noexcept=0
Destroy this object.
Plugin factory for deserialization.
Definition: NvInferRuntime.h:947
Application-implemented logging interface for the builder, engine and runtime.
Definition: NvInferRuntimeCommon.h:1020
virtual Dims getProfileDimensions(int bindingIndex, int profileIndex, OptProfileSelector select) const noexcept=0
Get the minimum / optimum / maximum dimensions for a particular binding under an optimization profile...
virtual void setGpuAllocator(IGpuAllocator *allocator) noexcept=0
Set the GPU allocator.
virtual bool isShapeBinding(int bindingIndex) const noexcept=0
True if tensor is required as input for shape calculations or output from them.
virtual int getConstantValue() const =0
An engine for executing inference on a built network, with functionally unsafe features.
Definition: NvInferRuntime.h:1136
virtual bool allInputShapesSpecified() const noexcept=0
Whether all input shape bindings have been specified.
This is used to set or get the value that is used in the optimization (kernel selection).
virtual IExecutionContext * createExecutionContextWithoutDeviceMemory() noexcept=0
create an execution context without any device memory allocated
virtual void destroy() noexcept=0
Destroy this object.
DeviceType
The device that this layer/network will execute on.
Definition: NvInferRuntime.h:688
nvinfer1::ICudaEngine * deserializeCudaEngine(const void *blob, std::size_t size) noexcept
Deserialize an engine from a stream when plugin factory is not used.
Definition: NvInferRuntime.h:796
virtual int enqueue(int batchSize, const void *const *inputs, void **outputs, void *workspace, cudaStream_t stream)=0
Execute the layer.
virtual int getMissing(int size, const char **layerNames, WeightsRole *roles)=0
Get description of missing weights.
constexpr int EnumMax< DeviceType >()
Maximum number of elements in DeviceType enum.
Definition: NvInferRuntime.h:694
virtual DimsExprs getOutputDimensions(int outputIndex, const DimsExprs *inputs, int nbInputs, IExprBuilder &exprBuilder)=0
Get expressions for computing dimensions of an output tensor from dimensions of the input tensors.
virtual Dims getDimensions(const char *inputName, OptProfileSelector select) const noexcept=0
Get the minimum / optimum / maximum dimensions for a dynamic input tensor.
virtual void setDLACore(int dlaCore) noexcept=0
Set the DLA core that the deserialized engine must execute on.
virtual const IDimensionExpr * operation(DimensionOperation op, const IDimensionExpr &first, const IDimensionExpr &second)=0
virtual void configureWithFormat(const Dims *inputDims, int nbInputs, const Dims *outputDims, int nbOutputs, DataType type, PluginFormat format, int maxBatchSize)=0
Configure the layer.
virtual IErrorRecorder * getErrorRecorder() const noexcept=0
Get the ErrorRecorder assigned to this interface.
virtual void setErrorRecorder(IErrorRecorder *recorder)=0
Set the ErrorRecorder for this interface.
Product of the two elements.
TRT_DEPRECATED int enqueue(int, const void *const *, void **, void *, cudaStream_t) _TENSORRT_FINAL
Derived classes should not implement this. In a C++11 API it would be override final.
Definition: NvInferRuntime.h:632
virtual void setDebugSync(bool sync) noexcept=0
Set the debug sync flag.
virtual const char * getName() const noexcept=0
Return the name of the execution context.
The TensorRT API version 1 namespace.
TensorFormat
Format of the input/output tensors.
Definition: NvInferRuntimeCommon.h:249
Definition: NvInferRuntime.h:435
IPluginV2DynamicExt * clone() const _TENSORRT_OVERRIDE=0
Clone the plugin object. This copies over internal plugin parameters as well and returns a new plugin...
virtual int getBindingComponentsPerElement(int bindingIndex) const noexcept=0
Return the number of components included in one element.
virtual float getExtraMemoryTarget() const noexcept=0
Get the extra memory target that has been defined for this profile.
virtual const int32_t * getShapeValues(const char *inputName, OptProfileSelector select) const noexcept=0
Get the minimum / optimum / maximum values for an input shape tensor.
Dims max
Upper bounds on tensor’s dimensions.
Definition: NvInferRuntime.h:413
virtual const char * getBindingName(int bindingIndex) const noexcept=0
Retrieve the name corresponding to a binding index.
virtual float getDynamicRangeMax(const char *tensorName) const =0
Get maximum of dynamic range.
struct CUstream_st * cudaStream_t
Forward declaration of cudaStream_t.
Definition: NvInferRuntimeCommon.h:112
virtual IErrorRecorder * getErrorRecorder() const noexcept=0
Get the ErrorRecorder assigned to this interface.
virtual bool executeV2(void **bindings) noexcept=0
Synchronously execute inference on a network.
virtual bool supportsFormat(DataType type, PluginFormat format) const =0
Check format support.
DataType type
The type of the weights.
Definition: NvInferRuntime.h:110
virtual int getNbBindings() const noexcept=0
Get the number of binding indices.
virtual int getBindingIndex(const char *name) const noexcept=0
Retrieve the binding index for a named tensor.
constexpr int EnumMax< EngineCapability >()
Maximum number of elements in EngineCapability enum.
Definition: NvInferRuntime.h:90
virtual bool getShapeBinding(int bindingIndex, int32_t *data) const noexcept=0
Get values of an input tensor required for shape calculations or an output tensor produced by shape calculations.
TRT_DEPRECATED bool isOutputBroadcastAcrossBatch(int, const bool *, int) const _TENSORRT_FINAL
Derived classes should not implement this. In a C++11 API it would be override final.
Definition: NvInferRuntime.h:572
#define _TENSORRT_OVERRIDE
Items that are marked as deprecated will be removed in a future release.
Definition: NvInferRuntimeCommon.h:62
struct CUevent_st * cudaEvent_t
Forward declaration of cudaEvent_t.
Definition: NvInferRuntimeCommon.h:113
Dims min
Lower bounds on tensor’s dimensions.
Definition: NvInferRuntime.h:410
virtual int getNbDLACores() const noexcept=0
Returns number of DLA hardware cores accessible.
virtual bool isExecutionBinding(int bindingIndex) const noexcept=0
True if pointer to tensor data is required for execution phase, false if nullptr can be supplied.
Fail with error when the coordinates are out of bounds. This is the default.
DataType
The type of weights and tensors.
Definition: NvInferRuntimeCommon.h:163
TRT_DEPRECATED bool supportsFormat(DataType, PluginFormat) const _TENSORRT_FINAL
Derived classes should not implement this. In a C++11 API it would be override final.
Definition: NvInferRuntime.h:597
virtual bool supportsFormatCombination(int pos, const PluginTensorDesc *inOut, int nbInputs, int nbOutputs)=0
Return true if plugin supports the format and datatype for the input/output indexed by pos.
virtual nvinfer1::ICudaEngine * deserializeCudaEngine(const void *blob, std::size_t size, IPluginFactory *pluginFactory) noexcept=0
Deserialize an engine from a stream.
Plugin class for user-implemented layers.
Definition: NvInferRuntime.h:247
Optimization profile for dynamic input dimensions and shape tensors.
Definition: NvInferRuntime.h:1014
Check if two elements are equal.
virtual bool isValid() const noexcept=0
Check whether the optimization profile can be passed to an IBuilderConfig object.
Definition: NvInferRuntime.h:392
virtual bool setBindingDimensions(int bindingIndex, Dims dimensions) noexcept=0
Set the dynamic dimensions of a binding.
PluginTensorDesc desc
Information required to interpret a pointer to tensor data, except that desc.dims has -1 in place of ...
Definition: NvInferRuntime.h:407
virtual int getOptimizationProfile() const noexcept=0
Get the index of the currently selected optimization profile.
virtual int getNbOptimizationProfiles() const noexcept=0
Get the number of optimization profiles defined for this engine.
virtual int getBindingBytesPerComponent(int bindingIndex) const noexcept=0
Return the number of bytes per component of an element.
static constexpr int kFORMAT_COMBINATION_LIMIT
Definition: NvInferRuntime.h:469
virtual bool allInputDimensionsSpecified() const noexcept=0
Whether all dynamic dimensions of input tensors have been specified.
virtual bool isConstant() const =0
Return true if expression is a build-time constant.
Reference counted application-implemented error reporting interface for TensorRT objects.
Definition: NvInferRuntimeCommon.h:1175
Safety restricted capability, TensorRT flow that can only run on DLA devices via NvMediaDLA APIs.
virtual bool setDynamicRange(const char *tensorName, float min, float max)=0
virtual TensorLocation getLocation(int bindingIndex) const noexcept=0
Get location of binding.
virtual TensorFormat getBindingFormat(int bindingIndex) const noexcept=0
Return the binding format.
Minimum of the two elements.
virtual int getNbOutputs() const =0
Get the number of outputs from the layer.
IRuntime * createInferRuntime(ILogger &logger)
Create an instance of an IRuntime class.
Definition: NvInferRuntime.h:1904
Plugin class for user-implemented layers.
Definition: NvInferRuntimeCommon.h:545
virtual int getNbShapeValues(const char *inputName) const noexcept=0
Get the number of values for an input shape tensor.
Definition: NvInferRuntime.h:342
virtual Dims getBindingDimensions(int bindingIndex) const noexcept=0
Get the dynamic dimensions of a binding.
static const int MAX_DIMS
The maximum number of dimensions supported for a tensor.
Definition: NvInferRuntimeCommon.h:222
virtual void terminate()=0
Release resources acquired during plugin layer initialization. This is called when the engine is destroyed.
kernel for IConvolutionLayer, IDeconvolutionLayer, or IFullyConnectedLayer
virtual const char * getName() const noexcept=0
Returns the name of the network associated with the engine.
Application-implemented interface for profiling.
Definition: NvInferRuntime.h:648
virtual size_t getWorkspaceSize(const PluginTensorDesc *inputs, int nbInputs, const PluginTensorDesc *outputs, int nbOutputs) const =0
Find the workspace size required by the layer.
virtual bool enqueue(int batchSize, void **bindings, cudaStream_t stream, cudaEvent_t *inputConsumed) noexcept=0
Asynchronously execute inference on a batch.
virtual bool setExtraMemoryTarget(float target) noexcept=0
Set a target for extra GPU memory that may be used by this profile.
virtual size_t getSerializationSize()=0
Find the size of the serialization buffer required.
Fields that a plugin might see for an input or output.
Definition: NvInferRuntimeCommon.h:338
const void * values
The weight values, in a contiguous array.
Definition: NvInferRuntime.h:111
virtual const char * getBindingFormatDesc(int bindingIndex) const noexcept=0
Return the human readable description of the tensor format.
virtual void configurePlugin(const DynamicPluginTensorDesc *in, int nbInputs, const DynamicPluginTensorDesc *out, int nbOutputs)=0
Configure the layer.
WeightsRole
How a layer uses particular Weights.
Definition: NvInferRuntime.h:668
virtual EngineCapability getEngineCapability() const noexcept=0
Determine what execution capability this engine has.
Plugin class for user-implemented layers.
Definition: NvInferRuntime.h:143
int nbDims
The number of dimensions.
Definition: NvInferRuntime.h:395
virtual bool setDimensions(const char *inputName, OptProfileSelector select, Dims dims) noexcept=0
Set the minimum / optimum / maximum dimensions for a dynamic input tensor.
virtual void setName(const char *name) noexcept=0
Set the name of the execution context.
Application-implemented class for controlling allocation on the GPU.
Definition: NvInferRuntimeCommon.h:977
virtual int getNbLayers() const noexcept=0
Get the number of layers in the network.
virtual bool execute(int batchSize, void **bindings) noexcept=0
Synchronously execute inference on a batch.
virtual Dims getStrides(int bindingIndex) const noexcept=0
Return the strides of the buffer for the given binding.
Safety restricted capability, TensorRT flow that can only run on GPU devices via TensorRT nvinfer1::s...
virtual void setErrorRecorder(IErrorRecorder *recorder) noexcept=0
Set the ErrorRecorder for this interface.
virtual TRT_DEPRECATED std::size_t getWorkspaceSize() const noexcept=0
Get the amount of workspace the engine uses.
Allows a serialized functionally unsafe engine to be deserialized.
Definition: NvInferRuntime.h:707
shift part of IScaleLayer
EngineCapability
Forward declaration of IPluginFactory for use by other interfaces.
Definition: NvInferRuntime.h:82
virtual DataType getBindingDataType(int bindingIndex) const noexcept=0
Determine the required data type for a buffer from its binding index.
virtual int getBindingVectorizedDim(int bindingIndex) const noexcept=0
Return the dimension index that the buffer is vectorized.
Context for executing inference using an engine, with functionally unsafe features.
Definition: NvInferRuntime.h:1552
virtual void * data() const noexcept=0
A pointer to the raw data that is owned by the library.
virtual size_t getDeviceMemorySize() const noexcept=0
Return the amount of device memory required by an execution context.
virtual IExecutionContext * createExecutionContext() noexcept=0
Create an execution context.
virtual bool setInputShapeBinding(int bindingIndex, const int32_t *data) noexcept=0
Set values of input tensor required by shape calculations.
virtual bool enqueueV2(void **bindings, cudaStream_t stream, cudaEvent_t *inputConsumed) noexcept=0
Asynchronously execute inference.
virtual IHostMemory * serialize() const noexcept=0
Serialize the network to a stream.
virtual void setProfiler(IProfiler *) noexcept=0
Set the profiler.
Check if element in first tensor is less than corresponding element in second tensor.
constexpr int EnumMax< DimensionOperation >()
Maximum number of elements in DimensionOperation enum.
Definition: NvInferRuntime.h:327
DimensionOperation
An operation on two IDimensionExpr, which represent integer expressions used in dimension computation...
Definition: NvInferRuntime.h:313
Definition: NvInferRuntime.h:373
virtual int getMaxBatchSize() const noexcept=0
Get the maximum batch size which can be used for inference.
virtual bool isRefittable() const noexcept=0
Return true if engine can be refit.