50 #ifndef NV_INFER_RUNTIME_H 51 #define NV_INFER_RUNTIME_H 64 class IExecutionContext;
119 virtual void* data()
const noexcept = 0;
120 virtual std::size_t size()
const noexcept = 0;
121 virtual DataType type()
const noexcept = 0;
122 virtual void destroy() noexcept = 0;
144 virtual int getNbOutputs()
const TRTNOEXCEPT = 0;
155 virtual Dims getOutputDimensions(
int index,
const Dims* inputs,
int nbInputDims) TRTNOEXCEPT = 0;
173 virtual void configure(
const Dims* inputDims,
int nbInputs,
const Dims* outputDims,
int nbOutputs,
int maxBatchSize) TRTNOEXCEPT = 0;
180 virtual int initialize() TRTNOEXCEPT = 0;
186 virtual void terminate() TRTNOEXCEPT = 0;
196 virtual size_t getWorkspaceSize(
int maxBatchSize)
const TRTNOEXCEPT = 0;
209 virtual int enqueue(
int batchSize,
const void*
const* inputs,
void** outputs,
void* workspace,
cudaStream_t stream) TRTNOEXCEPT = 0;
216 virtual size_t getSerializationSize() TRTNOEXCEPT = 0;
225 virtual void serialize(
void* buffer) TRTNOEXCEPT = 0;
248 return NV_TENSORRT_VERSION;
279 virtual void configureWithFormat(
const Dims* inputDims,
int nbInputs,
const Dims* outputDims,
int nbOutputs,
DataType type,
PluginFormat format,
int maxBatchSize) TRTNOEXCEPT = 0;
333 virtual bool isConstant()
const = 0;
337 virtual int getConstantValue()
const = 0;
453 static constexpr
int kFORMAT_COMBINATION_LIMIT = 100;
486 virtual bool supportsFormatCombination(
int pos,
const PluginTensorDesc* inOut,
int nbInputs,
int nbOutputs) TRTNOEXCEPT = 0;
594 const bool* ,
const bool* ,
PluginFormat ,
int ) _TENSORRT_FINAL TRTNOEXCEPT {}
641 virtual void reportLayerTime(
const char* layerName,
float ms) TRTNOEXCEPT = 0;
710 virtual void setDLACore(
int dlaCore) noexcept = 0;
716 virtual int getDLACore()
const noexcept = 0;
721 virtual int getNbDLACores()
const noexcept = 0;
726 virtual void destroy() noexcept = 0;
740 virtual void setGpuAllocator(
IGpuAllocator* allocator) noexcept = 0;
754 virtual void setErrorRecorder(
IErrorRecorder* recorder) noexcept = 0;
778 return deserializeCudaEngine(blob, size,
nullptr);
802 virtual bool setWeights(
const char* layerName,
WeightsRole role,
Weights weights) TRTNOEXCEPT = 0;
809 virtual bool refitCudaEngine() TRTNOEXCEPT = 0;
827 virtual int getMissing(
int size,
const char** layerNames,
WeightsRole* roles) TRTNOEXCEPT = 0;
841 virtual int getAll(
int size,
const char** layerNames,
WeightsRole* roles) TRTNOEXCEPT = 0;
843 virtual void destroy() TRTNOEXCEPT = 0;
859 virtual bool setDynamicRange(
const char* tensorName,
float min,
float max) TRTNOEXCEPT = 0;
868 virtual float getDynamicRangeMin(
const char* tensorName)
const TRTNOEXCEPT = 0;
877 virtual float getDynamicRangeMax(
const char* tensorName)
const TRTNOEXCEPT = 0;
890 virtual int getTensorsWithDynamicRange(
int size,
const char** tensorNames)
const TRTNOEXCEPT = 0;
904 virtual void setErrorRecorder(
IErrorRecorder* recorder) TRTNOEXCEPT = 0;
942 virtual IPlugin* createPlugin(
const char* layerName,
const void* serialData,
size_t serialLength) TRTNOEXCEPT = 0;
1046 virtual bool setShapeValues(
1047 const char* inputName,
OptProfileSelector select,
const int32_t* values,
int nbValues) noexcept = 0;
1055 virtual int getNbShapeValues(
const char* inputName)
const noexcept = 0;
1062 virtual const int32_t* getShapeValues(
const char* inputName,
OptProfileSelector select)
const noexcept = 0;
1077 virtual bool setExtraMemoryTarget(
float target) noexcept = 0;
1082 virtual float getExtraMemoryTarget()
const noexcept = 0;
1095 virtual bool isValid()
const noexcept = 0;
1116 virtual int getNbBindings()
const noexcept = 0;
1131 virtual int getBindingIndex(
const char* name)
const noexcept = 0;
1143 virtual const char* getBindingName(
int bindingIndex)
const noexcept = 0;
1153 virtual bool bindingIsInput(
int bindingIndex)
const noexcept = 0;
1164 virtual Dims getBindingDimensions(
int bindingIndex)
const noexcept = 0;
1174 virtual DataType getBindingDataType(
int bindingIndex)
const noexcept = 0;
1181 virtual int getMaxBatchSize()
const noexcept = 0;
1191 virtual int getNbLayers()
const noexcept = 0;
1200 virtual std::size_t getWorkspaceSize()
const noexcept = 0;
1211 virtual IHostMemory* serialize()
const noexcept = 0;
1223 virtual void destroy() noexcept = 0;
1235 virtual TensorLocation getLocation(
int bindingIndex)
const noexcept = 0;
1247 virtual IExecutionContext* createExecutionContextWithoutDeviceMemory() noexcept = 0;
1254 virtual size_t getDeviceMemorySize()
const noexcept = 0;
1261 virtual bool isRefittable()
const noexcept = 0;
1272 virtual int getBindingBytesPerComponent(
int bindingIndex)
const noexcept = 0;
1283 virtual int getBindingComponentsPerElement(
int bindingIndex)
const noexcept = 0;
1290 virtual TensorFormat getBindingFormat(
int bindingIndex)
const noexcept = 0;
1306 virtual const char* getBindingFormatDesc(
int bindingIndex)
const noexcept = 0;
1315 virtual int getBindingVectorizedDim(
int bindingIndex)
const noexcept = 0;
1327 virtual const char* getName()
const noexcept = 0;
1335 virtual int getNbOptimizationProfiles()
const noexcept = 0;
1347 virtual Dims getProfileDimensions(
int bindingIndex,
int profileIndex,
OptProfileSelector select)
const noexcept = 0;
1364 virtual const int32_t* getProfileShapeValues(
int profileIndex,
int inputIndex,
OptProfileSelector select)
const noexcept = 0; 1399 virtual bool isShapeBinding(
int bindingIndex)
const noexcept = 0;
1410 virtual bool isExecutionBinding(
int bindingIndex)
const noexcept = 0;
1434 virtual void setErrorRecorder(
IErrorRecorder* recorder) noexcept = 0;
1473 virtual bool execute(
int batchSize,
void** bindings) noexcept = 0;
1488 virtual bool enqueue(
int batchSize,
void** bindings,
cudaStream_t stream,
cudaEvent_t* inputConsumed) noexcept = 0;
1497 virtual void setDebugSync(
bool sync) noexcept = 0;
1504 virtual bool getDebugSync()
const noexcept = 0;
1511 virtual void setProfiler(
IProfiler*) noexcept = 0;
1518 virtual IProfiler* getProfiler()
const noexcept = 0;
1525 virtual const ICudaEngine& getEngine()
const noexcept = 0;
1530 virtual void destroy() noexcept = 0;
1543 virtual void setName(
const char* name) noexcept = 0;
1550 virtual const char* getName()
const noexcept = 0;
1563 virtual void setDeviceMemory(
void* memory) noexcept = 0;
1573 virtual Dims getStrides(
int bindingIndex)
const noexcept = 0;
1604 virtual bool setOptimizationProfile(
int profileIndex) noexcept = 0;
1613 virtual int getOptimizationProfile()
const noexcept = 0;
1633 virtual bool setBindingDimensions(
int bindingIndex,
Dims dimensions) noexcept = 0;
1655 virtual Dims getBindingDimensions(
int bindingIndex)
const noexcept = 0;
1672 virtual bool setInputShapeBinding(
int bindingIndex,
const int32_t* data) noexcept = 0;
1690 virtual bool getShapeBinding(
int bindingIndex, int32_t* data)
const noexcept = 0;
1702 virtual bool allInputDimensionsSpecified()
const noexcept = 0;
1713 virtual bool allInputShapesSpecified()
const noexcept = 0;
1727 virtual void setErrorRecorder(
IErrorRecorder* recorder) noexcept = 0;
1753 virtual bool executeV2(
void** bindings) noexcept = 0;
1803 #endif // NV_INFER_RUNTIME_H
An engine for executing inference on a built network, with functionally unsafe features.
Definition: NvInferRuntime.h:1108
Subtract the second element from the first.
DataType
The type of weights and tensors.
Definition: NvInferRuntimeCommon.h:162
void * createInferRuntime_INTERNAL(void *logger, int version)
Internal C entry point for creating IRuntime.
PluginTensorDesc desc
Information required to interpret a pointer to tensor data, except that desc.dims has -1 in place of ...
Definition: NvInferRuntime.h:391
Safety restricted capability, TensorRT flow that can only run on GPU devices.
constexpr int EnumMax< WeightsRole >()
Maximum number of elements in WeightsRole enum.
Definition: NvInferRuntime.h:662
OptProfileSelector
When setting or querying optimization profile parameters (such as shape tensor inputs or dynamic dime...
Definition: NvInferRuntime.h:955
struct CUstream_st * cudaStream_t
Forward declaration of cudaStream_t.
Definition: NvInferRuntimeCommon.h:112
TRT_DEPRECATED bool canBroadcastInputAcrossBatch(int) const _TENSORRT_FINAL
Derived classes should not implement this. In a C++11 API it would be override final.
Definition: NvInferRuntime.h:567
const void * values
The weight values, in a contiguous array.
Definition: NvInferRuntime.h:102
void configure(const Dims *, int, const Dims *, int, int) _TENSORRT_FINAL
Derived classes should not implement this. In a C++11 API it would be override final.
Definition: NvInferRuntime.h:287
Definition: NvInferRuntime.h:419
Allows a serialized functionally unsafe engine to be deserialized.
Definition: NvInferRuntime.h:691
Plugin class for user-implemented layers.
Definition: NvInferRuntime.h:134
static const int MAX_DIMS
The maximum number of dimensions supported for a tensor.
Definition: NvInferRuntimeCommon.h:208
TRT_DEPRECATED bool isOutputBroadcastAcrossBatch(int, const bool *, int) const _TENSORRT_FINAL
Derived classes should not implement this. In a C++11 API it would be override final.
Definition: NvInferRuntime.h:556
bias for IConvolutionLayer, IDeconvolutionLayer, or IFullyConnectedLayer
TensorLocation
The location for tensor data storage, device or host.
Definition: NvInferRuntimeCommon.h:925
shift part of IScaleLayer
virtual int getTensorRTVersion() const
Return the API version with which this plugin was built.
Definition: NvInferRuntime.h:246
DataType type
The type of the weights.
Definition: NvInferRuntime.h:101
The TensorRT API version 1 namespace.
Plugin factory for deserialization.
Definition: NvInferRuntime.h:925
nvinfer1::ICudaEngine * deserializeCudaEngine(const void *blob, std::size_t size) noexcept
Definition: NvInferRuntime.h:776
This is used to set or get the value that is used in the optimization (kernel selection).
Full capability, TensorRT mode without any restrictions.
1 if operands are equal, 0 otherwise.
Product of the two elements.
Optimization profile for dynamic input dimensions and shape tensors.
Definition: NvInferRuntime.h:990
TRT_DEPRECATED Dims getOutputDimensions(int, const Dims *, int) _TENSORRT_FINAL
Derived classes should not implement this. In a C++11 API it would be override final.
Definition: NvInferRuntime.h:543
int getTensorRTVersion() const _TENSORRT_OVERRIDE
Return the API version with which this plugin was built.
Definition: NvInferRuntime.h:527
int nbDims
The number of dimensions.
Definition: NvInferRuntime.h:379
constexpr int EnumMax< DimensionOperation >()
Maximum number of elements in DimensionOperation enum.
Definition: NvInferRuntime.h:314
Reference counted application-implemented error reporting interface for TensorRT objects.
Definition: NvInferRuntimeCommon.h:1141
1 if first operand is less than second operand, 0 otherwise.
TRT_DEPRECATED int enqueue(int, const void *const *, void **, void *, cudaStream_t) _TENSORRT_FINAL
Derived classes should not implement this. In a C++11 API it would be override final.
Definition: NvInferRuntime.h:616
Structure to define the dimensions of a tensor.
Definition: NvInferRuntimeCommon.h:205
constexpr int EnumMax< DeviceType >()
Maximum number of elements in DeviceType enum.
Definition: NvInferRuntime.h:678
Minimum of the two elements.
int64_t count
The number of weights in the array.
Definition: NvInferRuntime.h:103
Definition: NvInferRuntime.h:357
kernel for IConvolutionLayer, IDeconvolutionLayer, or IFullyConnectedLayer
An array of weights used as a layer parameter.
Definition: NvInferRuntime.h:98
int nbDims
The number of dimensions.
Definition: NvInferRuntimeCommon.h:209
TensorFormat
Format of the input/output tensors.
Definition: NvInferRuntimeCommon.h:235
Application-implemented logging interface for the builder, engine and runtime.
Definition: NvInferRuntimeCommon.h:985
constexpr int EnumMax< EngineCapability >()
Maximum number of elements in EngineCapability enum.
Definition: NvInferRuntime.h:84
Plugin class for user-implemented layers.
Definition: NvInferRuntime.h:238
Floor division of the first element by the second.
Fields that a plugin might see for an input or output.
Definition: NvInferRuntimeCommon.h:310
EngineCapability
Forward declaration of IPluginFactory for use by other interfaces.
Definition: NvInferRuntime.h:76
Safety restricted capability, TensorRT flow that can only run on DLA devices.
Definition: NvInferRuntime.h:329
Definition: NvInferRuntime.h:388
Class to handle library allocated memory that is accessible to the user.
Definition: NvInferRuntime.h:116
constexpr int EnumMax< OptProfileSelector >()
Number of different values of OptProfileSelector enum.
Definition: NvInferRuntime.h:963
Dims max
Upper bounds on tensor’s dimensions.
Definition: NvInferRuntime.h:397
DimensionOperation
An operation on two IDimensionExpr, which represent integer expressions used in dimension computation...
Definition: NvInferRuntime.h:300
#define _TENSORRT_OVERRIDE
Items that are marked as deprecated will be removed in a future release.
Definition: NvInferRuntimeCommon.h:62
Application-implemented class for controlling allocation on the GPU.
Definition: NvInferRuntimeCommon.h:942
Context for executing inference using an engine, with functionally unsafe features.
Definition: NvInferRuntime.h:1459
Updates weights in an engine.
Definition: NvInferRuntime.h:789
Dims min
Lower bounds on tensor’s dimensions.
Definition: NvInferRuntime.h:394
DeviceType
The device that this layer/network will execute on.
Definition: NvInferRuntime.h:672
TRT_DEPRECATED size_t getWorkspaceSize(int) const _TENSORRT_FINAL
Derived classes should not implement this. In a C++11 API it would be override final.
Definition: NvInferRuntime.h:606
Definition: NvInferRuntime.h:376
Application-implemented interface for profiling.
Definition: NvInferRuntime.h:632
Plugin class for user-implemented layers.
Definition: NvInferRuntimeCommon.h:513
WeightsRole
How a layer uses particular Weights.
Definition: NvInferRuntime.h:652
TRT_DEPRECATED bool supportsFormat(DataType, PluginFormat) const _TENSORRT_FINAL
Derived classes should not implement this. In a C++11 API it would be override final.
Definition: NvInferRuntime.h:581
struct CUevent_st * cudaEvent_t
Forward declaration of cudaEvent_t.
Definition: NvInferRuntimeCommon.h:113
void * createInferRefitter_INTERNAL(void *engine, void *logger, int version)
Internal C entry point for creating IRefitter.
TRT_DEPRECATED void configurePlugin(const Dims *, int, const Dims *, int, const DataType *, const DataType *, const bool *, const bool *, PluginFormat, int) _TENSORRT_FINAL
Derived classes should not implement this. In a C++11 API it would be override final.
Definition: NvInferRuntime.h:592