50 #ifndef NV_INFER_RUNTIME_H 51 #define NV_INFER_RUNTIME_H 64 class IExecutionContext;
119 virtual void* data()
const noexcept = 0;
120 virtual std::size_t size()
const noexcept = 0;
121 virtual DataType type()
const noexcept = 0;
122 virtual void destroy() noexcept = 0;
144 virtual int getNbOutputs()
const TRTNOEXCEPT = 0;
155 virtual Dims getOutputDimensions(
int index,
const Dims* inputs,
int nbInputDims) TRTNOEXCEPT = 0;
173 virtual void configure(
const Dims* inputDims,
int nbInputs,
const Dims* outputDims,
int nbOutputs,
int maxBatchSize) TRTNOEXCEPT = 0;
180 virtual int initialize() TRTNOEXCEPT = 0;
186 virtual void terminate() TRTNOEXCEPT = 0;
196 virtual size_t getWorkspaceSize(
int maxBatchSize)
const TRTNOEXCEPT = 0;
209 virtual int enqueue(
int batchSize,
const void*
const* inputs,
void** outputs,
void* workspace,
cudaStream_t stream) TRTNOEXCEPT = 0;
216 virtual size_t getSerializationSize() TRTNOEXCEPT = 0;
225 virtual void serialize(
void* buffer) TRTNOEXCEPT = 0;
248 return NV_TENSORRT_VERSION;
279 virtual void configureWithFormat(
const Dims* inputDims,
int nbInputs,
const Dims* outputDims,
int nbOutputs,
DataType type,
PluginFormat format,
int maxBatchSize) TRTNOEXCEPT = 0;
333 virtual bool isConstant()
const = 0;
337 virtual int getConstantValue()
const = 0;
456 static constexpr
int kFORMAT_COMBINATION_LIMIT = 100;
489 virtual bool supportsFormatCombination(
int pos,
const PluginTensorDesc* inOut,
int nbInputs,
int nbOutputs) TRTNOEXCEPT = 0;
550 return Dims{-1, {}, {}};
597 const bool* ,
const bool* ,
PluginFormat ,
int ) _TENSORRT_FINAL TRTNOEXCEPT {}
644 virtual void reportLayerTime(
const char* layerName,
float ms) TRTNOEXCEPT = 0;
713 virtual void setDLACore(
int dlaCore) noexcept = 0;
719 virtual int getDLACore()
const noexcept = 0;
724 virtual int getNbDLACores()
const noexcept = 0;
729 virtual void destroy() noexcept = 0;
743 virtual void setGpuAllocator(
IGpuAllocator* allocator) noexcept = 0;
757 virtual void setErrorRecorder(
IErrorRecorder* recorder) noexcept = 0;
781 return deserializeCudaEngine(blob, size,
nullptr);
805 virtual bool setWeights(
const char* layerName,
WeightsRole role,
Weights weights) TRTNOEXCEPT = 0;
812 virtual bool refitCudaEngine() TRTNOEXCEPT = 0;
830 virtual int getMissing(
int size,
const char** layerNames,
WeightsRole* roles) TRTNOEXCEPT = 0;
844 virtual int getAll(
int size,
const char** layerNames,
WeightsRole* roles) TRTNOEXCEPT = 0;
846 virtual void destroy() TRTNOEXCEPT = 0;
864 virtual bool setDynamicRange(
const char* tensorName,
float min,
float max) TRTNOEXCEPT = 0;
873 virtual float getDynamicRangeMin(
const char* tensorName)
const TRTNOEXCEPT = 0;
882 virtual float getDynamicRangeMax(
const char* tensorName)
const TRTNOEXCEPT = 0;
895 virtual int getTensorsWithDynamicRange(
int size,
const char** tensorNames)
const TRTNOEXCEPT = 0;
909 virtual void setErrorRecorder(
IErrorRecorder* recorder) TRTNOEXCEPT = 0;
947 virtual IPlugin* createPlugin(
const char* layerName,
const void* serialData,
size_t serialLength) TRTNOEXCEPT = 0;
1053 virtual bool setShapeValues(
1054 const char* inputName,
OptProfileSelector select,
const int32_t* values,
int nbValues) noexcept = 0;
1062 virtual int getNbShapeValues(
const char* inputName)
const noexcept = 0;
1069 virtual const int32_t* getShapeValues(
const char* inputName,
OptProfileSelector select)
const noexcept = 0;
1084 virtual bool setExtraMemoryTarget(
float target) noexcept = 0;
1089 virtual float getExtraMemoryTarget()
const noexcept = 0;
1102 virtual bool isValid()
const noexcept = 0;
1126 virtual int getNbBindings()
const noexcept = 0;
1141 virtual int getBindingIndex(
const char* name)
const noexcept = 0;
1153 virtual const char* getBindingName(
int bindingIndex)
const noexcept = 0;
1163 virtual bool bindingIsInput(
int bindingIndex)
const noexcept = 0;
1174 virtual Dims getBindingDimensions(
int bindingIndex)
const noexcept = 0;
1184 virtual DataType getBindingDataType(
int bindingIndex)
const noexcept = 0;
1193 virtual int getMaxBatchSize()
const noexcept = 0;
1203 virtual int getNbLayers()
const noexcept = 0;
1212 virtual std::size_t getWorkspaceSize()
const noexcept = 0;
1223 virtual IHostMemory* serialize()
const noexcept = 0;
1235 virtual void destroy() noexcept = 0;
1247 virtual TensorLocation getLocation(
int bindingIndex)
const noexcept = 0;
1259 virtual IExecutionContext* createExecutionContextWithoutDeviceMemory() noexcept = 0;
1266 virtual size_t getDeviceMemorySize()
const noexcept = 0;
1273 virtual bool isRefittable()
const noexcept = 0;
1284 virtual int getBindingBytesPerComponent(
int bindingIndex)
const noexcept = 0;
1295 virtual int getBindingComponentsPerElement(
int bindingIndex)
const noexcept = 0;
1302 virtual TensorFormat getBindingFormat(
int bindingIndex)
const noexcept = 0;
1318 virtual const char* getBindingFormatDesc(
int bindingIndex)
const noexcept = 0;
1327 virtual int getBindingVectorizedDim(
int bindingIndex)
const noexcept = 0;
1339 virtual const char* getName()
const noexcept = 0;
1347 virtual int getNbOptimizationProfiles()
const noexcept = 0;
1359 virtual Dims getProfileDimensions(
int bindingIndex,
int profileIndex,
OptProfileSelector select)
const noexcept = 0;
1376 virtual const int32_t* getProfileShapeValues(
int profileIndex,
int inputIndex,
OptProfileSelector select)
const noexcept = 0; 1411 virtual bool isShapeBinding(
int bindingIndex)
const noexcept = 0;
1422 virtual bool isExecutionBinding(
int bindingIndex)
const noexcept = 0;
1446 virtual void setErrorRecorder(
IErrorRecorder* recorder) noexcept = 0;
1474 virtual bool hasImplicitBatchDimension()
const TRTNOEXCEPT = 0;
1501 virtual bool execute(
int batchSize,
void** bindings) noexcept = 0;
1516 virtual bool enqueue(
int batchSize,
void** bindings,
cudaStream_t stream,
cudaEvent_t* inputConsumed) noexcept = 0;
1525 virtual void setDebugSync(
bool sync) noexcept = 0;
1532 virtual bool getDebugSync()
const noexcept = 0;
1539 virtual void setProfiler(
IProfiler*) noexcept = 0;
1546 virtual IProfiler* getProfiler()
const noexcept = 0;
1553 virtual const ICudaEngine& getEngine()
const noexcept = 0;
1558 virtual void destroy() noexcept = 0;
1571 virtual void setName(
const char* name) noexcept = 0;
1578 virtual const char* getName()
const noexcept = 0;
1591 virtual void setDeviceMemory(
void* memory) noexcept = 0;
1601 virtual Dims getStrides(
int bindingIndex)
const noexcept = 0;
1629 virtual bool setOptimizationProfile(
int profileIndex) noexcept = 0;
1638 virtual int getOptimizationProfile()
const noexcept = 0;
1658 virtual bool setBindingDimensions(
int bindingIndex,
Dims dimensions) noexcept = 0;
1680 virtual Dims getBindingDimensions(
int bindingIndex)
const noexcept = 0;
1697 virtual bool setInputShapeBinding(
int bindingIndex,
const int32_t* data) noexcept = 0;
1715 virtual bool getShapeBinding(
int bindingIndex, int32_t* data)
const noexcept = 0;
1727 virtual bool allInputDimensionsSpecified()
const noexcept = 0;
1738 virtual bool allInputShapesSpecified()
const noexcept = 0;
1752 virtual void setErrorRecorder(
IErrorRecorder* recorder) noexcept = 0;
1778 virtual bool executeV2(
void** bindings) noexcept = 0;
1802 extern "C" TENSORRTAPI
void* createInferRuntime_INTERNAL(
void* logger,
int version);
1808 extern "C" TENSORRTAPI
void* createInferRefitter_INTERNAL(
void* engine,
void* logger,
int version);
1821 return static_cast<IRuntime*
>(createInferRuntime_INTERNAL(&logger, NV_TENSORRT_VERSION));
1831 return static_cast<IRefitter*
>(createInferRefitter_INTERNAL(&engine, &logger, NV_TENSORRT_VERSION));
1836 #endif // NV_INFER_RUNTIME_H
An engine for executing inference on a built network, with functionally unsafe features.
Definition: NvInferRuntime.h:1115
Subtract the second element from the first.
DataType
The type of weights and tensors.
Definition: NvInferRuntimeCommon.h:162
IRuntime * createInferRuntime(ILogger &logger)
Create an instance of an IRuntime class.
Definition: NvInferRuntime.h:1819
Check if an element in the first tensor is less than the corresponding element in the second tensor.
Check if two elements are equal.
PluginTensorDesc desc
Information required to interpret a pointer to tensor data, except that desc.dims has -1 in place of ...
Definition: NvInferRuntime.h:394
Safety restricted capability, TensorRT flow that can only run on GPU devices.
constexpr int EnumMax< WeightsRole >()
Maximum number of elements in WeightsRole enum.
Definition: NvInferRuntime.h:665
OptProfileSelector
When setting or querying optimization profile parameters (such as shape tensor inputs or dynamic dime...
Definition: NvInferRuntime.h:962
struct CUstream_st * cudaStream_t
Forward declaration of cudaStream_t.
Definition: NvInferRuntimeCommon.h:112
TRT_DEPRECATED bool canBroadcastInputAcrossBatch(int) const _TENSORRT_FINAL
Derived classes should not implement this. In a C++11 API it would be override final.
Definition: NvInferRuntime.h:570
const void * values
The weight values, in a contiguous array.
Definition: NvInferRuntime.h:102
void configure(const Dims *, int, const Dims *, int, int) _TENSORRT_FINAL
Derived classes should not implement this. In a C++11 API it would be override final.
Definition: NvInferRuntime.h:287
Definition: NvInferRuntime.h:422
Allows a serialized functionally unsafe engine to be deserialized.
Definition: NvInferRuntime.h:694
Plugin class for user-implemented layers.
Definition: NvInferRuntime.h:134
static const int MAX_DIMS
The maximum number of dimensions supported for a tensor.
Definition: NvInferRuntimeCommon.h:209
TRT_DEPRECATED bool isOutputBroadcastAcrossBatch(int, const bool *, int) const _TENSORRT_FINAL
Derived classes should not implement this. In a C++11 API it would be override final.
Definition: NvInferRuntime.h:559
bias for IConvolutionLayer, IDeconvolutionLayer, or IFullyConnectedLayer
Fail with error when the coordinates are out of bounds. This is the default.
TensorLocation
The location for tensor data storage, device or host.
Definition: NvInferRuntimeCommon.h:926
shift part of IScaleLayer
DataType type
The type of the weights.
Definition: NvInferRuntime.h:101
The TensorRT API version 1 namespace.
Plugin factory for deserialization.
Definition: NvInferRuntime.h:930
nvinfer1::ICudaEngine * deserializeCudaEngine(const void *blob, std::size_t size) noexcept
Deserialize an engine from a stream when plugin factory is not used.
Definition: NvInferRuntime.h:779
This is used to set or get the value that is used in the optimization (kernel selection).
Product of the two elements.
Optimization profile for dynamic input dimensions and shape tensors.
Definition: NvInferRuntime.h:997
TRT_DEPRECATED Dims getOutputDimensions(int, const Dims *, int) _TENSORRT_FINAL
Derived classes should not implement this. In a C++11 API it would be override final.
Definition: NvInferRuntime.h:548
int getTensorRTVersion() const _TENSORRT_OVERRIDE
Return the API version with which this plugin was built.
Definition: NvInferRuntime.h:532
int nbDims
The number of dimensions.
Definition: NvInferRuntime.h:382
constexpr int EnumMax< DimensionOperation >()
Maximum number of elements in DimensionOperation enum.
Definition: NvInferRuntime.h:314
Reference counted application-implemented error reporting interface for TensorRT objects.
Definition: NvInferRuntimeCommon.h:1142
TRT_DEPRECATED int enqueue(int, const void *const *, void **, void *, cudaStream_t) _TENSORRT_FINAL
Derived classes should not implement this. In a C++11 API it would be override final.
Definition: NvInferRuntime.h:619
Structure to define the dimensions of a tensor.
Definition: NvInferRuntimeCommon.h:206
constexpr int EnumMax< DeviceType >()
Maximum number of elements in DeviceType enum.
Definition: NvInferRuntime.h:681
Minimum of the two elements.
int64_t count
The number of weights in the array.
Definition: NvInferRuntime.h:103
Definition: NvInferRuntime.h:360
kernel for IConvolutionLayer, IDeconvolutionLayer, or IFullyConnectedLayer
An array of weights used as a layer parameter.
Definition: NvInferRuntime.h:98
IRefitter * createInferRefitter(ICudaEngine &engine, ILogger &logger)
Create an instance of an IRefitter class.
Definition: NvInferRuntime.h:1829
TensorFormat
Format of the input/output tensors.
Definition: NvInferRuntimeCommon.h:236
Application-implemented logging interface for the builder, engine and runtime.
Definition: NvInferRuntimeCommon.h:986
constexpr int EnumMax< EngineCapability >()
Maximum number of elements in EngineCapability enum.
Definition: NvInferRuntime.h:84
Plugin class for user-implemented layers.
Definition: NvInferRuntime.h:238
Floor division of the first element by the second.
Fields that a plugin might see for an input or output.
Definition: NvInferRuntimeCommon.h:311
EngineCapability
Forward declaration of IPluginFactory for use by other interfaces.
Definition: NvInferRuntime.h:76
Safety restricted capability, TensorRT flow that can only run on DLA devices.
Definition: NvInferRuntime.h:329
Definition: NvInferRuntime.h:391
Class to handle library allocated memory that is accessible to the user.
Definition: NvInferRuntime.h:116
constexpr int EnumMax< OptProfileSelector >()
Number of different values of OptProfileSelector enum.
Definition: NvInferRuntime.h:970
Dims max
Upper bounds on tensor's dimensions.
Definition: NvInferRuntime.h:400
DimensionOperation
An operation on two IDimensionExpr, which represent integer expressions used in dimension computation...
Definition: NvInferRuntime.h:300
virtual int getTensorRTVersion() const
Return the API version with which this plugin was built.
Definition: NvInferRuntime.h:246
#define _TENSORRT_OVERRIDE
Items that are marked as deprecated will be removed in a future release.
Definition: NvInferRuntimeCommon.h:62
Application-implemented class for controlling allocation on the GPU.
Definition: NvInferRuntimeCommon.h:943
Context for executing inference using an engine, with functionally unsafe features.
Definition: NvInferRuntime.h:1487
Updates weights in an engine.
Definition: NvInferRuntime.h:792
Dims min
Lower bounds on tensor's dimensions.
Definition: NvInferRuntime.h:397
DeviceType
The device that this layer/network will execute on.
Definition: NvInferRuntime.h:675
TRT_DEPRECATED size_t getWorkspaceSize(int) const _TENSORRT_FINAL
Derived classes should not implement this. In a C++11 API it would be override final.
Definition: NvInferRuntime.h:609
Definition: NvInferRuntime.h:379
Application-implemented interface for profiling.
Definition: NvInferRuntime.h:635
Plugin class for user-implemented layers.
Definition: NvInferRuntimeCommon.h:514
WeightsRole
How a layer uses particular Weights.
Definition: NvInferRuntime.h:655
TRT_DEPRECATED bool supportsFormat(DataType, PluginFormat) const _TENSORRT_FINAL
Derived classes should not implement this. In a C++11 API it would be override final.
Definition: NvInferRuntime.h:584
struct CUevent_st * cudaEvent_t
Forward declaration of cudaEvent_t.
Definition: NvInferRuntimeCommon.h:113
TRT_DEPRECATED void configurePlugin(const Dims *, int, const Dims *, int, const DataType *, const DataType *, const bool *, const bool *, PluginFormat, int) _TENSORRT_FINAL
Derived classes should not implement this. In a C++11 API it would be override final.
Definition: NvInferRuntime.h:595