13#ifndef NV_INFER_RUNTIME_H
14#define NV_INFER_RUNTIME_H
22#include "NvInferImpl.h"
28class IExecutionContext;
31class IEngineInspector;
107 static constexpr int32_t kVALUE = 3;
151 return mImpl->data();
155 std::size_t
size() const noexcept
157 return mImpl->size();
163 return mImpl->type();
227 static constexpr int32_t kVALUE = 2;
249 return mImpl->isConstant();
256 return mImpl->getConstantValue();
287 return mImpl->constant(value);
295 return mImpl->operation(op, first, second);
380 int32_t outputIndex,
DimsExprs const* inputs, int32_t nbInputs,
IExprBuilder& exprBuilder) noexcept = 0;
385 static constexpr int32_t kFORMAT_COMBINATION_LIMIT = 100;
419 virtual
bool supportsFormatCombination(
420 int32_t pos,
PluginTensorDesc const* inOut, int32_t nbInputs, int32_t nbOutputs) noexcept = 0;
473 int32_t nbOutputs) const noexcept = 0;
488 void const* const* inputs,
void* const* outputs,
void* workspace, cudaStream_t stream) noexcept = 0;
498 int32_t getTensorRTVersion() const noexcept
override
509 bool const*,
PluginFormat, int32_t)
noexcept override final
518 Dims getOutputDimensions(int32_t,
Dims const*, int32_t)
noexcept override final
523 bool isOutputBroadcastAcrossBatch(int32_t,
bool const*, int32_t)
const noexcept override final
528 bool canBroadcastInputAcrossBatch(int32_t)
const noexcept override final
533 size_t getWorkspaceSize(int32_t)
const noexcept override final
538 int32_t enqueue(int32_t,
void const*
const*,
void*
const*,
void*, cudaStream_t)
noexcept override final
638 void const* blob, std::
size_t size, IPluginFactory* pluginFactory) noexcept
640 return mImpl->deserializeCudaEngine(blob, size,
nullptr);
655 mImpl->setDLACore(dlaCore);
664 return mImpl->getDLACore();
672 return mImpl->getNbDLACores();
698 mImpl->setGpuAllocator(allocator);
717 mImpl->setErrorRecorder(recorder);
732 return mImpl->getErrorRecorder();
747 return mImpl->deserializeCudaEngine(blob, size,
nullptr);
757 return mImpl->getLogger();
771 return mImpl->setMaxThreads(maxThreads);
785 return mImpl->getMaxThreads();
816 return mImpl->setWeights(layerName, role, weights);
831 return mImpl->refitCudaEngine();
852 return mImpl->getMissing(size, layerNames, roles);
869 return mImpl->getAll(size, layerNames, roles);
896 return mImpl->setDynamicRange(tensorName, min, max);
908 return mImpl->getDynamicRangeMin(tensorName);
920 return mImpl->getDynamicRangeMax(tensorName);
936 return mImpl->getTensorsWithDynamicRange(size, tensorNames);
955 mImpl->setErrorRecorder(recorder);
970 return mImpl->getErrorRecorder();
988 return mImpl->setNamedWeights(name, weights);
1008 return mImpl->getMissingWeights(size, weightsNames);
1024 return mImpl->getAllWeights(size, weightsNames);
1034 return mImpl->getLogger();
1048 return mImpl->setMaxThreads(maxThreads);
1062 return mImpl->getMaxThreads();
1149 return mImpl->setDimensions(inputName, select, dims);
1159 return mImpl->getDimensions(inputName, select);
1201 char const* inputName,
OptProfileSelector select, int32_t
const* values, int32_t nbValues)
noexcept
1203 return mImpl->setShapeValues(inputName, select, values, nbValues);
1214 return mImpl->getNbShapeValues(inputName);
1224 return mImpl->getShapeValues(inputName, select);
1242 return mImpl->setExtraMemoryTarget(target);
1250 return mImpl->getExtraMemoryTarget();
1266 return mImpl->isValid();
1357 int32_t getNbBindings() const noexcept
1359 return mImpl->getNbBindings();
1381 return mImpl->getBindingIndex(name);
1401 return mImpl->getBindingName(bindingIndex);
1414 return mImpl->bindingIsInput(bindingIndex);
1439 return mImpl->getBindingDimensions(bindingIndex);
1452 return mImpl->getBindingDataType(bindingIndex);
1468 return mImpl->getMaxBatchSize();
1482 return mImpl->getNbLayers();
1496 return mImpl->serialize();
1512 return mImpl->createExecutionContext();
1539 return mImpl->getLocation(bindingIndex);
1548 return mImpl->createExecutionContextWithoutDeviceMemory();
1558 return mImpl->getDeviceMemorySize();
1568 return mImpl->isRefittable();
1582 return mImpl->getBindingBytesPerComponent(bindingIndex);
1596 return mImpl->getBindingComponentsPerElement(bindingIndex);
1606 return mImpl->getBindingFormat(bindingIndex);
1625 return mImpl->getBindingFormatDesc(bindingIndex);
1637 return mImpl->getBindingVectorizedDim(bindingIndex);
1652 return mImpl->getName();
1663 return mImpl->getNbOptimizationProfiles();
1690 return mImpl->getProfileDimensions(bindingIndex, profileIndex, select);
1717 return mImpl->getProfileShapeValues(profileIndex, inputIndex, select);
1753 return mImpl->isShapeBinding(bindingIndex);
1767 return mImpl->isExecutionBinding(bindingIndex);
1782 return mImpl->getEngineCapability();
1800 return mImpl->setErrorRecorder(recorder);
1815 return mImpl->getErrorRecorder();
1834 return mImpl->hasImplicitBatchDimension();
1849 return mImpl->getTacticSources();
1860 return mImpl->getProfilingVerbosity();
1870 return mImpl->createEngineInspector();
1916 return mImpl->execute(batchSize, bindings);
1949 int32_t batchSize,
void*
const* bindings, cudaStream_t stream, cudaEvent_t* inputConsumed)
noexcept
1951 return mImpl->enqueue(batchSize, bindings, stream, inputConsumed);
1964 mImpl->setDebugSync(sync);
1974 return mImpl->getDebugSync();
1984 mImpl->setProfiler(profiler);
1994 return mImpl->getProfiler();
2004 return mImpl->getEngine();
2028 mImpl->setName(name);
2038 return mImpl->getName();
2054 mImpl->setDeviceMemory(memory);
2075 return mImpl->getStrides(bindingIndex);
2118 return mImpl->setOptimizationProfile(profileIndex);
2130 return mImpl->getOptimizationProfile();
2167 return mImpl->setBindingDimensions(bindingIndex, dimensions);
2197 return mImpl->getBindingDimensions(bindingIndex);
2227 return mImpl->setInputShapeBinding(bindingIndex, data);
2249 return mImpl->getShapeBinding(bindingIndex, data);
2264 return mImpl->allInputDimensionsSpecified();
2279 return mImpl->allInputShapesSpecified();
2298 mImpl->setErrorRecorder(recorder);
2313 return mImpl->getErrorRecorder();
2330 return mImpl->executeV2(bindings);
2356 bool enqueueV2(
void*
const* bindings, cudaStream_t stream, cudaEvent_t* inputConsumed)
noexcept
2358 return mImpl->enqueueV2(bindings, stream, inputConsumed);
2406 return mImpl->setOptimizationProfileAsync(profileIndex, stream);
2421 mImpl->setEnqueueEmitsProfile(enqueueEmitsProfile);
2432 return mImpl->getEnqueueEmitsProfile();
2461 return mImpl->reportToProfiler();
2523 return mImpl->setExecutionContext(context);
2535 return mImpl->getExecutionContext();
2560 return mImpl->getLayerInformation(layerIndex, format);
2585 return mImpl->getEngineInformation(format);
2604 mImpl->setErrorRecorder(recorder);
2619 return mImpl->getErrorRecorder();
2632extern "C" TENSORRTAPI void* createInferRuntime_INTERNAL(
void* logger, int32_t version)
noexcept;
2638extern "C" TENSORRTAPI void* createInferRefitter_INTERNAL(
void* engine,
void* logger, int32_t version)
noexcept;
2672inline IRefitter* createInferRefitter(ICudaEngine& engine, ILogger& logger)
noexcept
2674 return static_cast<IRefitter*
>(createInferRefitter_INTERNAL(&engine, &logger,
NV_TENSORRT_VERSION));
2690template <
typename T>
2706#define REGISTER_TENSORRT_PLUGIN(name) \
2707 static nvinfer1::PluginRegistrar<name> pluginRegistrar##name {}
nvinfer1::ILogger * getLogger() noexcept
Return the logger object.
nvinfer1::IPluginRegistry * getPluginRegistry() noexcept
Return the plugin registry.
#define TENSORRTAPI
Definition: NvInferRuntimeCommon.h:54
#define NV_TENSORRT_VERSION
Definition: NvInferRuntimeCommon.h:73
#define TRT_DEPRECATED
Definition: NvInferRuntimeCommon.h:40
#define TRT_DEPRECATED_ENUM
Definition: NvInferRuntimeCommon.h:41
Structure to define the dimensions of a tensor.
Definition: NvInferRuntimeCommon.h:153
static constexpr int32_t MAX_DIMS
The maximum rank (number of dimensions) supported for a tensor.
Definition: NvInferRuntimeCommon.h:156
Definition: NvInferRuntime.h:309
int32_t nbDims
The number of dimensions.
Definition: NvInferRuntime.h:311
An engine for executing inference on a built network, with functionally unsafe features.
Definition: NvInferRuntime.h:1343
int32_t getBindingBytesPerComponent(int32_t bindingIndex) const noexcept
Return the number of bytes per component of an element.
Definition: NvInferRuntime.h:1580
int32_t getBindingComponentsPerElement(int32_t bindingIndex) const noexcept
Return the number of components included in one element.
Definition: NvInferRuntime.h:1594
bool isShapeBinding(int32_t bindingIndex) const noexcept
True if tensor is required as input for shape calculations or output from them.
Definition: NvInferRuntime.h:1751
bool hasImplicitBatchDimension() const noexcept
Query whether the engine was built with an implicit batch dimension.
Definition: NvInferRuntime.h:1832
int32_t const * getProfileShapeValues(int32_t profileIndex, int32_t inputIndex, OptProfileSelector select) const noexcept
Get minimum / optimum / maximum values for an input shape binding under an optimization profile.
Definition: NvInferRuntime.h:1714
EngineCapability getEngineCapability() const noexcept
Determine what execution capability this engine has.
Definition: NvInferRuntime.h:1780
TRT_DEPRECATED void destroy() noexcept
Destroy this object.
Definition: NvInferRuntime.h:1522
IErrorRecorder * getErrorRecorder() const noexcept
Get the ErrorRecorder assigned to this interface.
Definition: NvInferRuntime.h:1813
TensorFormat getBindingFormat(int32_t bindingIndex) const noexcept
Return the binding format.
Definition: NvInferRuntime.h:1604
apiv::VCudaEngine * mImpl
Definition: NvInferRuntime.h:1874
TensorLocation getLocation(int32_t bindingIndex) const noexcept
Get location of binding.
Definition: NvInferRuntime.h:1537
Dims getProfileDimensions(int32_t bindingIndex, int32_t profileIndex, OptProfileSelector select) const noexcept
Get the minimum / optimum / maximum dimensions for a particular binding under an optimization profile...
Definition: NvInferRuntime.h:1688
Dims getBindingDimensions(int32_t bindingIndex) const noexcept
Get the dimensions of a binding.
Definition: NvInferRuntime.h:1437
int32_t getBindingVectorizedDim(int32_t bindingIndex) const noexcept
Return the dimension index that the buffer is vectorized.
Definition: NvInferRuntime.h:1635
void setErrorRecorder(IErrorRecorder *recorder) noexcept
Set the ErrorRecorder for this interface.
Definition: NvInferRuntime.h:1798
char const * getBindingName(int32_t bindingIndex) const noexcept
Retrieve the name corresponding to a binding index.
Definition: NvInferRuntime.h:1399
size_t getDeviceMemorySize() const noexcept
Return the amount of device memory required by an execution context.
Definition: NvInferRuntime.h:1556
TacticSources getTacticSources() const noexcept
return the tactic sources required by this engine.
Definition: NvInferRuntime.h:1847
virtual ~ICudaEngine() noexcept=default
char const * getName() const noexcept
Returns the name of the network associated with the engine.
Definition: NvInferRuntime.h:1650
ProfilingVerbosity getProfilingVerbosity() const noexcept
Return the ProfilingVerbosity the builder config was set to when the engine was built.
Definition: NvInferRuntime.h:1858
int32_t getBindingIndex(char const *name) const noexcept
Retrieve the binding index for a named tensor.
Definition: NvInferRuntime.h:1379
char const * getBindingFormatDesc(int32_t bindingIndex) const noexcept
Return the human readable description of the tensor format.
Definition: NvInferRuntime.h:1623
IHostMemory * serialize() const noexcept
Serialize the network to a stream.
Definition: NvInferRuntime.h:1494
IExecutionContext * createExecutionContextWithoutDeviceMemory() noexcept
create an execution context without any device memory allocated
Definition: NvInferRuntime.h:1546
DataType getBindingDataType(int32_t bindingIndex) const noexcept
Determine the required data type for a buffer from its binding index.
Definition: NvInferRuntime.h:1450
IExecutionContext * createExecutionContext() noexcept
Create an execution context.
Definition: NvInferRuntime.h:1510
IEngineInspector * createEngineInspector() const noexcept
Create a new engine inspector which prints the layer information in an engine or an execution context...
Definition: NvInferRuntime.h:1868
TRT_DEPRECATED int32_t getMaxBatchSize() const noexcept
Get the maximum batch size which can be used for inference. Should only be called if the engine is bu...
Definition: NvInferRuntime.h:1466
int32_t getNbOptimizationProfiles() const noexcept
Get the number of optimization profiles defined for this engine.
Definition: NvInferRuntime.h:1661
int32_t getNbLayers() const noexcept
Get the number of layers in the network.
Definition: NvInferRuntime.h:1480
bool bindingIsInput(int32_t bindingIndex) const noexcept
Determine whether a binding is an input binding.
Definition: NvInferRuntime.h:1412
bool isExecutionBinding(int32_t bindingIndex) const noexcept
True if pointer to tensor data is required for execution phase, false if nullptr can be supplied.
Definition: NvInferRuntime.h:1765
bool isRefittable() const noexcept
Return true if an engine can be refit.
Definition: NvInferRuntime.h:1566
Definition: NvInferRuntime.h:244
bool isConstant() const noexcept
Return true if expression is a build-time constant.
Definition: NvInferRuntime.h:247
virtual ~IDimensionExpr() noexcept=default
apiv::VDimensionExpr * mImpl
Definition: NvInferRuntime.h:260
int32_t getConstantValue() const noexcept
Definition: NvInferRuntime.h:254
An engine inspector which prints out the layer information of an engine or an execution context.
Definition: NvInferRuntime.h:2505
IErrorRecorder * getErrorRecorder() const noexcept
Get the ErrorRecorder assigned to this interface.
Definition: NvInferRuntime.h:2617
void setErrorRecorder(IErrorRecorder *recorder) noexcept
Set the ErrorRecorder for this interface.
Definition: NvInferRuntime.h:2602
AsciiChar const * getEngineInformation(LayerInformationFormat format) const noexcept
Get a string describing the information about all the layers in the current engine or the execution c...
Definition: NvInferRuntime.h:2583
IExecutionContext const * getExecutionContext() const noexcept
Get the context currently being inspected.
Definition: NvInferRuntime.h:2533
apiv::VEngineInspector * mImpl
Definition: NvInferRuntime.h:2623
virtual ~IEngineInspector() noexcept=default
AsciiChar const * getLayerInformation(int32_t layerIndex, LayerInformationFormat format) const noexcept
Get a string describing the information about a specific layer in the current engine or the execution...
Definition: NvInferRuntime.h:2558
Reference counted application-implemented error reporting interface for TensorRT objects.
Definition: NvInferRuntimeCommon.h:1665
Context for executing inference using an engine, with functionally unsafe features.
Definition: NvInferRuntime.h:1888
TRT_DEPRECATED bool enqueue(int32_t batchSize, void *const *bindings, cudaStream_t stream, cudaEvent_t *inputConsumed) noexcept
Asynchronously execute inference on a batch.
Definition: NvInferRuntime.h:1948
IErrorRecorder * getErrorRecorder() const noexcept
Get the ErrorRecorder assigned to this interface.
Definition: NvInferRuntime.h:2311
bool reportToProfiler() const noexcept
Calculate layer timing info for the current optimization profile in IExecutionContext and update the ...
Definition: NvInferRuntime.h:2459
virtual ~IExecutionContext() noexcept=default
void setDeviceMemory(void *memory) noexcept
Set the device memory for use by this execution context.
Definition: NvInferRuntime.h:2052
Dims getBindingDimensions(int32_t bindingIndex) const noexcept
Get the dynamic dimensions of a binding.
Definition: NvInferRuntime.h:2195
bool getShapeBinding(int32_t bindingIndex, int32_t *data) const noexcept
Get values of an input tensor required for shape calculations or an output tensor produced by shape c...
Definition: NvInferRuntime.h:2247
char const * getName() const noexcept
Return the name of the execution context.
Definition: NvInferRuntime.h:2036
void setDebugSync(bool sync) noexcept
Set the debug sync flag.
Definition: NvInferRuntime.h:1962
bool enqueueV2(void *const *bindings, cudaStream_t stream, cudaEvent_t *inputConsumed) noexcept
Asynchronously execute inference.
Definition: NvInferRuntime.h:2356
void setEnqueueEmitsProfile(bool enqueueEmitsProfile) noexcept
Set whether enqueue emits layer timing to the profiler.
Definition: NvInferRuntime.h:2419
bool setBindingDimensions(int32_t bindingIndex, Dims dimensions) noexcept
Set the dynamic dimensions of a binding.
Definition: NvInferRuntime.h:2165
bool setInputShapeBinding(int32_t bindingIndex, int32_t const *data) noexcept
Set values of input tensor required by shape calculations.
Definition: NvInferRuntime.h:2225
bool executeV2(void *const *bindings) noexcept
Synchronously execute inference on a network.
Definition: NvInferRuntime.h:2328
bool getEnqueueEmitsProfile() const noexcept
Get the enqueueEmitsProfile state.
Definition: NvInferRuntime.h:2430
bool setOptimizationProfileAsync(int32_t profileIndex, cudaStream_t stream) noexcept
Select an optimization profile for the current context with async semantics.
Definition: NvInferRuntime.h:2404
apiv::VExecutionContext * mImpl
Definition: NvInferRuntime.h:2465
ICudaEngine const & getEngine() const noexcept
Get the associated engine.
Definition: NvInferRuntime.h:2002
int32_t getOptimizationProfile() const noexcept
Get the index of the currently selected optimization profile.
Definition: NvInferRuntime.h:2128
TRT_DEPRECATED void destroy() noexcept
Destroy this object.
Definition: NvInferRuntime.h:2014
bool getDebugSync() const noexcept
Get the debug sync flag.
Definition: NvInferRuntime.h:1972
Dims getStrides(int32_t bindingIndex) const noexcept
Return the strides of the buffer for the given binding.
Definition: NvInferRuntime.h:2073
IProfiler * getProfiler() const noexcept
Get the profiler.
Definition: NvInferRuntime.h:1992
void setErrorRecorder(IErrorRecorder *recorder) noexcept
Set the ErrorRecorder for this interface.
Definition: NvInferRuntime.h:2296
TRT_DEPRECATED bool setOptimizationProfile(int32_t profileIndex) noexcept
Select an optimization profile for the current context.
Definition: NvInferRuntime.h:2116
bool allInputDimensionsSpecified() const noexcept
Whether all dynamic dimensions of input tensors have been specified.
Definition: NvInferRuntime.h:2262
void setProfiler(IProfiler *profiler) noexcept
Set the profiler.
Definition: NvInferRuntime.h:1982
void setName(char const *name) noexcept
Set the name of the execution context.
Definition: NvInferRuntime.h:2026
bool allInputShapesSpecified() const noexcept
Whether all input shape bindings have been specified.
Definition: NvInferRuntime.h:2276
Definition: NvInferRuntime.h:282
IDimensionExpr const * operation(DimensionOperation op, IDimensionExpr const &first, IDimensionExpr const &second) noexcept
Definition: NvInferRuntime.h:292
virtual ~IExprBuilder() noexcept=default
apiv::VExprBuilder * mImpl
Definition: NvInferRuntime.h:299
IDimensionExpr const * constant(int32_t value) noexcept
Return pointer to IDimensionExp for given value.
Definition: NvInferRuntime.h:285
Application-implemented class for controlling allocation on the GPU.
Definition: NvInferRuntimeCommon.h:1338
Class to handle library allocated memory that is accessible to the user.
Definition: NvInferRuntime.h:144
void * data() const noexcept
A pointer to the raw data that is owned by the library.
Definition: NvInferRuntime.h:149
DataType type() const noexcept
The type of the memory that was allocated.
Definition: NvInferRuntime.h:161
TRT_DEPRECATED void destroy() noexcept
Definition: NvInferRuntime.h:172
std::size_t size() const noexcept
The size in bytes of the data that was allocated.
Definition: NvInferRuntime.h:155
apiv::VHostMemory * mImpl
Definition: NvInferRuntime.h:178
virtual ~IHostMemory() noexcept=default
Application-implemented logging interface for the builder, refitter and runtime.
Definition: NvInferRuntimeCommon.h:1476
Forward declaration of IEngineInspector for use by other interfaces.
Definition: NvInferRuntime.h:43
INoCopy & operator=(INoCopy &&other)=delete
INoCopy(INoCopy const &other)=delete
INoCopy(INoCopy &&other)=delete
virtual ~INoCopy()=default
INoCopy & operator=(INoCopy const &other)=delete
Optimization profile for dynamic input dimensions and shape tensors.
Definition: NvInferRuntime.h:1120
int32_t const * getShapeValues(char const *inputName, OptProfileSelector select) const noexcept
Get the minimum / optimum / maximum values for an input shape tensor.
Definition: NvInferRuntime.h:1222
apiv::VOptimizationProfile * mImpl
Definition: NvInferRuntime.h:1270
Dims getDimensions(char const *inputName, OptProfileSelector select) const noexcept
Get the minimum / optimum / maximum dimensions for a dynamic input tensor.
Definition: NvInferRuntime.h:1157
virtual ~IOptimizationProfile() noexcept=default
float getExtraMemoryTarget() const noexcept
Get the extra memory target that has been defined for this profile.
Definition: NvInferRuntime.h:1248
bool setDimensions(char const *inputName, OptProfileSelector select, Dims dims) noexcept
Set the minimum / optimum / maximum dimensions for a dynamic input tensor.
Definition: NvInferRuntime.h:1147
bool setExtraMemoryTarget(float target) noexcept
Set a target for extra GPU memory that may be used by this profile.
Definition: NvInferRuntime.h:1240
bool setShapeValues(char const *inputName, OptProfileSelector select, int32_t const *values, int32_t nbValues) noexcept
Set the minimum / optimum / maximum values for an input shape tensor.
Definition: NvInferRuntime.h:1200
bool isValid() const noexcept
Check whether the optimization profile can be passed to an IBuilderConfig object.
Definition: NvInferRuntime.h:1264
int32_t getNbShapeValues(char const *inputName) const noexcept
Get the number of values for an input shape tensor.
Definition: NvInferRuntime.h:1212
Single registration point for all plugins in an application. It is used to find plugin implementation...
Definition: NvInferRuntimeCommon.h:1210
virtual bool registerCreator(IPluginCreator &creator, AsciiChar const *const pluginNamespace) noexcept=0
Register a plugin creator. Returns false if one with same type is already registered.
Definition: NvInferRuntime.h:351
IPluginV2DynamicExt * clone() const noexcept override=0
Clone the plugin object. This copies over internal plugin parameters as well and returns a new plugin...
virtual ~IPluginV2DynamicExt() noexcept
Definition: NvInferRuntime.h:503
Plugin class for user-implemented layers.
Definition: NvInferRuntimeCommon.h:680
Application-implemented interface for profiling.
Definition: NvInferRuntime.h:556
virtual void reportLayerTime(char const *layerName, float ms) noexcept=0
Layer time reporting callback.
virtual ~IProfiler() noexcept
Definition: NvInferRuntime.h:566
Updates weights in an engine.
Definition: NvInferRuntime.h:800
int32_t getMaxThreads() const noexcept
get the maximum number of threads that can be used by the refitter.
Definition: NvInferRuntime.h:1060
bool setNamedWeights(char const *name, Weights weights) noexcept
Specify new weights of given name.
Definition: NvInferRuntime.h:986
int32_t getAllWeights(int32_t size, char const **weightsNames) noexcept
Get names of all weights that could be refit.
Definition: NvInferRuntime.h:1022
bool setDynamicRange(char const *tensorName, float min, float max) noexcept
Definition: NvInferRuntime.h:894
ILogger * getLogger() const noexcept
get the logger with which the refitter was created
Definition: NvInferRuntime.h:1032
int32_t getTensorsWithDynamicRange(int32_t size, char const **tensorNames) const noexcept
Get names of all tensors that have refittable dynamic ranges.
Definition: NvInferRuntime.h:934
float getDynamicRangeMin(char const *tensorName) const noexcept
Get minimum of dynamic range.
Definition: NvInferRuntime.h:906
bool refitCudaEngine() noexcept
Updates associated engine. Return true if successful.
Definition: NvInferRuntime.h:829
int32_t getMissingWeights(int32_t size, char const **weightsNames) noexcept
Get names of missing weights.
Definition: NvInferRuntime.h:1006
int32_t getMissing(int32_t size, char const **layerNames, WeightsRole *roles) noexcept
Get description of missing weights.
Definition: NvInferRuntime.h:850
TRT_DEPRECATED void destroy() noexcept
Definition: NvInferRuntime.h:877
bool setMaxThreads(int32_t maxThreads) noexcept
Set the maximum number of threads.
Definition: NvInferRuntime.h:1046
float getDynamicRangeMax(char const *tensorName) const noexcept
Get maximum of dynamic range.
Definition: NvInferRuntime.h:918
apiv::VRefitter * mImpl
Definition: NvInferRuntime.h:1066
int32_t getAll(int32_t size, char const **layerNames, WeightsRole *roles) noexcept
Get description of all weights that could be refit.
Definition: NvInferRuntime.h:867
virtual ~IRefitter() noexcept=default
void setErrorRecorder(IErrorRecorder *recorder) noexcept
Set the ErrorRecorder for this interface.
Definition: NvInferRuntime.h:953
IErrorRecorder * getErrorRecorder() const noexcept
Get the ErrorRecorder assigned to this interface.
Definition: NvInferRuntime.h:968
Allows a serialized functionally unsafe engine to be deserialized.
Definition: NvInferRuntime.h:618
bool setMaxThreads(int32_t maxThreads) noexcept
Set the maximum number of threads.
Definition: NvInferRuntime.h:769
virtual ~IRuntime() noexcept=default
TRT_DEPRECATED void destroy() noexcept
Destroy this object.
Definition: NvInferRuntime.h:682
apiv::VRuntime * mImpl
Definition: NvInferRuntime.h:789
void setDLACore(int32_t dlaCore) noexcept
Sets the DLA core used by the network. Defaults to -1.
Definition: NvInferRuntime.h:653
int32_t getNbDLACores() const noexcept
Returns number of DLA hardware cores accessible or 0 if DLA is unavailable.
Definition: NvInferRuntime.h:670
ICudaEngine * deserializeCudaEngine(void const *blob, std::size_t size) noexcept
Deserialize an engine from a stream.
Definition: NvInferRuntime.h:745
int32_t getDLACore() const noexcept
Get the DLA core that the engine executes on.
Definition: NvInferRuntime.h:662
void setGpuAllocator(IGpuAllocator *allocator) noexcept
Set the GPU allocator.
Definition: NvInferRuntime.h:696
IErrorRecorder * getErrorRecorder() const noexcept
get the ErrorRecorder assigned to this interface.
Definition: NvInferRuntime.h:730
ILogger * getLogger() const noexcept
get the logger with which the runtime was created
Definition: NvInferRuntime.h:755
int32_t getMaxThreads() const noexcept
Get the maximum number of threads that can be used by the runtime.
Definition: NvInferRuntime.h:783
void setErrorRecorder(IErrorRecorder *recorder) noexcept
Set the ErrorRecorder for this interface.
Definition: NvInferRuntime.h:715
Register the plugin creator to the registry The static registry object will be instantiated when the ...
Definition: NvInferRuntime.h:2692
PluginRegistrar()
Definition: NvInferRuntime.h:2694
An array of weights used as a layer parameter.
Definition: NvInferRuntime.h:126
DataType type
The type of the weights.
Definition: NvInferRuntime.h:128
int64_t count
The number of weights in the array.
Definition: NvInferRuntime.h:130
void const * values
The weight values, in a contiguous array.
Definition: NvInferRuntime.h:129
IRuntime * createInferRuntime(ILogger &logger) noexcept
Create an instance of a safe::IRuntime class.
The TensorRT API version 1 namespace.
uint32_t TacticSources
Represents a collection of one or more TacticSource values combine using bitwise-OR operations.
Definition: NvInferRuntime.h:1305
EngineCapability
List of supported engine capability flows.
Definition: NvInferRuntime.h:69
DimensionOperation
An operation on two IDimensionExpr, which represent integer expressions used in dimension computation...
Definition: NvInferRuntime.h:192
@ kSUB
Subtract the second element from the first.
@ kSUM
Sum of the two operands.
@ kPROD
Product of the two operands.
@ kFLOOR_DIV
Floor division of the first element by the second.
@ kEQUAL
1 if operands are equal, 0 otherwise.
@ kMIN
Minimum of the two operands.
@ kLESS
1 if first operand is less than second operand, 0 otherwise.
@ kMAX
Maximum of the two operands.
@ kCEIL_DIV
Division rounding up.
constexpr int32_t EnumMax< WeightsRole >() noexcept
Maximum number of elements in WeightsRole enum.
Definition: NvInferRuntime.h:587
char_t AsciiChar
AsciiChar is the type used by TensorRT to represent valid ASCII characters.
Definition: NvInferRuntimeCommon.h:88
@ kV2_DYNAMICEXT
IPluginV2DynamicExt.
constexpr int32_t EnumMax< LayerInformationFormat >() noexcept
Definition: NvInferRuntime.h:2484
DataType
The type of weights and tensors.
Definition: NvInferRuntimeCommon.h:114
DeviceType
The device that this layer/network will execute on.
Definition: NvInferRuntime.h:598
@ kCONSTANT
Constant layer.
@ kDEFAULT
Similar to ONNX Gather.
constexpr int32_t EnumMax< OptProfileSelector >() noexcept
Number of different values of OptProfileSelector enum.
Definition: NvInferRuntime.h:1092
WeightsRole
How a layer uses particular Weights.
Definition: NvInferRuntime.h:576
@ kSHIFT
shift part of IScaleLayer
@ kANY
Any other weights role.
@ kBIAS
bias for IConvolutionLayer, IDeconvolutionLayer, or IFullyConnectedLayer
@ kKERNEL
kernel for IConvolutionLayer, IDeconvolutionLayer, or IFullyConnectedLayer
constexpr int32_t EnumMax< ProfilingVerbosity >() noexcept
Maximum number of profile verbosity levels in ProfilingVerbosity enum.
Definition: NvInferRuntime.h:1330
ProfilingVerbosity
List of verbosity levels of layer information exposed in NVTX annotations and in IEngineInspector.
Definition: NvInferRuntime.h:1317
@ kLAYER_NAMES_ONLY
Print only the layer names. This is the default setting.
@ kDETAILED
Print detailed layer information including layer names and layer parameters.
TacticSource
List of tactic sources for TensorRT.
Definition: NvInferRuntime.h:1282
@ kCUBLAS_LT
cuBLAS LT tactics
@ kEDGE_MASK_CONVOLUTIONS
TensorFormat PluginFormat
PluginFormat is reserved for backward compatibility.
Definition: NvInferRuntimeCommon.h:308
@ kMIN
Minimum of the two elements.
TensorFormat
Format of the input/output tensors.
Definition: NvInferRuntimeCommon.h:183
constexpr int32_t EnumMax< TacticSource >() noexcept
Maximum number of tactic sources in TacticSource enum.
Definition: NvInferRuntime.h:1294
LayerInformationFormat
The format in which the IEngineInspector prints the layer information.
Definition: NvInferRuntime.h:2476
@ kJSON
Print layer information in JSON format.
@ kONELINE
Print layer information in one line per layer.
constexpr int32_t EnumMax< DeviceType >() noexcept
Maximum number of elements in DeviceType enum.
Definition: NvInferRuntime.h:605
constexpr int32_t EnumMax< DimensionOperation >() noexcept
Maximum number of elements in DimensionOperation enum.
Definition: NvInferRuntime.h:206
TensorLocation
The location for tensor data storage, device or host.
Definition: NvInferRuntime.h:216
@ kHOST
Data stored on host.
@ kDEVICE
Data stored on device.
OptProfileSelector
When setting or querying optimization profile parameters (such as shape tensor inputs or dynamic dime...
Definition: NvInferRuntime.h:1080
@ kOPT
This is used to set or get the value that is used in the optimization (kernel selection).
Definition: NvInferRuntime.h:321
Dims min
Lower bounds on tensor’s dimensions.
Definition: NvInferRuntime.h:326
Dims max
Upper bounds on tensor’s dimensions.
Definition: NvInferRuntime.h:329
PluginTensorDesc desc
Information required to interpret a pointer to tensor data, except that desc.dims has -1 in place of ...
Definition: NvInferRuntime.h:323
Fields that a plugin might see for an input or output.
Definition: NvInferRuntimeCommon.h:332
Declaration of EnumMaxImpl struct to store maximum number of elements in an enumeration type.
Definition: NvInferRuntimeCommon.h:99