13#ifndef NV_INFER_RUNTIME_H
14#define NV_INFER_RUNTIME_H
22#include "NvInferImpl.h"
28class IExecutionContext;
31class IEngineInspector;
107 static constexpr int32_t kVALUE = 3;
151 return mImpl->data();
155 std::size_t
size() const noexcept
157 return mImpl->size();
163 return mImpl->type();
227 static constexpr int32_t kVALUE = 2;
249 return mImpl->isConstant();
256 return mImpl->getConstantValue();
287 return mImpl->constant(value);
295 return mImpl->operation(op, first, second);
380 int32_t outputIndex,
DimsExprs const* inputs, int32_t nbInputs,
IExprBuilder& exprBuilder) noexcept = 0;
385 static constexpr int32_t kFORMAT_COMBINATION_LIMIT = 100;
419 virtual
bool supportsFormatCombination(
420 int32_t pos,
PluginTensorDesc const* inOut, int32_t nbInputs, int32_t nbOutputs) noexcept = 0;
473 int32_t nbOutputs) const noexcept = 0;
488 void const* const* inputs,
void* const* outputs,
void* workspace, cudaStream_t stream) noexcept = 0;
498 int32_t getTensorRTVersion() const noexcept
override
509 bool const*,
PluginFormat, int32_t)
noexcept override final
518 Dims getOutputDimensions(int32_t,
Dims const*, int32_t)
noexcept override final
523 bool isOutputBroadcastAcrossBatch(int32_t,
bool const*, int32_t)
const noexcept override final
528 bool canBroadcastInputAcrossBatch(int32_t)
const noexcept override final
533 size_t getWorkspaceSize(int32_t)
const noexcept override final
538 int32_t enqueue(int32_t,
void const*
const*,
void*
const*,
void*, cudaStream_t)
noexcept override final
674 void const* blob, std::
size_t size, IPluginFactory* pluginFactory) noexcept
676 return mImpl->deserializeCudaEngine(blob, size,
nullptr);
691 mImpl->setDLACore(dlaCore);
700 return mImpl->getDLACore();
708 return mImpl->getNbDLACores();
734 mImpl->setGpuAllocator(allocator);
753 mImpl->setErrorRecorder(recorder);
768 return mImpl->getErrorRecorder();
783 return mImpl->deserializeCudaEngine(blob, size,
nullptr);
793 return mImpl->getLogger();
807 return mImpl->setMaxThreads(maxThreads);
821 return mImpl->getMaxThreads();
856 return mImpl->setTemporaryDirectory(path);
867 return mImpl->getTemporaryDirectory();
883 return mImpl->setTempfileControlFlags(flags);
895 return mImpl->getTempfileControlFlags();
905 return mImpl->getPluginRegistry();
923 return mImpl->loadRuntime(path);
935 return mImpl->setEngineHostCodeAllowed(allowed);
945 return mImpl->getEngineHostCodeAllowed();
979 return mImpl->setWeights(layerName, role, weights);
994 return mImpl->refitCudaEngine();
1015 return mImpl->getMissing(size, layerNames, roles);
1032 return mImpl->getAll(size, layerNames, roles);
1062 return mImpl->setDynamicRange(tensorName, min, max);
1076 return mImpl->getDynamicRangeMin(tensorName);
1090 return mImpl->getDynamicRangeMax(tensorName);
1106 return mImpl->getTensorsWithDynamicRange(size, tensorNames);
1125 mImpl->setErrorRecorder(recorder);
1140 return mImpl->getErrorRecorder();
1161 return mImpl->setNamedWeights(name, weights);
1181 return mImpl->getMissingWeights(size, weightsNames);
1197 return mImpl->getAllWeights(size, weightsNames);
1207 return mImpl->getLogger();
1221 return mImpl->setMaxThreads(maxThreads);
1235 return mImpl->getMaxThreads();
1324 return mImpl->setDimensions(inputName, select, dims);
1336 return mImpl->getDimensions(inputName, select);
1381 char const* inputName,
OptProfileSelector select, int32_t
const* values, int32_t nbValues)
noexcept
1383 return mImpl->setShapeValues(inputName, select, values, nbValues);
1396 return mImpl->getNbShapeValues(inputName);
1408 return mImpl->getShapeValues(inputName, select);
1426 return mImpl->setExtraMemoryTarget(target);
1438 return mImpl->getExtraMemoryTarget();
1455 return mImpl->isValid();
1561 return mImpl->getNbBindings();
1589 return mImpl->getBindingIndex(name);
1612 return mImpl->getBindingName(bindingIndex);
1627 return mImpl->bindingIsInput(bindingIndex);
1654 return mImpl->getBindingDimensions(bindingIndex);
1669 return mImpl->getTensorShape(tensorName);
1684 return mImpl->getBindingDataType(bindingIndex);
1699 return mImpl->getTensorDataType(tensorName);
1715 return mImpl->getMaxBatchSize();
1729 return mImpl->getNbLayers();
1743 return mImpl->serialize();
1760 return mImpl->createExecutionContext();
1790 return mImpl->getLocation(bindingIndex);
1807 return mImpl->getTensorLocation(tensorName);
1827 return mImpl->isShapeInferenceIO(tensorName);
1841 return mImpl->getTensorIOMode(tensorName);
1850 return mImpl->createExecutionContextWithoutDeviceMemory();
1860 return mImpl->getDeviceMemorySize();
1870 return mImpl->isRefittable();
1887 return mImpl->getBindingBytesPerComponent(bindingIndex);
1904 return mImpl->getTensorBytesPerComponent(tensorName);
1920 return mImpl->getBindingComponentsPerElement(bindingIndex);
1937 return mImpl->getTensorComponentsPerElement(tensorName);
1951 return mImpl->getBindingFormat(bindingIndex);
1964 return mImpl->getTensorFormat(tensorName);
1988 return mImpl->getBindingFormatDesc(bindingIndex);
2010 return mImpl->getTensorFormatDesc(tensorName);
2026 return mImpl->getBindingVectorizedDim(bindingIndex);
2041 return mImpl->getTensorVectorizedDim(tensorName);
2056 return mImpl->getName();
2067 return mImpl->getNbOptimizationProfiles();
2098 int32_t bindingIndex, int32_t profileIndex,
OptProfileSelector select)
const noexcept
2100 return mImpl->getProfileDimensions(bindingIndex, profileIndex, select);
2120 return mImpl->getProfileShape(tensorName, profileIndex, select);
2150 return mImpl->getProfileShapeValues(profileIndex, inputIndex, select);
2188 return mImpl->isShapeBinding(bindingIndex);
2205 return mImpl->isExecutionBinding(bindingIndex);
2220 return mImpl->getEngineCapability();
2238 return mImpl->setErrorRecorder(recorder);
2253 return mImpl->getErrorRecorder();
2272 return mImpl->hasImplicitBatchDimension();
2287 return mImpl->getTacticSources();
2298 return mImpl->getProfilingVerbosity();
2308 return mImpl->createEngineInspector();
2321 return mImpl->getNbIOTensors();
2333 return mImpl->getIOTensorName(index);
2345 return mImpl->getHardwareCompatibilityLevel();
2360 return mImpl->getNbAuxStreams();
2407 virtual void*
reallocateOutput(
char const* tensorName,
void* currentMemory, uint64_t size, uint64_t alignment)
noexcept = 0;
2461 return mImpl->execute(batchSize, bindings);
2494 int32_t batchSize,
void*
const* bindings, cudaStream_t stream, cudaEvent_t* inputConsumed)
noexcept
2496 return mImpl->enqueue(batchSize, bindings, stream, inputConsumed);
2509 mImpl->setDebugSync(sync);
2519 return mImpl->getDebugSync();
2529 mImpl->setProfiler(profiler);
2539 return mImpl->getProfiler();
2549 return mImpl->getEngine();
2575 mImpl->setName(name);
2585 return mImpl->getName();
2602 mImpl->setDeviceMemory(memory);
2625 return mImpl->getStrides(bindingIndex);
2646 return mImpl->getTensorStrides(tensorName);
2684 return mImpl->setOptimizationProfile(profileIndex);
2698 return mImpl->getOptimizationProfile();
2737 return mImpl->setBindingDimensions(bindingIndex, dimensions);
2755 return mImpl->setInputShape(tensorName, dims);
2788 return mImpl->getBindingDimensions(bindingIndex);
2825 return mImpl->getTensorShape(tensorName);
2860 return mImpl->setInputShapeBinding(bindingIndex, data);
2884 return mImpl->getShapeBinding(bindingIndex, data);
2902 return mImpl->allInputDimensionsSpecified();
2919 return mImpl->allInputShapesSpecified();
2938 mImpl->setErrorRecorder(recorder);
2953 return mImpl->getErrorRecorder();
2970 return mImpl->executeV2(bindings);
3000 return mImpl->enqueueV2(bindings, stream, inputConsumed);
3047 return mImpl->setOptimizationProfileAsync(profileIndex, stream);
3062 mImpl->setEnqueueEmitsProfile(enqueueEmitsProfile);
3073 return mImpl->getEnqueueEmitsProfile();
3102 return mImpl->reportToProfiler();
3144 return mImpl->setTensorAddress(tensorName, data);
3161 return mImpl->getTensorAddress(tensorName);
3183 return mImpl->setInputTensorAddress(tensorName, data);
3202 return mImpl->getOutputTensorAddress(tensorName);
3233 int32_t
inferShapes(int32_t nbMaxNames,
char const** tensorNames)
noexcept
3235 return mImpl->inferShapes(nbMaxNames, tensorNames);
3251 return mImpl->setInputConsumedEvent(event);
3261 return mImpl->getInputConsumedEvent();
3280 return mImpl->setOutputAllocator(tensorName, outputAllocator);
3293 return mImpl->getOutputAllocator(tensorName);
3311 return mImpl->getMaxOutputSize(tensorName);
3331 return mImpl->setTemporaryStorageAllocator(allocator);
3341 return mImpl->getTemporaryStorageAllocator();
3359 return mImpl->enqueueV3(stream);
3374 mImpl->setPersistentCacheLimit(size);
3385 return mImpl->getPersistentCacheLimit();
3409 return mImpl->setNvtxVerbosity(verbosity);
3421 return mImpl->getNvtxVerbosity();
3452 mImpl->setAuxStreams(auxStreams, nbStreams);
3514 return mImpl->setExecutionContext(context);
3526 return mImpl->getExecutionContext();
3551 return mImpl->getLayerInformation(layerIndex, format);
3574 return mImpl->getEngineInformation(format);
3593 mImpl->setErrorRecorder(recorder);
3608 return mImpl->getErrorRecorder();
3621extern "C" TENSORRTAPI void* createInferRuntime_INTERNAL(
void* logger, int32_t version)
noexcept;
3627extern "C" TENSORRTAPI void* createInferRefitter_INTERNAL(
void* engine,
void* logger, int32_t version)
noexcept;
3661inline IRefitter* createInferRefitter(ICudaEngine& engine, ILogger& logger)
noexcept
3663 return static_cast<IRefitter*
>(createInferRefitter_INTERNAL(&engine, &logger,
NV_TENSORRT_VERSION));
3679template <
typename T>
3695#define REGISTER_TENSORRT_PLUGIN(name) \
3696 static nvinfer1::PluginRegistrar<name> pluginRegistrar##name {}
nvinfer1::ILogger * getLogger() noexcept
Return the logger object.
nvinfer1::IPluginRegistry * getPluginRegistry() noexcept
Return the plugin registry.
#define TENSORRTAPI
Definition: NvInferRuntimeBase.h:54
#define NV_TENSORRT_VERSION
Definition: NvInferRuntimeBase.h:76
#define TRT_DEPRECATED
Definition: NvInferRuntimeBase.h:40
#define TRT_DEPRECATED_ENUM
Definition: NvInferRuntimeBase.h:41
Structure to define the dimensions of a tensor.
Definition: NvInferRuntimeBase.h:179
static constexpr int32_t MAX_DIMS
The maximum rank (number of dimensions) supported for a tensor.
Definition: NvInferRuntimeBase.h:182
Definition: NvInferRuntime.h:309
int32_t nbDims
The number of dimensions.
Definition: NvInferRuntime.h:311
An engine for executing inference on a built network, with functionally unsafe features.
Definition: NvInferRuntime.h:1543
int32_t getTensorBytesPerComponent(char const *tensorName) const noexcept
Return the number of bytes per component of an element, or -1 if the provided name does not map to an...
Definition: NvInferRuntime.h:1902
bool hasImplicitBatchDimension() const noexcept
Query whether the engine was built with an implicit batch dimension.
Definition: NvInferRuntime.h:2270
char const * getIOTensorName(int32_t index) const noexcept
Return name of an IO tensor.
Definition: NvInferRuntime.h:2331
EngineCapability getEngineCapability() const noexcept
Determine what execution capability this engine has.
Definition: NvInferRuntime.h:2218
TRT_DEPRECATED bool isExecutionBinding(int32_t bindingIndex) const noexcept
True if pointer to tensor data is required for execution phase, false if nullptr can be supplied.
Definition: NvInferRuntime.h:2203
TRT_DEPRECATED int32_t getBindingIndex(char const *name) const noexcept
Retrieve the binding index for a named tensor.
Definition: NvInferRuntime.h:1587
TRT_DEPRECATED void destroy() noexcept
Destroy this object;.
Definition: NvInferRuntime.h:1770
IErrorRecorder * getErrorRecorder() const noexcept
Get the ErrorRecorder assigned to this interface.
Definition: NvInferRuntime.h:2251
TRT_DEPRECATED char const * getBindingName(int32_t bindingIndex) const noexcept
Retrieve the name corresponding to a binding index.
Definition: NvInferRuntime.h:1610
apiv::VCudaEngine * mImpl
Definition: NvInferRuntime.h:2364
char const * getTensorFormatDesc(char const *tensorName) const noexcept
Return the human readable description of the tensor format, or empty string if the provided name does...
Definition: NvInferRuntime.h:2008
Dims getProfileShape(char const *tensorName, int32_t profileIndex, OptProfileSelector select) const noexcept
Get the minimum / optimum / maximum dimensions for an input tensor given its name under an optimizati...
Definition: NvInferRuntime.h:2118
TRT_DEPRECATED int32_t const * getProfileShapeValues(int32_t profileIndex, int32_t inputIndex, OptProfileSelector select) const noexcept
Get minimum / optimum / maximum values for an input shape binding under an optimization profile.
Definition: NvInferRuntime.h:2147
int32_t getNbAuxStreams() const noexcept
Return the number of auxiliary streams used by this engine.
Definition: NvInferRuntime.h:2358
TRT_DEPRECATED bool bindingIsInput(int32_t bindingIndex) const noexcept
Determine whether a binding is an input binding.
Definition: NvInferRuntime.h:1625
TRT_DEPRECATED Dims getBindingDimensions(int32_t bindingIndex) const noexcept
Get the dimensions of a binding.
Definition: NvInferRuntime.h:1652
DataType getTensorDataType(char const *tensorName) const noexcept
Determine the required data type for a buffer from its tensor name.
Definition: NvInferRuntime.h:1697
void setErrorRecorder(IErrorRecorder *recorder) noexcept
Set the ErrorRecorder for this interface.
Definition: NvInferRuntime.h:2236
size_t getDeviceMemorySize() const noexcept
Return the amount of device memory required by an execution context.
Definition: NvInferRuntime.h:1858
TacticSources getTacticSources() const noexcept
return the tactic sources required by this engine.
Definition: NvInferRuntime.h:2285
TRT_DEPRECATED TensorFormat getBindingFormat(int32_t bindingIndex) const noexcept
Return the binding format.
Definition: NvInferRuntime.h:1949
virtual ~ICudaEngine() noexcept=default
TRT_DEPRECATED int32_t getBindingComponentsPerElement(int32_t bindingIndex) const noexcept
Return the number of components included in one element.
Definition: NvInferRuntime.h:1918
char const * getName() const noexcept
Returns the name of the network associated with the engine.
Definition: NvInferRuntime.h:2054
ProfilingVerbosity getProfilingVerbosity() const noexcept
Return the ProfilingVerbosity the builder config was set to when the engine was built.
Definition: NvInferRuntime.h:2296
bool isShapeInferenceIO(char const *tensorName) const noexcept
True if tensor is required as input for shape calculations or is output from shape calculations.
Definition: NvInferRuntime.h:1825
TRT_DEPRECATED bool isShapeBinding(int32_t bindingIndex) const noexcept
True if tensor is required as input for shape calculations or output from them.
Definition: NvInferRuntime.h:2186
int32_t getTensorVectorizedDim(char const *tensorName) const noexcept
Return the dimension index that the buffer is vectorized, or -1 if the provided name does not map to ...
Definition: NvInferRuntime.h:2039
TRT_DEPRECATED DataType getBindingDataType(int32_t bindingIndex) const noexcept
Determine the required data type for a buffer from its binding index.
Definition: NvInferRuntime.h:1682
TRT_DEPRECATED int32_t getBindingVectorizedDim(int32_t bindingIndex) const noexcept
Return the dimension index that the buffer is vectorized, or -1 is the name is not found.
Definition: NvInferRuntime.h:2024
TRT_DEPRECATED char const * getBindingFormatDesc(int32_t bindingIndex) const noexcept
Return the human readable description of the tensor format, or nullptr if the provided name does not ...
Definition: NvInferRuntime.h:1986
TensorFormat getTensorFormat(char const *tensorName) const noexcept
Return the binding format, or TensorFormat::kLINEAR if the provided name does not map to an input or ...
Definition: NvInferRuntime.h:1962
IHostMemory * serialize() const noexcept
Serialize the network to a stream.
Definition: NvInferRuntime.h:1741
IExecutionContext * createExecutionContextWithoutDeviceMemory() noexcept
create an execution context without any device memory allocated
Definition: NvInferRuntime.h:1848
TensorLocation getTensorLocation(char const *tensorName) const noexcept
Get whether an input or output tensor must be on GPU or CPU.
Definition: NvInferRuntime.h:1805
IExecutionContext * createExecutionContext() noexcept
Create an execution context.
Definition: NvInferRuntime.h:1758
IEngineInspector * createEngineInspector() const noexcept
Create a new engine inspector which prints the layer information in an engine or an execution context...
Definition: NvInferRuntime.h:2306
HardwareCompatibilityLevel getHardwareCompatibilityLevel() const noexcept
Return the hardware compatibility level of this engine.
Definition: NvInferRuntime.h:2343
TRT_DEPRECATED int32_t getMaxBatchSize() const noexcept
Get the maximum batch size which can be used for inference. Should only be called if the engine is bu...
Definition: NvInferRuntime.h:1713
int32_t getNbOptimizationProfiles() const noexcept
Get the number of optimization profiles defined for this engine.
Definition: NvInferRuntime.h:2065
TensorIOMode getTensorIOMode(char const *tensorName) const noexcept
Determine whether a tensor is an input or output tensor.
Definition: NvInferRuntime.h:1839
int32_t getNbLayers() const noexcept
Get the number of layers in the network.
Definition: NvInferRuntime.h:1727
TRT_DEPRECATED TensorLocation getLocation(int32_t bindingIndex) const noexcept
Get location of binding.
Definition: NvInferRuntime.h:1788
int32_t getNbIOTensors() const noexcept
Return number of IO tensors.
Definition: NvInferRuntime.h:2319
TRT_DEPRECATED Dims getProfileDimensions(int32_t bindingIndex, int32_t profileIndex, OptProfileSelector select) const noexcept
Get the minimum / optimum / maximum dimensions for a particular input binding under an optimization p...
Definition: NvInferRuntime.h:2097
TRT_DEPRECATED int32_t getBindingBytesPerComponent(int32_t bindingIndex) const noexcept
Return the number of bytes per component of an element.
Definition: NvInferRuntime.h:1885
Dims getTensorShape(char const *tensorName) const noexcept
Get shape of an input or output tensor.
Definition: NvInferRuntime.h:1667
int32_t getTensorComponentsPerElement(char const *tensorName) const noexcept
Return the number of components included in one element, or -1 if the provided name does not map to a...
Definition: NvInferRuntime.h:1935
bool isRefittable() const noexcept
Return true if an engine can be refit.
Definition: NvInferRuntime.h:1868
Definition: NvInferRuntime.h:244
bool isConstant() const noexcept
Return true if expression is a build-time constant.
Definition: NvInferRuntime.h:247
virtual ~IDimensionExpr() noexcept=default
apiv::VDimensionExpr * mImpl
Definition: NvInferRuntime.h:260
int32_t getConstantValue() const noexcept
Definition: NvInferRuntime.h:254
An engine inspector which prints out the layer information of an engine or an execution context.
Definition: NvInferRuntime.h:3496
char const * getLayerInformation(int32_t layerIndex, LayerInformationFormat format) const noexcept
Get a string describing the information about a specific layer in the current engine or the execution...
Definition: NvInferRuntime.h:3549
IErrorRecorder * getErrorRecorder() const noexcept
Get the ErrorRecorder assigned to this interface.
Definition: NvInferRuntime.h:3606
void setErrorRecorder(IErrorRecorder *recorder) noexcept
Set the ErrorRecorder for this interface.
Definition: NvInferRuntime.h:3591
IExecutionContext const * getExecutionContext() const noexcept
Get the context currently being inspected.
Definition: NvInferRuntime.h:3524
apiv::VEngineInspector * mImpl
Definition: NvInferRuntime.h:3612
virtual ~IEngineInspector() noexcept=default
char const * getEngineInformation(LayerInformationFormat format) const noexcept
Get a string describing the information about all the layers in the current engine or the execution c...
Definition: NvInferRuntime.h:3572
Reference counted application-implemented error reporting interface for TensorRT objects.
Definition: NvInferRuntimeBase.h:694
Context for executing inference using an engine, with functionally unsafe features.
Definition: NvInferRuntime.h:2433
TRT_DEPRECATED bool enqueue(int32_t batchSize, void *const *bindings, cudaStream_t stream, cudaEvent_t *inputConsumed) noexcept
Enqueue inference of a batch on a stream.
Definition: NvInferRuntime.h:2493
IOutputAllocator * getOutputAllocator(char const *tensorName) const noexcept
Get output allocator associated with output tensor of given name, or nullptr if the provided name doe...
Definition: NvInferRuntime.h:3291
IErrorRecorder * getErrorRecorder() const noexcept
Get the ErrorRecorder assigned to this interface.
Definition: NvInferRuntime.h:2951
bool reportToProfiler() const noexcept
Calculate layer timing info for the current optimization profile in IExecutionContext and update the ...
Definition: NvInferRuntime.h:3100
virtual ~IExecutionContext() noexcept=default
void setDeviceMemory(void *memory) noexcept
Set the device memory for use by this execution context.
Definition: NvInferRuntime.h:2600
char const * getName() const noexcept
Return the name of the execution context.
Definition: NvInferRuntime.h:2583
void setDebugSync(bool sync) noexcept
Set the debug sync flag.
Definition: NvInferRuntime.h:2507
IGpuAllocator * getTemporaryStorageAllocator() const noexcept
Get allocator set by setTemporaryStorageAllocator.
Definition: NvInferRuntime.h:3339
void setEnqueueEmitsProfile(bool enqueueEmitsProfile) noexcept
Set whether enqueue emits layer timing to the profiler.
Definition: NvInferRuntime.h:3060
Dims getTensorShape(char const *tensorName) const noexcept
Return the shape of the given input or output.
Definition: NvInferRuntime.h:2823
bool setInputShape(char const *tensorName, Dims const &dims) noexcept
Set shape of given input.
Definition: NvInferRuntime.h:2753
bool executeV2(void *const *bindings) noexcept
Synchronously execute inference a network.
Definition: NvInferRuntime.h:2968
bool getEnqueueEmitsProfile() const noexcept
Get the enqueueEmitsProfile state.
Definition: NvInferRuntime.h:3071
TRT_DEPRECATED bool setBindingDimensions(int32_t bindingIndex, Dims dimensions) noexcept
Set the dynamic dimensions of an input binding.
Definition: NvInferRuntime.h:2735
void const * getTensorAddress(char const *tensorName) const noexcept
Get memory address bound to given input or output tensor, or nullptr if the provided name does not ma...
Definition: NvInferRuntime.h:3159
bool setOutputAllocator(char const *tensorName, IOutputAllocator *outputAllocator) noexcept
Set output allocator to use for output tensor of given name. Pass nullptr to outputAllocator to unset...
Definition: NvInferRuntime.h:3278
TRT_DEPRECATED bool enqueueV2(void *const *bindings, cudaStream_t stream, cudaEvent_t *inputConsumed) noexcept
Enqueue inference on a stream.
Definition: NvInferRuntime.h:2998
bool setOptimizationProfileAsync(int32_t profileIndex, cudaStream_t stream) noexcept
Select an optimization profile for the current context with async semantics.
Definition: NvInferRuntime.h:3045
TRT_DEPRECATED bool setInputShapeBinding(int32_t bindingIndex, int32_t const *data) noexcept
Set values of input tensor required by shape calculations.
Definition: NvInferRuntime.h:2858
apiv::VExecutionContext * mImpl
Definition: NvInferRuntime.h:3456
void setPersistentCacheLimit(size_t size) noexcept
Set the maximum size for persistent cache usage.
Definition: NvInferRuntime.h:3372
size_t getPersistentCacheLimit() const noexcept
Get the maximum size for persistent cache usage.
Definition: NvInferRuntime.h:3383
ICudaEngine const & getEngine() const noexcept
Get the associated engine.
Definition: NvInferRuntime.h:2547
ProfilingVerbosity getNvtxVerbosity() const noexcept
Get the NVTX verbosity of the execution context.
Definition: NvInferRuntime.h:3419
void setAuxStreams(cudaStream_t *auxStreams, int32_t nbStreams) noexcept
Set the auxiliary streams that TensorRT should launch kernels on in the next enqueueV3() call.
Definition: NvInferRuntime.h:3450
int64_t getMaxOutputSize(char const *tensorName) const noexcept
Get upper bound on an output tensor's size, in bytes, based on the current optimization profile and i...
Definition: NvInferRuntime.h:3309
int32_t inferShapes(int32_t nbMaxNames, char const **tensorNames) noexcept
Run shape calculations.
Definition: NvInferRuntime.h:3233
bool setTensorAddress(char const *tensorName, void *data) noexcept
Set memory address for given input or output tensor.
Definition: NvInferRuntime.h:3142
bool setTemporaryStorageAllocator(IGpuAllocator *allocator) noexcept
Specify allocator to use for internal temporary storage.
Definition: NvInferRuntime.h:3329
void * getOutputTensorAddress(char const *tensorName) const noexcept
Get memory address for given output.
Definition: NvInferRuntime.h:3200
bool enqueueV3(cudaStream_t stream) noexcept
Enqueue inference on a stream.
Definition: NvInferRuntime.h:3357
int32_t getOptimizationProfile() const noexcept
Get the index of the currently selected optimization profile.
Definition: NvInferRuntime.h:2696
bool setInputTensorAddress(char const *tensorName, void const *data) noexcept
Set memory address for given input.
Definition: NvInferRuntime.h:3181
TRT_DEPRECATED void destroy() noexcept
Destroy this object.
Definition: NvInferRuntime.h:2559
bool getDebugSync() const noexcept
Get the debug sync flag.
Definition: NvInferRuntime.h:2517
TRT_DEPRECATED Dims getBindingDimensions(int32_t bindingIndex) const noexcept
Get the dynamic dimensions of a binding.
Definition: NvInferRuntime.h:2786
bool setInputConsumedEvent(cudaEvent_t event) noexcept
Mark input as consumed.
Definition: NvInferRuntime.h:3249
Dims getTensorStrides(char const *tensorName) const noexcept
Return the strides of the buffer for the given tensor name.
Definition: NvInferRuntime.h:2644
bool setNvtxVerbosity(ProfilingVerbosity verbosity) noexcept
Set the verbosity of the NVTX markers in the execution context.
Definition: NvInferRuntime.h:3407
IProfiler * getProfiler() const noexcept
Get the profiler.
Definition: NvInferRuntime.h:2537
TRT_DEPRECATED Dims getStrides(int32_t bindingIndex) const noexcept
Return the strides of the buffer for the given binding.
Definition: NvInferRuntime.h:2623
void setErrorRecorder(IErrorRecorder *recorder) noexcept
Set the ErrorRecorder for this interface.
Definition: NvInferRuntime.h:2936
TRT_DEPRECATED bool setOptimizationProfile(int32_t profileIndex) noexcept
Select an optimization profile for the current context.
Definition: NvInferRuntime.h:2682
bool allInputDimensionsSpecified() const noexcept
Whether all dynamic dimensions of input tensors have been specified.
Definition: NvInferRuntime.h:2900
void setProfiler(IProfiler *profiler) noexcept
Set the profiler.
Definition: NvInferRuntime.h:2527
TRT_DEPRECATED bool getShapeBinding(int32_t bindingIndex, int32_t *data) const noexcept
Get values of an input tensor required for shape calculations or an output tensor produced by shape c...
Definition: NvInferRuntime.h:2882
void setName(char const *name) noexcept
Set the name of the execution context.
Definition: NvInferRuntime.h:2573
cudaEvent_t getInputConsumedEvent() const noexcept
The event associated with consuming the input.
Definition: NvInferRuntime.h:3259
bool allInputShapesSpecified() const noexcept
Whether all input shape bindings have been specified.
Definition: NvInferRuntime.h:2917
Definition: NvInferRuntime.h:282
IDimensionExpr const * operation(DimensionOperation op, IDimensionExpr const &first, IDimensionExpr const &second) noexcept
Definition: NvInferRuntime.h:292
virtual ~IExprBuilder() noexcept=default
apiv::VExprBuilder * mImpl
Definition: NvInferRuntime.h:299
IDimensionExpr const * constant(int32_t value) noexcept
Return pointer to IDimensionExp for given value.
Definition: NvInferRuntime.h:285
Application-implemented class for controlling allocation on the GPU.
Definition: NvInferRuntimeBase.h:367
Class to handle library allocated memory that is accessible to the user.
Definition: NvInferRuntime.h:144
void * data() const noexcept
A pointer to the raw data that is owned by the library.
Definition: NvInferRuntime.h:149
DataType type() const noexcept
The type of the memory that was allocated.
Definition: NvInferRuntime.h:161
TRT_DEPRECATED void destroy() noexcept
Definition: NvInferRuntime.h:172
std::size_t size() const noexcept
The size in bytes of the data that was allocated.
Definition: NvInferRuntime.h:155
apiv::VHostMemory * mImpl
Definition: NvInferRuntime.h:178
virtual ~IHostMemory() noexcept=default
A virtual base class to find a logger. Allows a plugin to find an instance of a logger if it needs to...
Definition: NvInferRuntime.h:3709
virtual ILogger * findLogger()=0
Get the logger used by the engine or execution context which called the plugin method.
virtual ~ILoggerFinder()=default
Application-implemented logging interface for the builder, refitter and runtime.
Definition: NvInferRuntimeBase.h:505
Forward declaration of IEngineInspector for use by other interfaces.
Definition: NvInferRuntime.h:43
INoCopy & operator=(INoCopy &&other)=delete
INoCopy(INoCopy const &other)=delete
INoCopy(INoCopy &&other)=delete
virtual ~INoCopy()=default
INoCopy & operator=(INoCopy const &other)=delete
Optimization profile for dynamic input dimensions and shape tensors.
Definition: NvInferRuntime.h:1293
int32_t const * getShapeValues(char const *inputName, OptProfileSelector select) const noexcept
Get the minimum / optimum / maximum values for an input shape tensor.
Definition: NvInferRuntime.h:1406
apiv::VOptimizationProfile * mImpl
Definition: NvInferRuntime.h:1459
Dims getDimensions(char const *inputName, OptProfileSelector select) const noexcept
Get the minimum / optimum / maximum dimensions for a dynamic input tensor.
Definition: NvInferRuntime.h:1334
virtual ~IOptimizationProfile() noexcept=default
float getExtraMemoryTarget() const noexcept
Get the extra memory target that has been defined for this profile.
Definition: NvInferRuntime.h:1436
bool setDimensions(char const *inputName, OptProfileSelector select, Dims dims) noexcept
Set the minimum / optimum / maximum dimensions for a dynamic input tensor.
Definition: NvInferRuntime.h:1322
bool setExtraMemoryTarget(float target) noexcept
Set a target for extra GPU memory that may be used by this profile.
Definition: NvInferRuntime.h:1424
bool setShapeValues(char const *inputName, OptProfileSelector select, int32_t const *values, int32_t nbValues) noexcept
Set the minimum / optimum / maximum values for an input shape tensor.
Definition: NvInferRuntime.h:1380
bool isValid() const noexcept
Check whether the optimization profile can be passed to an IBuilderConfig object.
Definition: NvInferRuntime.h:1453
int32_t getNbShapeValues(char const *inputName) const noexcept
Get the number of values for an input shape tensor.
Definition: NvInferRuntime.h:1394
Callback from ExecutionContext::enqueueV3()
Definition: NvInferRuntime.h:2377
virtual void notifyShape(char const *tensorName, Dims const &dims) noexcept=0
Called by TensorRT when the shape of the output tensor is known.
virtual ~IOutputAllocator()=default
virtual int32_t getInterfaceVersion() const noexcept
Return the API version of this IOutputAllocator.
Definition: NvInferRuntime.h:2386
virtual void * reallocateOutput(char const *tensorName, void *currentMemory, uint64_t size, uint64_t alignment) noexcept=0
Return a pointer to memory for an output tensor, or nullptr if memory cannot be allocated.
Single registration point for all plugins in an application. It is used to find plugin implementation...
Definition: NvInferRuntimeCommon.h:50
virtual bool registerCreator(IPluginCreator &creator, AsciiChar const *const pluginNamespace) noexcept=0
Register a plugin creator. Returns false if one with same type is already registered.
Definition: NvInferRuntime.h:351
IPluginV2DynamicExt * clone() const noexcept override=0
Clone the plugin object. This copies over internal plugin parameters as well and returns a new plugin...
virtual ~IPluginV2DynamicExt() noexcept
Definition: NvInferRuntime.h:503
Plugin class for user-implemented layers.
Definition: NvInferRuntimePlugin.h:408
Application-implemented interface for profiling.
Definition: NvInferRuntime.h:556
virtual void reportLayerTime(char const *layerName, float ms) noexcept=0
Layer time reporting callback.
virtual ~IProfiler() noexcept
Definition: NvInferRuntime.h:567
Updates weights in an engine.
Definition: NvInferRuntime.h:960
int32_t getMaxThreads() const noexcept
get the maximum number of threads that can be used by the refitter.
Definition: NvInferRuntime.h:1233
bool setNamedWeights(char const *name, Weights weights) noexcept
Specify new weights of given name.
Definition: NvInferRuntime.h:1159
int32_t getAllWeights(int32_t size, char const **weightsNames) noexcept
Get names of all weights that could be refit.
Definition: NvInferRuntime.h:1195
bool setDynamicRange(char const *tensorName, float min, float max) noexcept
Definition: NvInferRuntime.h:1060
ILogger * getLogger() const noexcept
Get the logger with which the refitter was created.
Definition: NvInferRuntime.h:1205
int32_t getTensorsWithDynamicRange(int32_t size, char const **tensorNames) const noexcept
Get names of all tensors that have refittable dynamic ranges.
Definition: NvInferRuntime.h:1104
float getDynamicRangeMin(char const *tensorName) const noexcept
Get minimum of dynamic range.
Definition: NvInferRuntime.h:1074
bool refitCudaEngine() noexcept
Updates associated engine. Return true if successful.
Definition: NvInferRuntime.h:992
int32_t getMissingWeights(int32_t size, char const **weightsNames) noexcept
Get names of missing weights.
Definition: NvInferRuntime.h:1179
int32_t getMissing(int32_t size, char const **layerNames, WeightsRole *roles) noexcept
Get description of missing weights.
Definition: NvInferRuntime.h:1013
TRT_DEPRECATED void destroy() noexcept
Definition: NvInferRuntime.h:1040
bool setMaxThreads(int32_t maxThreads) noexcept
Set the maximum number of threads.
Definition: NvInferRuntime.h:1219
float getDynamicRangeMax(char const *tensorName) const noexcept
Get maximum of dynamic range.
Definition: NvInferRuntime.h:1088
apiv::VRefitter * mImpl
Definition: NvInferRuntime.h:1239
int32_t getAll(int32_t size, char const **layerNames, WeightsRole *roles) noexcept
Get description of all weights that could be refit.
Definition: NvInferRuntime.h:1030
virtual ~IRefitter() noexcept=default
void setErrorRecorder(IErrorRecorder *recorder) noexcept
Set the ErrorRecorder for this interface.
Definition: NvInferRuntime.h:1123
IErrorRecorder * getErrorRecorder() const noexcept
Get the ErrorRecorder assigned to this interface.
Definition: NvInferRuntime.h:1138
Allows a serialized functionally unsafe engine to be deserialized.
Definition: NvInferRuntime.h:654
bool setMaxThreads(int32_t maxThreads) noexcept
Set the maximum number of threads.
Definition: NvInferRuntime.h:805
IRuntime * loadRuntime(char const *path) noexcept
Load IRuntime from the file.
Definition: NvInferRuntime.h:921
bool getEngineHostCodeAllowed() const noexcept
Get whether the runtime is allowed to deserialize engines with host executable code.
Definition: NvInferRuntime.h:943
TempfileControlFlags getTempfileControlFlags() const noexcept
Get the tempfile control flags for this runtime.
Definition: NvInferRuntime.h:893
void setEngineHostCodeAllowed(bool allowed) noexcept
Set whether the runtime is allowed to deserialize engines with host executable code.
Definition: NvInferRuntime.h:933
virtual ~IRuntime() noexcept=default
void setTemporaryDirectory(char const *path) noexcept
Set the directory that will be used by this runtime for temporary files.
Definition: NvInferRuntime.h:854
IPluginRegistry & getPluginRegistry() noexcept
Get the local plugin registry that can be used by the runtime.
Definition: NvInferRuntime.h:903
TRT_DEPRECATED void destroy() noexcept
Destroy this object.
Definition: NvInferRuntime.h:718
apiv::VRuntime * mImpl
Definition: NvInferRuntime.h:949
void setDLACore(int32_t dlaCore) noexcept
Sets the DLA core used by the network. Defaults to -1.
Definition: NvInferRuntime.h:689
int32_t getNbDLACores() const noexcept
Returns number of DLA hardware cores accessible or 0 if DLA is unavailable.
Definition: NvInferRuntime.h:706
ICudaEngine * deserializeCudaEngine(void const *blob, std::size_t size) noexcept
Deserialize an engine from a stream.
Definition: NvInferRuntime.h:781
void setTempfileControlFlags(TempfileControlFlags flags) noexcept
Set the tempfile control flags for this runtime.
Definition: NvInferRuntime.h:881
int32_t getDLACore() const noexcept
Get the DLA core that the engine executes on.
Definition: NvInferRuntime.h:698
void setGpuAllocator(IGpuAllocator *allocator) noexcept
Set the GPU allocator.
Definition: NvInferRuntime.h:732
IErrorRecorder * getErrorRecorder() const noexcept
Get the ErrorRecorder assigned to this interface.
Definition: NvInferRuntime.h:766
ILogger * getLogger() const noexcept
Get the logger with which the runtime was created.
Definition: NvInferRuntime.h:791
int32_t getMaxThreads() const noexcept
Get the maximum number of threads that can be used by the runtime.
Definition: NvInferRuntime.h:819
char const * getTemporaryDirectory() const noexcept
Get the directory that will be used by this runtime for temporary files.
Definition: NvInferRuntime.h:865
void setErrorRecorder(IErrorRecorder *recorder) noexcept
Set the ErrorRecorder for this interface.
Definition: NvInferRuntime.h:751
Register the plugin creator to the registry The static registry object will be instantiated when the ...
Definition: NvInferRuntime.h:3681
PluginRegistrar()
Definition: NvInferRuntime.h:3683
An array of weights used as a layer parameter.
Definition: NvInferRuntime.h:126
DataType type
The type of the weights.
Definition: NvInferRuntime.h:128
int64_t count
The number of weights in the array.
Definition: NvInferRuntime.h:130
void const * values
The weight values, in a contiguous array.
Definition: NvInferRuntime.h:129
IRuntime * createInferRuntime(ILogger &logger) noexcept
Create an instance of a safe::IRuntime class.
The TensorRT API version 1 namespace.
uint32_t TacticSources
Represents a collection of one or more TacticSource values combined using bitwise-OR operations.
Definition: NvInferRuntime.h:1505
EngineCapability
List of supported engine capability flows.
Definition: NvInferRuntime.h:69
DimensionOperation
An operation on two IDimensionExpr, which represent integer expressions used in dimension computation...
Definition: NvInferRuntime.h:192
@ kSUB
Subtract the second element from the first.
@ kSUM
Sum of the two operands.
@ kPROD
Product of the two operands.
@ kFLOOR_DIV
Floor division of the first element by the second.
@ kEQUAL
1 if operands are equal, 0 otherwise.
@ kMIN
Minimum of the two operands.
@ kLESS
1 if first operand is less than second operand, 0 otherwise.
@ kMAX
Maximum of the two operands.
@ kCEIL_DIV
Division rounding up.
TensorIOMode
Definition of tensor IO Mode.
Definition: NvInferRuntimeBase.h:883
HardwareCompatibilityLevel
Definition: NvInfer.h:9057
uint32_t TempfileControlFlags
Represents a collection of one or more TempfileControlFlag values combined using bitwise-OR operation...
Definition: NvInferRuntime.h:644
constexpr int32_t EnumMax< WeightsRole >() noexcept
Maximum number of elements in WeightsRole enum.
Definition: NvInferRuntime.h:588
@ kV2_DYNAMICEXT
IPluginV2DynamicExt.
constexpr int32_t EnumMax< LayerInformationFormat >() noexcept
Definition: NvInferRuntime.h:3475
DataType
The type of weights and tensors.
Definition: NvInferRuntimeBase.h:120
DeviceType
The device that this layer/network will execute on.
Definition: NvInferRuntime.h:599
@ kCONSTANT
Constant layer.
@ kDEFAULT
Similar to ONNX Gather.
TempfileControlFlag
Flags used to control TensorRT's behavior when creating executable temporary files.
Definition: NvInferRuntime.h:622
@ kALLOW_IN_MEMORY_FILES
Allow creating and loading files in-memory (or unnamed files).
constexpr int32_t EnumMax< OptProfileSelector >() noexcept
Number of different values of OptProfileSelector enum.
Definition: NvInferRuntime.h:1265
WeightsRole
How a layer uses particular Weights.
Definition: NvInferRuntime.h:577
@ kSHIFT
shift part of IScaleLayer
@ kANY
Any other weights role.
@ kBIAS
bias for IConvolutionLayer, IDeconvolutionLayer, or IFullyConnectedLayer
@ kKERNEL
kernel for IConvolutionLayer, IDeconvolutionLayer, or IFullyConnectedLayer
constexpr int32_t EnumMax< ProfilingVerbosity >() noexcept
Maximum number of profile verbosity levels in ProfilingVerbosity enum.
Definition: NvInferRuntime.h:1530
ProfilingVerbosity
List of verbosity levels of layer information exposed in NVTX annotations and in IEngineInspector.
Definition: NvInferRuntime.h:1517
@ kLAYER_NAMES_ONLY
Print only the layer names. This is the default setting.
@ kDETAILED
Print detailed layer information including layer names and layer parameters.
TacticSource
List of tactic sources for TensorRT.
Definition: NvInferRuntime.h:1472
@ kEDGE_MASK_CONVOLUTIONS
TensorFormat PluginFormat
PluginFormat is reserved for backward compatibility.
Definition: NvInferRuntimePlugin.h:41
@ kMIN
Minimum of the two elements.
constexpr int32_t EnumMax< TempfileControlFlag >() noexcept
Maximum number of elements in TempfileControlFlag enum.
Definition: NvInferRuntime.h:634
TensorFormat
Format of the input/output tensors.
Definition: NvInferRuntimeBase.h:209
constexpr int32_t EnumMax< TacticSource >() noexcept
Maximum number of tactic sources in TacticSource enum.
Definition: NvInferRuntime.h:1494
LayerInformationFormat
The format in which the IEngineInspector prints the layer information.
Definition: NvInferRuntime.h:3467
@ kJSON
Print layer information in JSON format.
@ kONELINE
Print layer information in one line per layer.
constexpr int32_t EnumMax< DeviceType >() noexcept
Maximum number of elements in DeviceType enum.
Definition: NvInferRuntime.h:606
constexpr int32_t EnumMax< DimensionOperation >() noexcept
Maximum number of elements in DimensionOperation enum.
Definition: NvInferRuntime.h:206
TensorLocation
The location for tensor data storage, device or host.
Definition: NvInferRuntime.h:216
@ kHOST
Data stored on host.
@ kDEVICE
Data stored on device.
OptProfileSelector
When setting or querying optimization profile parameters (such as shape tensor inputs or dynamic dime...
Definition: NvInferRuntime.h:1253
@ kOPT
This is used to set or get the value that is used in the optimization (kernel selection).
Definition: NvInferRuntime.h:321
Dims min
Lower bounds on tensor’s dimensions.
Definition: NvInferRuntime.h:326
Dims max
Upper bounds on tensor’s dimensions.
Definition: NvInferRuntime.h:329
PluginTensorDesc desc
Information required to interpret a pointer to tensor data, except that desc.dims has -1 in place of ...
Definition: NvInferRuntime.h:323
Fields that a plugin might see for an input or output.
Definition: NvInferRuntimePlugin.h:54
Declaration of EnumMaxImpl struct to store maximum number of elements in an enumeration type.
Definition: NvInferRuntimeBase.h:105