13#ifndef NV_INFER_RUNTIME_H
14#define NV_INFER_RUNTIME_H
22#include "NvInferImpl.h"
28class IExecutionContext;
31class IEngineInspector;
107 static constexpr int32_t kVALUE = 3;
151 return mImpl->data();
155 std::size_t
size() const noexcept
157 return mImpl->size();
163 return mImpl->type();
227 static constexpr int32_t kVALUE = 2;
249 return mImpl->isConstant();
256 return mImpl->getConstantValue();
287 return mImpl->constant(value);
295 return mImpl->operation(op, first, second);
380 int32_t outputIndex,
DimsExprs const* inputs, int32_t nbInputs,
IExprBuilder& exprBuilder) noexcept = 0;
385 static constexpr int32_t kFORMAT_COMBINATION_LIMIT = 100;
419 virtual
bool supportsFormatCombination(
420 int32_t pos,
PluginTensorDesc const* inOut, int32_t nbInputs, int32_t nbOutputs) noexcept = 0;
473 int32_t nbOutputs) const noexcept = 0;
488 void const* const* inputs,
void* const* outputs,
void* workspace, cudaStream_t stream) noexcept = 0;
498 int32_t getTensorRTVersion() const noexcept
override
509 bool const*,
PluginFormat, int32_t)
noexcept override final
518 Dims getOutputDimensions(int32_t,
Dims const*, int32_t)
noexcept override final
523 bool isOutputBroadcastAcrossBatch(int32_t,
bool const*, int32_t)
const noexcept override final
528 bool canBroadcastInputAcrossBatch(int32_t)
const noexcept override final
533 size_t getWorkspaceSize(int32_t)
const noexcept override final
538 int32_t enqueue(int32_t,
void const*
const*,
void*
const*,
void*, cudaStream_t)
noexcept override final
638 void const* blob, std::
size_t size, IPluginFactory* pluginFactory) noexcept
640 return mImpl->deserializeCudaEngine(blob, size,
nullptr);
655 mImpl->setDLACore(dlaCore);
664 return mImpl->getDLACore();
672 return mImpl->getNbDLACores();
698 mImpl->setGpuAllocator(allocator);
717 mImpl->setErrorRecorder(recorder);
732 return mImpl->getErrorRecorder();
747 return mImpl->deserializeCudaEngine(blob, size,
nullptr);
757 return mImpl->getLogger();
771 return mImpl->setMaxThreads(maxThreads);
785 return mImpl->getMaxThreads();
819 return mImpl->setWeights(layerName, role, weights);
834 return mImpl->refitCudaEngine();
855 return mImpl->getMissing(size, layerNames, roles);
872 return mImpl->getAll(size, layerNames, roles);
902 return mImpl->setDynamicRange(tensorName, min, max);
916 return mImpl->getDynamicRangeMin(tensorName);
930 return mImpl->getDynamicRangeMax(tensorName);
946 return mImpl->getTensorsWithDynamicRange(size, tensorNames);
965 mImpl->setErrorRecorder(recorder);
980 return mImpl->getErrorRecorder();
1001 return mImpl->setNamedWeights(name, weights);
1021 return mImpl->getMissingWeights(size, weightsNames);
1037 return mImpl->getAllWeights(size, weightsNames);
1047 return mImpl->getLogger();
1061 return mImpl->setMaxThreads(maxThreads);
1075 return mImpl->getMaxThreads();
1164 return mImpl->setDimensions(inputName, select, dims);
1176 return mImpl->getDimensions(inputName, select);
1221 char const* inputName,
OptProfileSelector select, int32_t
const* values, int32_t nbValues)
noexcept
1223 return mImpl->setShapeValues(inputName, select, values, nbValues);
1236 return mImpl->getNbShapeValues(inputName);
1248 return mImpl->getShapeValues(inputName, select);
1266 return mImpl->setExtraMemoryTarget(target);
1278 return mImpl->getExtraMemoryTarget();
1295 return mImpl->isValid();
1401 return mImpl->getNbBindings();
1429 return mImpl->getBindingIndex(name);
1452 return mImpl->getBindingName(bindingIndex);
1467 return mImpl->bindingIsInput(bindingIndex);
1494 return mImpl->getBindingDimensions(bindingIndex);
1509 return mImpl->getTensorShape(tensorName);
1524 return mImpl->getBindingDataType(bindingIndex);
1539 return mImpl->getTensorDataType(tensorName);
1555 return mImpl->getMaxBatchSize();
1569 return mImpl->getNbLayers();
1583 return mImpl->serialize();
1599 return mImpl->createExecutionContext();
1629 return mImpl->getLocation(bindingIndex);
1646 return mImpl->getTensorLocation(tensorName);
1666 return mImpl->isShapeInferenceIO(tensorName);
1680 return mImpl->getTensorIOMode(tensorName);
1689 return mImpl->createExecutionContextWithoutDeviceMemory();
1699 return mImpl->getDeviceMemorySize();
1709 return mImpl->isRefittable();
1726 return mImpl->getBindingBytesPerComponent(bindingIndex);
1743 return mImpl->getTensorBytesPerComponent(tensorName);
1759 return mImpl->getBindingComponentsPerElement(bindingIndex);
1776 return mImpl->getTensorComponentsPerElement(tensorName);
1790 return mImpl->getBindingFormat(bindingIndex);
1803 return mImpl->getTensorFormat(tensorName);
1827 return mImpl->getBindingFormatDesc(bindingIndex);
1849 return mImpl->getTensorFormatDesc(tensorName);
1865 return mImpl->getBindingVectorizedDim(bindingIndex);
1880 return mImpl->getTensorVectorizedDim(tensorName);
1895 return mImpl->getName();
1906 return mImpl->getNbOptimizationProfiles();
1937 int32_t bindingIndex, int32_t profileIndex,
OptProfileSelector select)
const noexcept
1939 return mImpl->getProfileDimensions(bindingIndex, profileIndex, select);
1959 return mImpl->getProfileShape(tensorName, profileIndex, select);
1989 return mImpl->getProfileShapeValues(profileIndex, inputIndex, select);
2027 return mImpl->isShapeBinding(bindingIndex);
2044 return mImpl->isExecutionBinding(bindingIndex);
2059 return mImpl->getEngineCapability();
2077 return mImpl->setErrorRecorder(recorder);
2092 return mImpl->getErrorRecorder();
2111 return mImpl->hasImplicitBatchDimension();
2126 return mImpl->getTacticSources();
2137 return mImpl->getProfilingVerbosity();
2147 return mImpl->createEngineInspector();
2160 return mImpl->getNbIOTensors();
2172 return mImpl->getIOTensorName(index);
2219 virtual void*
reallocateOutput(
char const* tensorName,
void* currentMemory, uint64_t size, uint64_t alignment)
noexcept = 0;
2273 return mImpl->execute(batchSize, bindings);
2306 int32_t batchSize,
void*
const* bindings, cudaStream_t stream, cudaEvent_t* inputConsumed)
noexcept
2308 return mImpl->enqueue(batchSize, bindings, stream, inputConsumed);
2321 mImpl->setDebugSync(sync);
2331 return mImpl->getDebugSync();
2341 mImpl->setProfiler(profiler);
2351 return mImpl->getProfiler();
2361 return mImpl->getEngine();
2387 mImpl->setName(name);
2397 return mImpl->getName();
2414 mImpl->setDeviceMemory(memory);
2437 return mImpl->getStrides(bindingIndex);
2458 return mImpl->getTensorStrides(tensorName);
2502 return mImpl->setOptimizationProfile(profileIndex);
2514 return mImpl->getOptimizationProfile();
2553 return mImpl->setBindingDimensions(bindingIndex, dimensions);
2571 return mImpl->setInputShape(tensorName, dims);
2604 return mImpl->getBindingDimensions(bindingIndex);
2641 return mImpl->getTensorShape(tensorName);
2676 return mImpl->setInputShapeBinding(bindingIndex, data);
2700 return mImpl->getShapeBinding(bindingIndex, data);
2718 return mImpl->allInputDimensionsSpecified();
2735 return mImpl->allInputShapesSpecified();
2754 mImpl->setErrorRecorder(recorder);
2769 return mImpl->getErrorRecorder();
2786 return mImpl->executeV2(bindings);
2816 return mImpl->enqueueV2(bindings, stream, inputConsumed);
2863 return mImpl->setOptimizationProfileAsync(profileIndex, stream);
2878 mImpl->setEnqueueEmitsProfile(enqueueEmitsProfile);
2889 return mImpl->getEnqueueEmitsProfile();
2918 return mImpl->reportToProfiler();
2960 return mImpl->setTensorAddress(tensorName, data);
2977 return mImpl->getTensorAddress(tensorName);
2999 return mImpl->setInputTensorAddress(tensorName, data);
3018 return mImpl->getOutputTensorAddress(tensorName);
3049 int32_t
inferShapes(int32_t nbMaxNames,
char const** tensorNames)
noexcept
3051 return mImpl->inferShapes(nbMaxNames, tensorNames);
3067 return mImpl->setInputConsumedEvent(event);
3077 return mImpl->getInputConsumedEvent();
3096 return mImpl->setOutputAllocator(tensorName, outputAllocator);
3109 return mImpl->getOutputAllocator(tensorName);
3127 return mImpl->getMaxOutputSize(tensorName);
3147 return mImpl->setTemporaryStorageAllocator(allocator);
3157 return mImpl->getTemporaryStorageAllocator();
3175 return mImpl->enqueueV3(stream);
3190 mImpl->setPersistentCacheLimit(size);
3201 return mImpl->getPersistentCacheLimit();
3225 return mImpl->setNvtxVerbosity(verbosity);
3237 return mImpl->getNvtxVerbosity();
3299 return mImpl->setExecutionContext(context);
3311 return mImpl->getExecutionContext();
3336 return mImpl->getLayerInformation(layerIndex, format);
3361 return mImpl->getEngineInformation(format);
3380 mImpl->setErrorRecorder(recorder);
3395 return mImpl->getErrorRecorder();
3408extern "C" TENSORRTAPI void* createInferRuntime_INTERNAL(
void* logger, int32_t version)
noexcept;
3414extern "C" TENSORRTAPI void* createInferRefitter_INTERNAL(
void* engine,
void* logger, int32_t version)
noexcept;
3448inline IRefitter* createInferRefitter(ICudaEngine& engine, ILogger& logger)
noexcept
3450 return static_cast<IRefitter*
>(createInferRefitter_INTERNAL(&engine, &logger,
NV_TENSORRT_VERSION));
3466template <
typename T>
3482#define REGISTER_TENSORRT_PLUGIN(name) \
3483 static nvinfer1::PluginRegistrar<name> pluginRegistrar##name {}
nvinfer1::ILogger * getLogger() noexcept
Return the logger object.
nvinfer1::IPluginRegistry * getPluginRegistry() noexcept
Return the plugin registry.
#define TENSORRTAPI
Definition: NvInferRuntimeCommon.h:54
#define NV_TENSORRT_VERSION
Definition: NvInferRuntimeCommon.h:73
#define TRT_DEPRECATED
Definition: NvInferRuntimeCommon.h:40
#define TRT_DEPRECATED_ENUM
Definition: NvInferRuntimeCommon.h:41
Structure to define the dimensions of a tensor.
Definition: NvInferRuntimeCommon.h:171
static constexpr int32_t MAX_DIMS
The maximum rank (number of dimensions) supported for a tensor.
Definition: NvInferRuntimeCommon.h:174
Definition: NvInferRuntime.h:309
int32_t nbDims
The number of dimensions.
Definition: NvInferRuntime.h:311
An engine for executing inference on a built network, with functionally unsafe features.
Definition: NvInferRuntime.h:1383
int32_t getTensorBytesPerComponent(char const *tensorName) const noexcept
Return the number of bytes per component of an element, or -1 if the provided name does not map to an...
Definition: NvInferRuntime.h:1741
bool hasImplicitBatchDimension() const noexcept
Query whether the engine was built with an implicit batch dimension.
Definition: NvInferRuntime.h:2109
char const * getIOTensorName(int32_t index) const noexcept
Return name of an IO tensor.
Definition: NvInferRuntime.h:2170
EngineCapability getEngineCapability() const noexcept
Determine what execution capability this engine has.
Definition: NvInferRuntime.h:2057
TRT_DEPRECATED bool isExecutionBinding(int32_t bindingIndex) const noexcept
True if pointer to tensor data is required for execution phase, false if nullptr can be supplied.
Definition: NvInferRuntime.h:2042
TRT_DEPRECATED int32_t getBindingIndex(char const *name) const noexcept
Retrieve the binding index for a named tensor.
Definition: NvInferRuntime.h:1427
TRT_DEPRECATED void destroy() noexcept
Destroy this object;.
Definition: NvInferRuntime.h:1609
IErrorRecorder * getErrorRecorder() const noexcept
Get the ErrorRecorder assigned to this interface.
Definition: NvInferRuntime.h:2090
TRT_DEPRECATED char const * getBindingName(int32_t bindingIndex) const noexcept
Retrieve the name corresponding to a binding index.
Definition: NvInferRuntime.h:1450
apiv::VCudaEngine * mImpl
Definition: NvInferRuntime.h:2176
char const * getTensorFormatDesc(char const *tensorName) const noexcept
Return the human readable description of the tensor format, or empty string if the provided name does...
Definition: NvInferRuntime.h:1847
Dims getProfileShape(char const *tensorName, int32_t profileIndex, OptProfileSelector select) const noexcept
Get the minimum / optimum / maximum dimensions for an input tensor given its name under an optimizati...
Definition: NvInferRuntime.h:1957
TRT_DEPRECATED int32_t const * getProfileShapeValues(int32_t profileIndex, int32_t inputIndex, OptProfileSelector select) const noexcept
Get minimum / optimum / maximum values for an input shape binding under an optimization profile.
Definition: NvInferRuntime.h:1986
TRT_DEPRECATED bool bindingIsInput(int32_t bindingIndex) const noexcept
Determine whether a binding is an input binding.
Definition: NvInferRuntime.h:1465
TRT_DEPRECATED Dims getBindingDimensions(int32_t bindingIndex) const noexcept
Get the dimensions of a binding.
Definition: NvInferRuntime.h:1492
DataType getTensorDataType(char const *tensorName) const noexcept
Determine the required data type for a buffer from its tensor name.
Definition: NvInferRuntime.h:1537
void setErrorRecorder(IErrorRecorder *recorder) noexcept
Set the ErrorRecorder for this interface.
Definition: NvInferRuntime.h:2075
size_t getDeviceMemorySize() const noexcept
Return the amount of device memory required by an execution context.
Definition: NvInferRuntime.h:1697
TacticSources getTacticSources() const noexcept
return the tactic sources required by this engine.
Definition: NvInferRuntime.h:2124
TRT_DEPRECATED TensorFormat getBindingFormat(int32_t bindingIndex) const noexcept
Return the binding format.
Definition: NvInferRuntime.h:1788
virtual ~ICudaEngine() noexcept=default
TRT_DEPRECATED int32_t getBindingComponentsPerElement(int32_t bindingIndex) const noexcept
Return the number of components included in one element.
Definition: NvInferRuntime.h:1757
char const * getName() const noexcept
Returns the name of the network associated with the engine.
Definition: NvInferRuntime.h:1893
ProfilingVerbosity getProfilingVerbosity() const noexcept
Return the ProfilingVerbosity the builder config was set to when the engine was built.
Definition: NvInferRuntime.h:2135
bool isShapeInferenceIO(char const *tensorName) const noexcept
True if tensor is required as input for shape calculations or is output from shape calculations.
Definition: NvInferRuntime.h:1664
TRT_DEPRECATED bool isShapeBinding(int32_t bindingIndex) const noexcept
True if tensor is required as input for shape calculations or output from them.
Definition: NvInferRuntime.h:2025
int32_t getTensorVectorizedDim(char const *tensorName) const noexcept
Return the dimension index that the buffer is vectorized, or -1 if the provided name does not map to ...
Definition: NvInferRuntime.h:1878
TRT_DEPRECATED DataType getBindingDataType(int32_t bindingIndex) const noexcept
Determine the required data type for a buffer from its binding index.
Definition: NvInferRuntime.h:1522
TRT_DEPRECATED int32_t getBindingVectorizedDim(int32_t bindingIndex) const noexcept
Return the dimension index that the buffer is vectorized, or -1 is the name is not found.
Definition: NvInferRuntime.h:1863
TRT_DEPRECATED char const * getBindingFormatDesc(int32_t bindingIndex) const noexcept
Return the human readable description of the tensor format, or nullptr if the provided name does not ...
Definition: NvInferRuntime.h:1825
TensorFormat getTensorFormat(char const *tensorName) const noexcept
Return the binding format, or TensorFormat::kLINEAR if the provided name does not map to an input or ...
Definition: NvInferRuntime.h:1801
IHostMemory * serialize() const noexcept
Serialize the network to a stream.
Definition: NvInferRuntime.h:1581
IExecutionContext * createExecutionContextWithoutDeviceMemory() noexcept
create an execution context without any device memory allocated
Definition: NvInferRuntime.h:1687
TensorLocation getTensorLocation(char const *tensorName) const noexcept
Get whether an input or output tensor must be on GPU or CPU.
Definition: NvInferRuntime.h:1644
IExecutionContext * createExecutionContext() noexcept
Create an execution context.
Definition: NvInferRuntime.h:1597
IEngineInspector * createEngineInspector() const noexcept
Create a new engine inspector which prints the layer information in an engine or an execution context...
Definition: NvInferRuntime.h:2145
TRT_DEPRECATED int32_t getMaxBatchSize() const noexcept
Get the maximum batch size which can be used for inference. Should only be called if the engine is bu...
Definition: NvInferRuntime.h:1553
int32_t getNbOptimizationProfiles() const noexcept
Get the number of optimization profiles defined for this engine.
Definition: NvInferRuntime.h:1904
TensorIOMode getTensorIOMode(char const *tensorName) const noexcept
Determine whether a tensor is an input or output tensor.
Definition: NvInferRuntime.h:1678
int32_t getNbLayers() const noexcept
Get the number of layers in the network.
Definition: NvInferRuntime.h:1567
TRT_DEPRECATED TensorLocation getLocation(int32_t bindingIndex) const noexcept
Get location of binding.
Definition: NvInferRuntime.h:1627
int32_t getNbIOTensors() const noexcept
Return number of IO tensors.
Definition: NvInferRuntime.h:2158
TRT_DEPRECATED Dims getProfileDimensions(int32_t bindingIndex, int32_t profileIndex, OptProfileSelector select) const noexcept
Get the minimum / optimum / maximum dimensions for a particular input binding under an optimization p...
Definition: NvInferRuntime.h:1936
TRT_DEPRECATED int32_t getBindingBytesPerComponent(int32_t bindingIndex) const noexcept
Return the number of bytes per component of an element.
Definition: NvInferRuntime.h:1724
Dims getTensorShape(char const *tensorName) const noexcept
Get shape of an input or output tensor.
Definition: NvInferRuntime.h:1507
int32_t getTensorComponentsPerElement(char const *tensorName) const noexcept
Return the number of components included in one element, or -1 if the provided name does not map to a...
Definition: NvInferRuntime.h:1774
bool isRefittable() const noexcept
Return true if an engine can be refit.
Definition: NvInferRuntime.h:1707
Definition: NvInferRuntime.h:244
bool isConstant() const noexcept
Return true if expression is a build-time constant.
Definition: NvInferRuntime.h:247
virtual ~IDimensionExpr() noexcept=default
apiv::VDimensionExpr * mImpl
Definition: NvInferRuntime.h:260
int32_t getConstantValue() const noexcept
Definition: NvInferRuntime.h:254
An engine inspector which prints out the layer information of an engine or an execution context.
Definition: NvInferRuntime.h:3281
char const * getLayerInformation(int32_t layerIndex, LayerInformationFormat format) const noexcept
Get a string describing the information about a specific layer in the current engine or the execution...
Definition: NvInferRuntime.h:3334
IErrorRecorder * getErrorRecorder() const noexcept
Get the ErrorRecorder assigned to this interface.
Definition: NvInferRuntime.h:3393
void setErrorRecorder(IErrorRecorder *recorder) noexcept
Set the ErrorRecorder for this interface.
Definition: NvInferRuntime.h:3378
IExecutionContext const * getExecutionContext() const noexcept
Get the context currently being inspected.
Definition: NvInferRuntime.h:3309
apiv::VEngineInspector * mImpl
Definition: NvInferRuntime.h:3399
virtual ~IEngineInspector() noexcept=default
char const * getEngineInformation(LayerInformationFormat format) const noexcept
Get a string describing the information about all the layers in the current engine or the execution c...
Definition: NvInferRuntime.h:3359
Reference counted application-implemented error reporting interface for TensorRT objects.
Definition: NvInferRuntimeCommon.h:1689
Context for executing inference using an engine, with functionally unsafe features.
Definition: NvInferRuntime.h:2245
TRT_DEPRECATED bool enqueue(int32_t batchSize, void *const *bindings, cudaStream_t stream, cudaEvent_t *inputConsumed) noexcept
Asynchronously execute inference on a batch.
Definition: NvInferRuntime.h:2305
IOutputAllocator * getOutputAllocator(char const *tensorName) const noexcept
Get output allocator associated with output tensor of given name, or nullptr if the provided name doe...
Definition: NvInferRuntime.h:3107
IErrorRecorder * getErrorRecorder() const noexcept
Get the ErrorRecorder assigned to this interface.
Definition: NvInferRuntime.h:2767
bool reportToProfiler() const noexcept
Calculate layer timing info for the current optimization profile in IExecutionContext and update the ...
Definition: NvInferRuntime.h:2916
virtual ~IExecutionContext() noexcept=default
void setDeviceMemory(void *memory) noexcept
Set the device memory for use by this execution context.
Definition: NvInferRuntime.h:2412
char const * getName() const noexcept
Return the name of the execution context.
Definition: NvInferRuntime.h:2395
void setDebugSync(bool sync) noexcept
Set the debug sync flag.
Definition: NvInferRuntime.h:2319
IGpuAllocator * getTemporaryStorageAllocator() const noexcept
Get allocator set by setTemporaryStorageAllocator.
Definition: NvInferRuntime.h:3155
void setEnqueueEmitsProfile(bool enqueueEmitsProfile) noexcept
Set whether enqueue emits layer timing to the profiler.
Definition: NvInferRuntime.h:2876
Dims getTensorShape(char const *tensorName) const noexcept
Return the shape of the given input or output.
Definition: NvInferRuntime.h:2639
bool setInputShape(char const *tensorName, Dims const &dims) noexcept
Set shape of given input.
Definition: NvInferRuntime.h:2569
bool executeV2(void *const *bindings) noexcept
Synchronously execute inference a network.
Definition: NvInferRuntime.h:2784
bool getEnqueueEmitsProfile() const noexcept
Get the enqueueEmitsProfile state.
Definition: NvInferRuntime.h:2887
TRT_DEPRECATED bool setBindingDimensions(int32_t bindingIndex, Dims dimensions) noexcept
Set the dynamic dimensions of an input binding.
Definition: NvInferRuntime.h:2551
void const * getTensorAddress(char const *tensorName) const noexcept
Get memory address bound to given input or output tensor, or nullptr if the provided name does not ma...
Definition: NvInferRuntime.h:2975
bool setOutputAllocator(char const *tensorName, IOutputAllocator *outputAllocator) noexcept
Set output allocator to use for output tensor of given name. Pass nullptr to outputAllocator to unset...
Definition: NvInferRuntime.h:3094
TRT_DEPRECATED bool enqueueV2(void *const *bindings, cudaStream_t stream, cudaEvent_t *inputConsumed) noexcept
Asynchronously execute inference.
Definition: NvInferRuntime.h:2814
bool setOptimizationProfileAsync(int32_t profileIndex, cudaStream_t stream) noexcept
Select an optimization profile for the current context with async semantics.
Definition: NvInferRuntime.h:2861
TRT_DEPRECATED bool setInputShapeBinding(int32_t bindingIndex, int32_t const *data) noexcept
Set values of input tensor required by shape calculations.
Definition: NvInferRuntime.h:2674
apiv::VExecutionContext * mImpl
Definition: NvInferRuntime.h:3241
void setPersistentCacheLimit(size_t size) noexcept
Set the maximum size for persistent cache usage.
Definition: NvInferRuntime.h:3188
size_t getPersistentCacheLimit() const noexcept
Get the maximum size for persistent cache usage.
Definition: NvInferRuntime.h:3199
ICudaEngine const & getEngine() const noexcept
Get the associated engine.
Definition: NvInferRuntime.h:2359
ProfilingVerbosity getNvtxVerbosity() const noexcept
Get the NVTX verbosity of the execution context.
Definition: NvInferRuntime.h:3235
int64_t getMaxOutputSize(char const *tensorName) const noexcept
Get upper bound on an output tensor's size, in bytes, based on the current optimization profile and i...
Definition: NvInferRuntime.h:3125
int32_t inferShapes(int32_t nbMaxNames, char const **tensorNames) noexcept
Run shape calculations.
Definition: NvInferRuntime.h:3049
bool setTensorAddress(char const *tensorName, void *data) noexcept
Set memory address for given input or output tensor.
Definition: NvInferRuntime.h:2958
bool setTemporaryStorageAllocator(IGpuAllocator *allocator) noexcept
Specify allocator to use for internal temporary storage.
Definition: NvInferRuntime.h:3145
void * getOutputTensorAddress(char const *tensorName) const noexcept
Get memory address for given output.
Definition: NvInferRuntime.h:3016
bool enqueueV3(cudaStream_t stream) noexcept
Asynchronously execute inference.
Definition: NvInferRuntime.h:3173
int32_t getOptimizationProfile() const noexcept
Get the index of the currently selected optimization profile.
Definition: NvInferRuntime.h:2512
bool setInputTensorAddress(char const *tensorName, void const *data) noexcept
Set memory address for given input.
Definition: NvInferRuntime.h:2997
TRT_DEPRECATED void destroy() noexcept
Destroy this object.
Definition: NvInferRuntime.h:2371
bool getDebugSync() const noexcept
Get the debug sync flag.
Definition: NvInferRuntime.h:2329
TRT_DEPRECATED Dims getBindingDimensions(int32_t bindingIndex) const noexcept
Get the dynamic dimensions of a binding.
Definition: NvInferRuntime.h:2602
bool setInputConsumedEvent(cudaEvent_t event) noexcept
Mark input as consumed.
Definition: NvInferRuntime.h:3065
Dims getTensorStrides(char const *tensorName) const noexcept
Return the strides of the buffer for the given tensor name.
Definition: NvInferRuntime.h:2456
bool setNvtxVerbosity(ProfilingVerbosity verbosity) noexcept
Set the verbosity of the NVTX markers in the execution context.
Definition: NvInferRuntime.h:3223
IProfiler * getProfiler() const noexcept
Get the profiler.
Definition: NvInferRuntime.h:2349
TRT_DEPRECATED Dims getStrides(int32_t bindingIndex) const noexcept
Return the strides of the buffer for the given binding.
Definition: NvInferRuntime.h:2435
void setErrorRecorder(IErrorRecorder *recorder) noexcept
Set the ErrorRecorder for this interface.
Definition: NvInferRuntime.h:2752
TRT_DEPRECATED bool setOptimizationProfile(int32_t profileIndex) noexcept
Select an optimization profile for the current context.
Definition: NvInferRuntime.h:2500
bool allInputDimensionsSpecified() const noexcept
Whether all dynamic dimensions of input tensors have been specified.
Definition: NvInferRuntime.h:2716
void setProfiler(IProfiler *profiler) noexcept
Set the profiler.
Definition: NvInferRuntime.h:2339
TRT_DEPRECATED bool getShapeBinding(int32_t bindingIndex, int32_t *data) const noexcept
Get values of an input tensor required for shape calculations or an output tensor produced by shape c...
Definition: NvInferRuntime.h:2698
void setName(char const *name) noexcept
Set the name of the execution context.
Definition: NvInferRuntime.h:2385
cudaEvent_t getInputConsumedEvent() const noexcept
The event associated with consuming the input.
Definition: NvInferRuntime.h:3075
bool allInputShapesSpecified() const noexcept
Whether all input shape bindings have been specified.
Definition: NvInferRuntime.h:2733
Definition: NvInferRuntime.h:282
IDimensionExpr const * operation(DimensionOperation op, IDimensionExpr const &first, IDimensionExpr const &second) noexcept
Definition: NvInferRuntime.h:292
virtual ~IExprBuilder() noexcept=default
apiv::VExprBuilder * mImpl
Definition: NvInferRuntime.h:299
IDimensionExpr const * constant(int32_t value) noexcept
Return pointer to IDimensionExp for given value.
Definition: NvInferRuntime.h:285
Application-implemented class for controlling allocation on the GPU.
Definition: NvInferRuntimeCommon.h:1362
Class to handle library allocated memory that is accessible to the user.
Definition: NvInferRuntime.h:144
void * data() const noexcept
A pointer to the raw data that is owned by the library.
Definition: NvInferRuntime.h:149
DataType type() const noexcept
The type of the memory that was allocated.
Definition: NvInferRuntime.h:161
TRT_DEPRECATED void destroy() noexcept
Definition: NvInferRuntime.h:172
std::size_t size() const noexcept
The size in bytes of the data that was allocated.
Definition: NvInferRuntime.h:155
apiv::VHostMemory * mImpl
Definition: NvInferRuntime.h:178
virtual ~IHostMemory() noexcept=default
Application-implemented logging interface for the builder, refitter and runtime.
Definition: NvInferRuntimeCommon.h:1500
Forward declaration of IEngineInspector for use by other interfaces.
Definition: NvInferRuntime.h:43
INoCopy & operator=(INoCopy &&other)=delete
INoCopy(INoCopy const &other)=delete
INoCopy(INoCopy &&other)=delete
virtual ~INoCopy()=default
INoCopy & operator=(INoCopy const &other)=delete
Optimization profile for dynamic input dimensions and shape tensors.
Definition: NvInferRuntime.h:1133
int32_t const * getShapeValues(char const *inputName, OptProfileSelector select) const noexcept
Get the minimum / optimum / maximum values for an input shape tensor.
Definition: NvInferRuntime.h:1246
apiv::VOptimizationProfile * mImpl
Definition: NvInferRuntime.h:1299
Dims getDimensions(char const *inputName, OptProfileSelector select) const noexcept
Get the minimum / optimum / maximum dimensions for a dynamic input tensor.
Definition: NvInferRuntime.h:1174
virtual ~IOptimizationProfile() noexcept=default
float getExtraMemoryTarget() const noexcept
Get the extra memory target that has been defined for this profile.
Definition: NvInferRuntime.h:1276
bool setDimensions(char const *inputName, OptProfileSelector select, Dims dims) noexcept
Set the minimum / optimum / maximum dimensions for a dynamic input tensor.
Definition: NvInferRuntime.h:1162
bool setExtraMemoryTarget(float target) noexcept
Set a target for extra GPU memory that may be used by this profile.
Definition: NvInferRuntime.h:1264
bool setShapeValues(char const *inputName, OptProfileSelector select, int32_t const *values, int32_t nbValues) noexcept
Set the minimum / optimum / maximum values for an input shape tensor.
Definition: NvInferRuntime.h:1220
bool isValid() const noexcept
Check whether the optimization profile can be passed to an IBuilderConfig object.
Definition: NvInferRuntime.h:1293
int32_t getNbShapeValues(char const *inputName) const noexcept
Get the number of values for an input shape tensor.
Definition: NvInferRuntime.h:1234
Callback from ExecutionContext::enqueueV3()
Definition: NvInferRuntime.h:2189
virtual void notifyShape(char const *tensorName, Dims const &dims) noexcept=0
Called by TensorRT when the shape of the output tensor is known.
virtual ~IOutputAllocator()=default
virtual int32_t getInterfaceVersion() const noexcept
Return the API version of this IOutputAllocator.
Definition: NvInferRuntime.h:2198
virtual void * reallocateOutput(char const *tensorName, void *currentMemory, uint64_t size, uint64_t alignment) noexcept=0
Return a pointer to memory for an output tensor, or nullptr if memory cannot be allocated.
Single registration point for all plugins in an application. It is used to find plugin implementation...
Definition: NvInferRuntimeCommon.h:1234
virtual bool registerCreator(IPluginCreator &creator, AsciiChar const *const pluginNamespace) noexcept=0
Register a plugin creator. Returns false if one with same type is already registered.
Definition: NvInferRuntime.h:351
IPluginV2DynamicExt * clone() const noexcept override=0
Clone the plugin object. This copies over internal plugin parameters as well and returns a new plugin...
virtual ~IPluginV2DynamicExt() noexcept
Definition: NvInferRuntime.h:503
Plugin class for user-implemented layers.
Definition: NvInferRuntimeCommon.h:704
Application-implemented interface for profiling.
Definition: NvInferRuntime.h:556
virtual void reportLayerTime(char const *layerName, float ms) noexcept=0
Layer time reporting callback.
virtual ~IProfiler() noexcept
Definition: NvInferRuntime.h:566
Updates weights in an engine.
Definition: NvInferRuntime.h:800
int32_t getMaxThreads() const noexcept
get the maximum number of threads that can be used by the refitter.
Definition: NvInferRuntime.h:1073
bool setNamedWeights(char const *name, Weights weights) noexcept
Specify new weights of given name.
Definition: NvInferRuntime.h:999
int32_t getAllWeights(int32_t size, char const **weightsNames) noexcept
Get names of all weights that could be refit.
Definition: NvInferRuntime.h:1035
bool setDynamicRange(char const *tensorName, float min, float max) noexcept
Definition: NvInferRuntime.h:900
ILogger * getLogger() const noexcept
get the logger with which the refitter was created
Definition: NvInferRuntime.h:1045
int32_t getTensorsWithDynamicRange(int32_t size, char const **tensorNames) const noexcept
Get names of all tensors that have refittable dynamic ranges.
Definition: NvInferRuntime.h:944
float getDynamicRangeMin(char const *tensorName) const noexcept
Get minimum of dynamic range.
Definition: NvInferRuntime.h:914
bool refitCudaEngine() noexcept
Updates associated engine. Return true if successful.
Definition: NvInferRuntime.h:832
int32_t getMissingWeights(int32_t size, char const **weightsNames) noexcept
Get names of missing weights.
Definition: NvInferRuntime.h:1019
int32_t getMissing(int32_t size, char const **layerNames, WeightsRole *roles) noexcept
Get description of missing weights.
Definition: NvInferRuntime.h:853
TRT_DEPRECATED void destroy() noexcept
Definition: NvInferRuntime.h:880
bool setMaxThreads(int32_t maxThreads) noexcept
Set the maximum number of threads.
Definition: NvInferRuntime.h:1059
float getDynamicRangeMax(char const *tensorName) const noexcept
Get maximum of dynamic range.
Definition: NvInferRuntime.h:928
apiv::VRefitter * mImpl
Definition: NvInferRuntime.h:1079
int32_t getAll(int32_t size, char const **layerNames, WeightsRole *roles) noexcept
Get description of all weights that could be refit.
Definition: NvInferRuntime.h:870
virtual ~IRefitter() noexcept=default
void setErrorRecorder(IErrorRecorder *recorder) noexcept
Set the ErrorRecorder for this interface.
Definition: NvInferRuntime.h:963
IErrorRecorder * getErrorRecorder() const noexcept
Get the ErrorRecorder assigned to this interface.
Definition: NvInferRuntime.h:978
Allows a serialized functionally unsafe engine to be deserialized.
Definition: NvInferRuntime.h:618
bool setMaxThreads(int32_t maxThreads) noexcept
Set the maximum number of threads.
Definition: NvInferRuntime.h:769
virtual ~IRuntime() noexcept=default
TRT_DEPRECATED void destroy() noexcept
Destroy this object.
Definition: NvInferRuntime.h:682
apiv::VRuntime * mImpl
Definition: NvInferRuntime.h:789
void setDLACore(int32_t dlaCore) noexcept
Sets the DLA core used by the network. Defaults to -1.
Definition: NvInferRuntime.h:653
int32_t getNbDLACores() const noexcept
Returns number of DLA hardware cores accessible or 0 if DLA is unavailable.
Definition: NvInferRuntime.h:670
ICudaEngine * deserializeCudaEngine(void const *blob, std::size_t size) noexcept
Deserialize an engine from a stream.
Definition: NvInferRuntime.h:745
int32_t getDLACore() const noexcept
Get the DLA core that the engine executes on.
Definition: NvInferRuntime.h:662
void setGpuAllocator(IGpuAllocator *allocator) noexcept
Set the GPU allocator.
Definition: NvInferRuntime.h:696
IErrorRecorder * getErrorRecorder() const noexcept
Get the ErrorRecorder assigned to this interface.
Definition: NvInferRuntime.h:730
ILogger * getLogger() const noexcept
Get the logger with which the runtime was created.
Definition: NvInferRuntime.h:755
int32_t getMaxThreads() const noexcept
Get the maximum number of threads that can be used by the runtime.
Definition: NvInferRuntime.h:783
void setErrorRecorder(IErrorRecorder *recorder) noexcept
Set the ErrorRecorder for this interface.
Definition: NvInferRuntime.h:715
Register the plugin creator to the registry The static registry object will be instantiated when the ...
Definition: NvInferRuntime.h:3468
PluginRegistrar()
Definition: NvInferRuntime.h:3470
An array of weights used as a layer parameter.
Definition: NvInferRuntime.h:126
DataType type
The type of the weights.
Definition: NvInferRuntime.h:128
int64_t count
The number of weights in the array.
Definition: NvInferRuntime.h:130
void const * values
The weight values, in a contiguous array.
Definition: NvInferRuntime.h:129
IRuntime * createInferRuntime(ILogger &logger) noexcept
Create an instance of a safe::IRuntime class.
The TensorRT API version 1 namespace.
uint32_t TacticSources
Represents a collection of one or more TacticSource values combined using bitwise-OR operations.
Definition: NvInferRuntime.h:1345
EngineCapability
List of supported engine capability flows.
Definition: NvInferRuntime.h:69
DimensionOperation
An operation on two IDimensionExpr, which represent integer expressions used in dimension computation...
Definition: NvInferRuntime.h:192
@ kSUB
Subtract the second element from the first.
@ kSUM
Sum of the two operands.
@ kPROD
Product of the two operands.
@ kFLOOR_DIV
Floor division of the first element by the second.
@ kEQUAL
1 if operands are equal, 0 otherwise.
@ kMIN
Minimum of the two operands.
@ kLESS
1 if first operand is less than second operand, 0 otherwise.
@ kMAX
Maximum of the two operands.
@ kCEIL_DIV
Division rounding up.
TensorIOMode
Definition of tensor IO Mode.
Definition: NvInferRuntimeCommon.h:1878
constexpr int32_t EnumMax< WeightsRole >() noexcept
Maximum number of elements in WeightsRole enum.
Definition: NvInferRuntime.h:587
@ kV2_DYNAMICEXT
IPluginV2DynamicExt.
constexpr int32_t EnumMax< LayerInformationFormat >() noexcept
Definition: NvInferRuntime.h:3260
DataType
The type of weights and tensors.
Definition: NvInferRuntimeCommon.h:117
DeviceType
The device that this layer/network will execute on.
Definition: NvInferRuntime.h:598
@ kCONSTANT
Constant layer.
@ kDEFAULT
Similar to ONNX Gather.
constexpr int32_t EnumMax< OptProfileSelector >() noexcept
Number of different values of OptProfileSelector enum.
Definition: NvInferRuntime.h:1105
WeightsRole
How a layer uses particular Weights.
Definition: NvInferRuntime.h:576
@ kSHIFT
shift part of IScaleLayer
@ kANY
Any other weights role.
@ kBIAS
bias for IConvolutionLayer, IDeconvolutionLayer, or IFullyConnectedLayer
@ kKERNEL
kernel for IConvolutionLayer, IDeconvolutionLayer, or IFullyConnectedLayer
constexpr int32_t EnumMax< ProfilingVerbosity >() noexcept
Maximum number of profile verbosity levels in ProfilingVerbosity enum.
Definition: NvInferRuntime.h:1370
ProfilingVerbosity
List of verbosity levels of layer information exposed in NVTX annotations and in IEngineInspector.
Definition: NvInferRuntime.h:1357
@ kLAYER_NAMES_ONLY
Print only the layer names. This is the default setting.
@ kDETAILED
Print detailed layer information including layer names and layer parameters.
TacticSource
List of tactic sources for TensorRT.
Definition: NvInferRuntime.h:1312
@ kEDGE_MASK_CONVOLUTIONS
TensorFormat PluginFormat
PluginFormat is reserved for backward compatibility.
Definition: NvInferRuntimeCommon.h:326
@ kMIN
Minimum of the two elements.
TensorFormat
Format of the input/output tensors.
Definition: NvInferRuntimeCommon.h:201
constexpr int32_t EnumMax< TacticSource >() noexcept
Maximum number of tactic sources in TacticSource enum.
Definition: NvInferRuntime.h:1334
LayerInformationFormat
The format in which the IEngineInspector prints the layer information.
Definition: NvInferRuntime.h:3252
@ kJSON
Print layer information in JSON format.
@ kONELINE
Print layer information in one line per layer.
constexpr int32_t EnumMax< DeviceType >() noexcept
Maximum number of elements in DeviceType enum.
Definition: NvInferRuntime.h:605
constexpr int32_t EnumMax< DimensionOperation >() noexcept
Maximum number of elements in DimensionOperation enum.
Definition: NvInferRuntime.h:206
TensorLocation
The location for tensor data storage, device or host.
Definition: NvInferRuntime.h:216
@ kHOST
Data stored on host.
@ kDEVICE
Data stored on device.
OptProfileSelector
When setting or querying optimization profile parameters (such as shape tensor inputs or dynamic dime...
Definition: NvInferRuntime.h:1093
@ kOPT
This is used to set or get the value that is used in the optimization (kernel selection).
Definition: NvInferRuntime.h:321
Dims min
Lower bounds on tensor’s dimensions.
Definition: NvInferRuntime.h:326
Dims max
Upper bounds on tensor’s dimensions.
Definition: NvInferRuntime.h:329
PluginTensorDesc desc
Information required to interpret a pointer to tensor data, except that desc.dims has -1 in place of ...
Definition: NvInferRuntime.h:323
Fields that a plugin might see for an input or output.
Definition: NvInferRuntimeCommon.h:350
Declaration of EnumMaxImpl struct to store maximum number of elements in an enumeration type.
Definition: NvInferRuntimeCommon.h:102