18#ifndef NV_INFER_RUNTIME_H
19#define NV_INFER_RUNTIME_H
28#define NV_INFER_INTERNAL_INCLUDE 1
30#undef NV_INFER_INTERNAL_INCLUDE
36class IExecutionContext;
39class IEngineInspector;
105 static constexpr int32_t kVALUE = 3;
149 return mImpl->data();
153 std::size_t
size() const noexcept
155 return mImpl->size();
161 return mImpl->type();
215 static constexpr int32_t kVALUE = 2;
239 return mImpl->isConstant();
250 return mImpl->getConstantValue();
265 return mImpl->isSizeTensor();
294 return mImpl->constant(value);
306 return mImpl->operation(op, first, second);
340 return mImpl->declareSizeTensor(outputIndex, opt, upper);
436 int32_t outputIndex,
DimsExprs const* inputs, int32_t nbInputs,
IExprBuilder& exprBuilder) noexcept = 0;
441 static constexpr int32_t kFORMAT_COMBINATION_LIMIT = 100;
475 virtual
bool supportsFormatCombination(
476 int32_t pos,
PluginTensorDesc const* inOut, int32_t nbInputs, int32_t nbOutputs) noexcept = 0;
528 int32_t nbOutputs) const noexcept = 0;
543 void const* const* inputs,
void* const* outputs,
void* workspace, cudaStream_t stream) noexcept = 0;
553 int32_t getTensorRTVersion() const noexcept
override
567 bool const*,
PluginFormat, int32_t)
noexcept override final
582 Dims getOutputDimensions(int32_t,
Dims const*, int32_t)
noexcept override final
594 TRT_DEPRECATED bool isOutputBroadcastAcrossBatch(int32_t,
bool const*, int32_t)
const noexcept override final
606 TRT_DEPRECATED bool canBroadcastInputAcrossBatch(int32_t)
const noexcept override final
614 size_t getWorkspaceSize(int32_t)
const noexcept override final
622 int32_t enqueue(int32_t,
void const*
const*,
void*
const*,
void*, cudaStream_t)
noexcept override final
656 virtual int64_t
read(
void* destination, int64_t nbBytes) = 0;
722 virtual int64_t
read(
void* destination, int64_t nbBytes, cudaStream_t stream)
noexcept = 0;
821 virtual
AsciiChar const* getPluginVersion() const noexcept = 0;
832 virtual
AsciiChar const* getPluginNamespace() const noexcept = 0;
843 static constexpr int32_t kDEFAULT_FORMAT_COMBINATION_LIMIT = 100;
892 DataType* outputTypes, int32_t nbOutputs,
const DataType* inputTypes, int32_t nbInputs)
const noexcept = 0;
916 int32_t nbShapeInputs,
DimsExprs* outputs, int32_t nbOutputs,
IExprBuilder& exprBuilder)
noexcept = 0;
1042 return kDEFAULT_FORMAT_COMBINATION_LIMIT;
1115 void const*
const* inputs,
void*
const* outputs,
void* workspace, cudaStream_t stream)
noexcept = 0;
1498 static constexpr int32_t kVALUE = 13;
1520 static constexpr int32_t kVALUE = 1;
1602 static constexpr int32_t kVALUE = 5;
1637 uint64_t
const size, uint64_t
const alignment,
AllocatorFlags const flags)
noexcept = 0;
1679 virtual void*
reallocate(
void*
const , uint64_t , uint64_t )
noexcept
1733 uint64_t
const size, uint64_t
const alignment,
AllocatorFlags const flags, cudaStream_t )
noexcept
1735 return allocate(size, alignment, flags);
1767 return deallocate(memory);
1775 return {
"IGpuAllocator", 1, 0};
1834 void setDLACore(int32_t dlaCore) noexcept
1836 mImpl->setDLACore(dlaCore);
1846 return mImpl->getDLACore();
1854 return mImpl->getNbDLACores();
1870 mImpl->setGpuAllocator(allocator);
1889 mImpl->setErrorRecorder(recorder);
1904 return mImpl->getErrorRecorder();
1919 return mImpl->deserializeCudaEngine(blob, size);
1940 return mImpl->deserializeCudaEngine(streamReader);
1960 return mImpl->deserializeCudaEngineV2(streamReader);
1970 return mImpl->getLogger();
1985 return mImpl->setMaxThreads(maxThreads);
1999 return mImpl->getMaxThreads();
2034 return mImpl->setTemporaryDirectory(path);
2045 return mImpl->getTemporaryDirectory();
2061 return mImpl->setTempfileControlFlags(flags);
2073 return mImpl->getTempfileControlFlags();
2083 return mImpl->getPluginRegistry();
2113 return mImpl->setEngineHostCodeAllowed(allowed);
2123 return mImpl->getEngineHostCodeAllowed();
2159 return mImpl->setWeights(layerName, role, weights);
2176 return mImpl->refitCudaEngine();
2197 return mImpl->getMissing(size, layerNames, roles);
2214 return mImpl->getAll(size, layerNames, roles);
2236 return mImpl->setDynamicRange(tensorName, min, max);
2252 return mImpl->getDynamicRangeMin(tensorName);
2268 return mImpl->getDynamicRangeMax(tensorName);
2286 return mImpl->getTensorsWithDynamicRange(size, tensorNames);
2305 mImpl->setErrorRecorder(recorder);
2320 return mImpl->getErrorRecorder();
2345 return mImpl->setNamedWeights(name, weights);
2365 return mImpl->getMissingWeights(size, weightsNames);
2381 return mImpl->getAllWeights(size, weightsNames);
2391 return mImpl->getLogger();
2407 return mImpl->setMaxThreads(maxThreads);
2421 return mImpl->getMaxThreads();
2448 return mImpl->setNamedWeightsWithLocation(name, weights, location);
2464 return mImpl->getNamedWeights(weightsName);
2480 return mImpl->getWeightsLocation(weightsName);
2496 return mImpl->unsetNamedWeights(weightsName);
2512 return mImpl->setWeightsValidation(weightsValidation);
2520 return mImpl->getWeightsValidation();
2542 return mImpl->refitCudaEngineAsync(stream);
2560 return mImpl->getWeightsPrototype(weightsName);
2649 return mImpl->setDimensions(inputName, select, dims);
2661 return mImpl->getDimensions(inputName, select);
2712 char const* inputName,
OptProfileSelector select, int32_t
const* values, int32_t nbValues)
noexcept
2714 return mImpl->setShapeValues(inputName, select, values, nbValues);
2727 return mImpl->getNbShapeValues(inputName);
2741 return mImpl->getShapeValues(inputName, select);
2759 return mImpl->setExtraMemoryTarget(target);
2771 return mImpl->getExtraMemoryTarget();
2788 return mImpl->isValid();
2837 char const* inputName,
OptProfileSelector select, int64_t
const* values, int32_t nbValues)
noexcept
2839 return mImpl->setShapeValuesV2(inputName, select, values, nbValues);
2851 return mImpl->getShapeValuesV2(inputName, select);
2982 return mImpl->setFlags(serializationFlags);
2994 return mImpl->getFlags();
3006 return mImpl->clearFlag(serializationFlag);
3018 return mImpl->setFlag(serializationFlag);
3030 return mImpl->getFlag(serializationFlag);
3085 return mImpl->setExecutionContextAllocationStrategy(strategy);
3095 return mImpl->getExecutionContextAllocationStrategy();
3125 Dims getTensorShape(
char const* tensorName) const noexcept
3127 return mImpl->getTensorShape(tensorName);
3142 return mImpl->getTensorDataType(tensorName);
3156 return mImpl->getNbLayers();
3170 return mImpl->serialize();
3188 return mImpl->createExecutionContext(strategy);
3205 return mImpl->getTensorLocation(tensorName);
3225 return mImpl->isShapeInferenceIO(tensorName);
3239 return mImpl->getTensorIOMode(tensorName);
3251 return mImpl->createExecutionContextWithoutDeviceMemory();
3263 return mImpl->createExecutionContextWithRuntimeConfig(runtimeConfig);
3276 return mImpl->createRuntimeConfig();
3288 return mImpl->getDeviceMemorySize();
3300 return mImpl->getDeviceMemorySizeForProfile(profileIndex);
3316 return mImpl->getDeviceMemorySizeV2();
3332 return mImpl->getDeviceMemorySizeForProfileV2(profileIndex);
3342 return mImpl->isRefittable();
3363 return mImpl->getTensorBytesPerComponent(tensorName);
3381 return mImpl->getTensorBytesPerComponentV2(tensorName, profileIndex);
3402 return mImpl->getTensorComponentsPerElement(tensorName);
3420 return mImpl->getTensorComponentsPerElementV2(tensorName, profileIndex);
3435 return mImpl->getTensorFormat(tensorName);
3449 return mImpl->getTensorFormatV2(tensorName, profileIndex);
3473 return mImpl->getTensorFormatDesc(tensorName);
3496 return mImpl->getTensorFormatDescV2(tensorName, profileIndex);
3513 return mImpl->getTensorVectorizedDim(tensorName);
3529 return mImpl->getTensorVectorizedDimV2(tensorName, profileIndex);
3544 return mImpl->getName();
3555 return mImpl->getNbOptimizationProfiles();
3575 return mImpl->getProfileShape(tensorName, profileIndex, select);
3599 char const* tensorName, int32_t profileIndex,
OptProfileSelector select)
const noexcept
3601 return mImpl->getProfileTensorValues(tensorName, profileIndex, select);
3616 return mImpl->getEngineCapability();
3635 return mImpl->setErrorRecorder(recorder);
3650 return mImpl->getErrorRecorder();
3664 return mImpl->hasImplicitBatchDimension();
3680 return mImpl->getTacticSources();
3692 return mImpl->getProfilingVerbosity();
3702 return mImpl->createEngineInspector();
3715 return mImpl->getNbIOTensors();
3727 return mImpl->getIOTensorName(index);
3738 return mImpl->getHardwareCompatibilityLevel();
3753 return mImpl->getNbAuxStreams();
3763 return mImpl->createSerializationConfig();
3779 return mImpl->serializeWithConfig(config);
3824 return mImpl->setWeightStreamingBudget(gpuMemoryBudget);
3844 return mImpl->getWeightStreamingBudget();
3867 return mImpl->getMinimumWeightStreamingBudget();
3883 return mImpl->getStreamableWeightsSize();
3925 return mImpl->setWeightStreamingBudgetV2(gpuMemoryBudget);
3943 return mImpl->getWeightStreamingBudgetV2();
3968 return mImpl->getWeightStreamingAutomaticBudget();
3996 return mImpl->getWeightStreamingScratchMemorySize();
4010 return mImpl->isDebugTensor(name);
4033 char const* tensorName, int32_t profileIndex,
OptProfileSelector select)
const noexcept
4035 return mImpl->getProfileTensorValuesV2(tensorName, profileIndex, select);
4052 return {
"IOutputAllocator", 1, 0};
4075 char const* tensorName,
void* currentMemory, uint64_t size, uint64_t alignment)
noexcept
4103 char const* tensorName,
void* currentMemory, uint64_t size, uint64_t alignment, cudaStream_t )
4105 return reallocateOutput(tensorName, currentMemory, size, alignment);
4139 return {
"IDebugListener", 1, 0};
4156 char const* name, cudaStream_t stream)
4193 void setDebugSync(
bool sync) noexcept
4195 mImpl->setDebugSync(sync);
4205 return mImpl->getDebugSync();
4215 mImpl->setProfiler(profiler);
4225 return mImpl->getProfiler();
4235 return mImpl->getEngine();
4249 mImpl->setName(name);
4259 return mImpl->getName();
4286 mImpl->setDeviceMemory(memory);
4308 return mImpl->setDeviceMemoryV2(memory, size);
4329 return mImpl->getTensorStrides(tensorName);
4344 return mImpl->getOptimizationProfile();
4362 return mImpl->setInputShape(tensorName, dims);
4399 return mImpl->getTensorShape(tensorName);
4415 return mImpl->allInputDimensionsSpecified();
4432 return mImpl->allInputShapesSpecified();
4451 mImpl->setErrorRecorder(recorder);
4466 return mImpl->getErrorRecorder();
4483 return mImpl->executeV2(bindings);
4527 return mImpl->setOptimizationProfileAsync(profileIndex, stream);
4543 mImpl->setEnqueueEmitsProfile(enqueueEmitsProfile);
4555 return mImpl->getEnqueueEmitsProfile();
4585 return mImpl->reportToProfiler();
4629 return mImpl->setTensorAddress(tensorName, data);
4646 return mImpl->getTensorAddress(tensorName);
4669 return mImpl->setOutputTensorAddress(tensorName, data);
4691 return mImpl->setInputTensorAddress(tensorName, data);
4710 return mImpl->getOutputTensorAddress(tensorName);
4741 int32_t
inferShapes(int32_t nbMaxNames,
char const** tensorNames)
noexcept
4743 return mImpl->inferShapes(nbMaxNames, tensorNames);
4760 return mImpl->updateDeviceMemorySizeForShapes();
4776 return mImpl->setInputConsumedEvent(event);
4786 return mImpl->getInputConsumedEvent();
4805 return mImpl->setOutputAllocator(tensorName, outputAllocator);
4818 return mImpl->getOutputAllocator(tensorName);
4836 return mImpl->getMaxOutputSize(tensorName);
4856 return mImpl->setTemporaryStorageAllocator(allocator);
4866 return mImpl->getTemporaryStorageAllocator();
4890 return mImpl->enqueueV3(stream);
4906 mImpl->setPersistentCacheLimit(size);
4917 return mImpl->getPersistentCacheLimit();
4941 return mImpl->setNvtxVerbosity(verbosity);
4953 return mImpl->getNvtxVerbosity();
4984 mImpl->setAuxStreams(auxStreams, nbStreams);
4996 return mImpl->setDebugListener(listener);
5006 return mImpl->getDebugListener();
5025 return mImpl->setTensorDebugState(name, flag);
5038 return mImpl->setAllTensorsDebugState(flag);
5048 return mImpl->getDebugState(name);
5058 return mImpl->getRuntimeConfig();
5120 return mImpl->setExecutionContext(context);
5132 return mImpl->getExecutionContext();
5157 return mImpl->getLayerInformation(layerIndex, format);
5180 return mImpl->getEngineInformation(format);
5199 mImpl->setErrorRecorder(recorder);
5214 return mImpl->getErrorRecorder();
5227extern "C" TENSORRTAPI void* createInferRuntime_INTERNAL(
void* logger, int32_t version)
noexcept;
5233extern "C" TENSORRTAPI void* createInferRefitter_INTERNAL(
void* engine,
void* logger, int32_t version)
noexcept;
5286template <
typename T>
5302#define REGISTER_TENSORRT_PLUGIN(name) \
5303 static nvinfer1::PluginRegistrar<name> pluginRegistrar##name {}
5372 cudaStream_t )
noexcept override = 0;
5424 uint64_t
const size, uint64_t
const alignment,
AllocatorFlags const flags)
noexcept override
5426 return allocateAsync(size, alignment, flags,
nullptr);
5449 return deallocateAsync(memory,
nullptr);
5457 return {
"IGpuAllocator", 1, 0};
5505 virtual
AsciiChar const* getPluginName() const noexcept = 0;
5513 virtual
AsciiChar const* getPluginVersion() const noexcept = 0;
5521 virtual
AsciiChar const* getPluginNamespace() const noexcept = 0;
nvinfer1::ILogger * getLogger() noexcept
Return the logger object.
int32_t getInferLibMajorVersion() noexcept
Return the library major version number.
nvinfer1::IPluginRegistry * getPluginRegistry() noexcept
Return the plugin registry.
int32_t getInferLibPatchVersion() noexcept
Return the library patch version number.
int32_t getInferLibMinorVersion() noexcept
Return the library minor version number.
int32_t getInferLibBuildVersion() noexcept
Return the library build version number.
#define TENSORRTAPI
Definition: NvInferRuntimeBase.h:69
#define NV_TENSORRT_VERSION
Definition: NvInferRuntimeBase.h:101
#define TRT_DEPRECATED
Definition: NvInferRuntimeBase.h:42
#define TRT_DEPRECATED_ENUM
Definition: NvInferRuntimeBase.h:43
Structure to define the dimensions of a tensor.
Definition: NvInferRuntimeBase.h:214
static constexpr int32_t MAX_DIMS
The maximum rank (number of dimensions) supported for a tensor.
Definition: NvInferRuntimeBase.h:217
Analog of class Dims with expressions instead of constants for the dimensions.
Definition: NvInferRuntime.h:350
IDimensionExpr const * d[Dims::MAX_DIMS]
The extent of each dimension.
Definition: NvInferRuntime.h:353
int32_t nbDims
The number of dimensions.
Definition: NvInferRuntime.h:352
An engine for executing inference on a built network, with functionally unsafe features.
Definition: NvInferRuntime.h:3111
int32_t getTensorBytesPerComponent(char const *tensorName) const noexcept
Return the number of bytes per component of an element, or -1 if the tensor is not vectorized or prov...
Definition: NvInferRuntime.h:3361
ISerializationConfig * createSerializationConfig() noexcept
Create a serialization configuration object.
Definition: NvInferRuntime.h:3761
TRT_DEPRECATED int64_t getWeightStreamingBudget() const noexcept
Returns the current weight streaming device memory budget in bytes.
Definition: NvInferRuntime.h:3842
char const * getIOTensorName(int32_t index) const noexcept
Return name of an IO tensor.
Definition: NvInferRuntime.h:3725
int64_t getWeightStreamingBudgetV2() const noexcept
Returns the current weight streaming device memory budget in bytes.
Definition: NvInferRuntime.h:3941
EngineCapability getEngineCapability() const noexcept
Determine what execution capability this engine has.
Definition: NvInferRuntime.h:3614
IErrorRecorder * getErrorRecorder() const noexcept
Get the ErrorRecorder assigned to this interface.
Definition: NvInferRuntime.h:3648
TensorFormat getTensorFormat(char const *tensorName, int32_t profileIndex) const noexcept
Return the tensor format of given profile, or TensorFormat::kLINEAR if the provided name does not map...
Definition: NvInferRuntime.h:3447
int64_t const * getProfileTensorValuesV2(char const *tensorName, int32_t profileIndex, OptProfileSelector select) const noexcept
Get the minimum / optimum / maximum values (not dimensions) for an input tensor given its name under ...
Definition: NvInferRuntime.h:4032
TRT_DEPRECATED bool hasImplicitBatchDimension() const noexcept
Query whether the engine was built with an implicit batch dimension.
Definition: NvInferRuntime.h:3662
apiv::VCudaEngine * mImpl
Definition: NvInferRuntime.h:4039
TRT_DEPRECATED size_t getDeviceMemorySizeForProfile(int32_t profileIndex) const noexcept
Return the maximum device memory required by the context for a profile.
Definition: NvInferRuntime.h:3298
IExecutionContext * createExecutionContext(ExecutionContextAllocationStrategy strategy=ExecutionContextAllocationStrategy::kSTATIC) noexcept
Create an execution context and specify the strategy for allocating internal activation memory.
Definition: NvInferRuntime.h:3185
char const * getTensorFormatDesc(char const *tensorName) const noexcept
Return the human readable description of the tensor format, or empty string if the provided name does...
Definition: NvInferRuntime.h:3471
Dims getProfileShape(char const *tensorName, int32_t profileIndex, OptProfileSelector select) const noexcept
Get the minimum / optimum / maximum dimensions for an input tensor given its name under an optimizati...
Definition: NvInferRuntime.h:3573
bool setWeightStreamingBudgetV2(int64_t gpuMemoryBudget) noexcept
Limit the maximum amount of GPU memory usable for network weights in bytes.
Definition: NvInferRuntime.h:3923
IExecutionContext * createExecutionContext(IRuntimeConfig *runtimeConfig) noexcept
Create an execution context with TensorRT JIT runtime config.
Definition: NvInferRuntime.h:3261
int32_t getNbAuxStreams() const noexcept
Return the number of auxiliary streams used by this engine.
Definition: NvInferRuntime.h:3751
int64_t getStreamableWeightsSize() const noexcept
Get the total size in bytes of all streamable weights.
Definition: NvInferRuntime.h:3881
DataType getTensorDataType(char const *tensorName) const noexcept
Determine the required data type for a buffer from its tensor name.
Definition: NvInferRuntime.h:3140
void setErrorRecorder(IErrorRecorder *recorder) noexcept
Set the ErrorRecorder for this interface.
Definition: NvInferRuntime.h:3633
TacticSources getTacticSources() const noexcept
return the tactic sources required by this engine.
Definition: NvInferRuntime.h:3678
IHostMemory * serializeWithConfig(ISerializationConfig &config) const noexcept
Serialize the network to a stream with the provided SerializationConfig.
Definition: NvInferRuntime.h:3777
virtual ~ICudaEngine() noexcept=default
int64_t getWeightStreamingAutomaticBudget() const noexcept
TensorRT automatically determines a device memory budget for the model to run. The budget is close to...
Definition: NvInferRuntime.h:3966
bool isDebugTensor(char const *name) const noexcept
Check if a tensor is marked as a debug tensor.
Definition: NvInferRuntime.h:4008
int32_t getTensorVectorizedDim(char const *tensorName, int32_t profileIndex) const noexcept
Return the dimension index that the buffer is vectorized of given profile, or -1 if the provided name...
Definition: NvInferRuntime.h:3527
char const * getName() const noexcept
Returns the name of the network associated with the engine.
Definition: NvInferRuntime.h:3542
ProfilingVerbosity getProfilingVerbosity() const noexcept
Return the ProfilingVerbosity the builder config was set to when the engine was built.
Definition: NvInferRuntime.h:3690
bool isShapeInferenceIO(char const *tensorName) const noexcept
True if tensor is required as input for shape calculations or is output from shape calculations.
Definition: NvInferRuntime.h:3223
TRT_DEPRECATED int32_t const * getProfileTensorValues(char const *tensorName, int32_t profileIndex, OptProfileSelector select) const noexcept
Get the minimum / optimum / maximum values (not dimensions) for an input tensor given its name under ...
Definition: NvInferRuntime.h:3598
int64_t getWeightStreamingScratchMemorySize() const noexcept
Returns the size of the scratch memory required by the current weight streaming budget.
Definition: NvInferRuntime.h:3994
TRT_DEPRECATED bool setWeightStreamingBudget(int64_t gpuMemoryBudget) noexcept
Limit the maximum amount of GPU memory usable for network weights in bytes.
Definition: NvInferRuntime.h:3822
int64_t getDeviceMemorySizeV2() const noexcept
Return the maximum device memory required by the context over all profiles.
Definition: NvInferRuntime.h:3314
int32_t getTensorVectorizedDim(char const *tensorName) const noexcept
Return the dimension index that the buffer is vectorized, or -1 if the provided name does not map to ...
Definition: NvInferRuntime.h:3511
TRT_DEPRECATED size_t getDeviceMemorySize() const noexcept
Return the maximum device memory required by the context over all profiles.
Definition: NvInferRuntime.h:3286
int32_t getTensorComponentsPerElement(char const *tensorName, int32_t profileIndex) const noexcept
Return the number of components included in one element of given profile, or -1 if tensor is not vect...
Definition: NvInferRuntime.h:3418
int64_t getDeviceMemorySizeForProfileV2(int32_t profileIndex) const noexcept
Return the maximum device memory required by the context for a profile.
Definition: NvInferRuntime.h:3330
IRuntimeConfig * createRuntimeConfig() noexcept
Create a runtime config for TensorRT JIT. The caller is responsible for ownership of the returned IRu...
Definition: NvInferRuntime.h:3274
TensorFormat getTensorFormat(char const *tensorName) const noexcept
Return the tensor format, or TensorFormat::kLINEAR if the provided name does not map to an input or o...
Definition: NvInferRuntime.h:3433
IHostMemory * serialize() const noexcept
Serialize the network to a stream.
Definition: NvInferRuntime.h:3168
TensorLocation getTensorLocation(char const *tensorName) const noexcept
Get whether an input or output tensor must be on GPU or CPU.
Definition: NvInferRuntime.h:3203
IEngineInspector * createEngineInspector() const noexcept
Create a new engine inspector which prints the layer information in an engine or an execution context...
Definition: NvInferRuntime.h:3700
int32_t getTensorBytesPerComponent(char const *tensorName, int32_t profileIndex) const noexcept
Return the number of bytes per component of an element given of given profile, or -1 if the tensor is...
Definition: NvInferRuntime.h:3379
HardwareCompatibilityLevel getHardwareCompatibilityLevel() const noexcept
Return the hardware compatibility level of this engine.
Definition: NvInferRuntime.h:3736
int32_t getNbOptimizationProfiles() const noexcept
Get the number of optimization profiles defined for this engine.
Definition: NvInferRuntime.h:3553
TRT_DEPRECATED IExecutionContext * createExecutionContextWithoutDeviceMemory() noexcept
create an execution context without any device memory allocated
Definition: NvInferRuntime.h:3249
char const * getTensorFormatDesc(char const *tensorName, int32_t profileIndex) const noexcept
Return the human readable description of the tensor format of given profile, or empty string if the p...
Definition: NvInferRuntime.h:3494
TRT_DEPRECATED int64_t getMinimumWeightStreamingBudget() const noexcept
The minimum number of bytes of GPU memory required by network weights for successful weight streaming...
Definition: NvInferRuntime.h:3865
TensorIOMode getTensorIOMode(char const *tensorName) const noexcept
Determine whether a tensor is an input or output tensor.
Definition: NvInferRuntime.h:3237
int32_t getNbLayers() const noexcept
Get the number of layers in the network.
Definition: NvInferRuntime.h:3154
int32_t getNbIOTensors() const noexcept
Return number of IO tensors.
Definition: NvInferRuntime.h:3713
int32_t getTensorComponentsPerElement(char const *tensorName) const noexcept
Return the number of components included in one element, or -1 if tensor is not vectorized or if the ...
Definition: NvInferRuntime.h:3400
bool isRefittable() const noexcept
Return true if an engine can be refit.
Definition: NvInferRuntime.h:3340
An IDimensionExpr represents an integer expression constructed from constants, input dimensions,...
Definition: NvInferRuntime.h:232
bool isConstant() const noexcept
Return true if expression is a build-time constant.
Definition: NvInferRuntime.h:237
bool isSizeTensor() const noexcept
Return true if this denotes the value of a size tensor.
Definition: NvInferRuntime.h:263
virtual ~IDimensionExpr() noexcept=default
apiv::VDimensionExpr * mImpl
Definition: NvInferRuntime.h:254
int64_t getConstantValue() const noexcept
Get the value of the constant.
Definition: NvInferRuntime.h:248
An engine inspector which prints out the layer information of an engine or an execution context.
Definition: NvInferRuntime.h:5102
char const * getLayerInformation(int32_t layerIndex, LayerInformationFormat format) const noexcept
Get a string describing the information about a specific layer in the current engine or the execution...
Definition: NvInferRuntime.h:5155
IErrorRecorder * getErrorRecorder() const noexcept
Get the ErrorRecorder assigned to this interface.
Definition: NvInferRuntime.h:5212
void setErrorRecorder(IErrorRecorder *recorder) noexcept
Set the ErrorRecorder for this interface.
Definition: NvInferRuntime.h:5197
IExecutionContext const * getExecutionContext() const noexcept
Get the context currently being inspected.
Definition: NvInferRuntime.h:5130
apiv::VEngineInspector * mImpl
Definition: NvInferRuntime.h:5218
virtual ~IEngineInspector() noexcept=default
char const * getEngineInformation(LayerInformationFormat format) const noexcept
Get a string describing the information about all the layers in the current engine or the execution c...
Definition: NvInferRuntime.h:5178
Context for executing inference using an engine, with functionally unsafe features.
Definition: NvInferRuntime.h:4181
IOutputAllocator * getOutputAllocator(char const *tensorName) const noexcept
Get output allocator associated with output tensor of given name, or nullptr if the provided name doe...
Definition: NvInferRuntime.h:4816
IErrorRecorder * getErrorRecorder() const noexcept
Get the ErrorRecorder assigned to this interface.
Definition: NvInferRuntime.h:4464
bool reportToProfiler() const noexcept
Calculate layer timing info for the current optimization profile in IExecutionContext and update the ...
Definition: NvInferRuntime.h:4583
virtual ~IExecutionContext() noexcept=default
void setDeviceMemory(void *memory) noexcept
Set the device memory for use by this execution context.
Definition: NvInferRuntime.h:4284
TRT_DEPRECATED bool allInputShapesSpecified() const noexcept
Whether all input shape bindings have been specified.
Definition: NvInferRuntime.h:4430
bool setTensorDebugState(char const *name, bool flag) noexcept
Set debug state of tensor given the tensor name.
Definition: NvInferRuntime.h:5023
char const * getName() const noexcept
Return the name of the execution context.
Definition: NvInferRuntime.h:4257
IGpuAllocator * getTemporaryStorageAllocator() const noexcept
Get allocator set by setTemporaryStorageAllocator.
Definition: NvInferRuntime.h:4864
void setEnqueueEmitsProfile(bool enqueueEmitsProfile) noexcept
Set whether enqueue emits layer timing to the profiler.
Definition: NvInferRuntime.h:4541
Dims getTensorShape(char const *tensorName) const noexcept
Return the shape of the given input or output.
Definition: NvInferRuntime.h:4397
bool getDebugState(char const *name) const noexcept
Definition: NvInferRuntime.h:5046
bool setInputShape(char const *tensorName, Dims const &dims) noexcept
Set shape of given input.
Definition: NvInferRuntime.h:4360
bool executeV2(void *const *bindings) noexcept
Synchronously execute a network.
Definition: NvInferRuntime.h:4481
bool getEnqueueEmitsProfile() const noexcept
Get the enqueueEmitsProfile state.
Definition: NvInferRuntime.h:4553
void const * getTensorAddress(char const *tensorName) const noexcept
Get memory address bound to given input or output tensor, or nullptr if the provided name does not ma...
Definition: NvInferRuntime.h:4644
bool setOutputAllocator(char const *tensorName, IOutputAllocator *outputAllocator) noexcept
Set output allocator to use for output tensor of given name. Pass nullptr to outputAllocator to unset...
Definition: NvInferRuntime.h:4803
bool setOptimizationProfileAsync(int32_t profileIndex, cudaStream_t stream) noexcept
Select an optimization profile for the current context with async semantics.
Definition: NvInferRuntime.h:4525
apiv::VExecutionContext * mImpl
Definition: NvInferRuntime.h:5062
bool setOutputTensorAddress(char const *tensorName, void *data) noexcept
Set the memory address for a given output tensor.
Definition: NvInferRuntime.h:4667
void setPersistentCacheLimit(size_t size) noexcept
Set the maximum size for persistent cache usage.
Definition: NvInferRuntime.h:4904
size_t getPersistentCacheLimit() const noexcept
Get the maximum size for persistent cache usage.
Definition: NvInferRuntime.h:4915
bool setAllTensorsDebugState(bool flag) noexcept
Definition: NvInferRuntime.h:5036
ICudaEngine const & getEngine() const noexcept
Get the associated engine.
Definition: NvInferRuntime.h:4233
ProfilingVerbosity getNvtxVerbosity() const noexcept
Get the NVTX verbosity of the execution context.
Definition: NvInferRuntime.h:4951
size_t updateDeviceMemorySizeForShapes() noexcept
Recompute the internal activation buffer sizes based on the current input shapes, and return the tota...
Definition: NvInferRuntime.h:4758
void setAuxStreams(cudaStream_t *auxStreams, int32_t nbStreams) noexcept
Set the auxiliary streams that TensorRT should launch kernels on in the next enqueueV3() call.
Definition: NvInferRuntime.h:4982
int64_t getMaxOutputSize(char const *tensorName) const noexcept
Get upper bound on an output tensor's size, in bytes, based on the current optimization profile and i...
Definition: NvInferRuntime.h:4834
int32_t inferShapes(int32_t nbMaxNames, char const **tensorNames) noexcept
Run shape calculations.
Definition: NvInferRuntime.h:4741
bool setDebugListener(IDebugListener *listener) noexcept
Set DebugListener for this execution context.
Definition: NvInferRuntime.h:4994
bool setTensorAddress(char const *tensorName, void *data) noexcept
Set memory address for given input or output tensor.
Definition: NvInferRuntime.h:4627
bool setTemporaryStorageAllocator(IGpuAllocator *allocator) noexcept
Specify allocator to use for internal temporary storage.
Definition: NvInferRuntime.h:4854
void * getOutputTensorAddress(char const *tensorName) const noexcept
Get memory address for given output.
Definition: NvInferRuntime.h:4708
bool enqueueV3(cudaStream_t stream) noexcept
Enqueue inference on a stream.
Definition: NvInferRuntime.h:4888
IDebugListener * getDebugListener() noexcept
Get the DebugListener of this execution context.
Definition: NvInferRuntime.h:5004
int32_t getOptimizationProfile() const noexcept
Get the index of the currently selected optimization profile.
Definition: NvInferRuntime.h:4342
bool setInputTensorAddress(char const *tensorName, void const *data) noexcept
Set memory address for given input.
Definition: NvInferRuntime.h:4689
bool getDebugSync() const noexcept
Get the debug sync flag.
Definition: NvInferRuntime.h:4203
bool setInputConsumedEvent(cudaEvent_t event) noexcept
Mark input as consumed.
Definition: NvInferRuntime.h:4774
Dims getTensorStrides(char const *tensorName) const noexcept
Return the strides of the buffer for the given tensor name.
Definition: NvInferRuntime.h:4327
bool setNvtxVerbosity(ProfilingVerbosity verbosity) noexcept
Set the verbosity of the NVTX markers in the execution context.
Definition: NvInferRuntime.h:4939
IProfiler * getProfiler() const noexcept
Get the profiler.
Definition: NvInferRuntime.h:4223
void setErrorRecorder(IErrorRecorder *recorder) noexcept
Set the ErrorRecorder for this interface.
Definition: NvInferRuntime.h:4449
void setDeviceMemoryV2(void *memory, int64_t size) noexcept
Set the device memory and its corresponding size for use by this execution context.
Definition: NvInferRuntime.h:4306
bool allInputDimensionsSpecified() const noexcept
Whether all dynamic dimensions of input tensors have been specified.
Definition: NvInferRuntime.h:4413
void setProfiler(IProfiler *profiler) noexcept
Set the profiler.
Definition: NvInferRuntime.h:4213
void setName(char const *name) noexcept
Set the name of the execution context.
Definition: NvInferRuntime.h:4247
cudaEvent_t getInputConsumedEvent() const noexcept
The event associated with consuming the input.
Definition: NvInferRuntime.h:4784
IRuntimeConfig * getRuntimeConfig() const noexcept
Get the runtime config object used during execution context creation.
Definition: NvInferRuntime.h:5056
Object for constructing IDimensionExpr.
Definition: NvInferRuntime.h:287
IDimensionExpr const * operation(DimensionOperation op, IDimensionExpr const &first, IDimensionExpr const &second) noexcept
Get the operation.
Definition: NvInferRuntime.h:303
virtual ~IExprBuilder() noexcept=default
IDimensionExpr const * constant(int64_t value) noexcept
Return pointer to IDimensionExpr for given value.
Definition: NvInferRuntime.h:292
apiv::VExprBuilder * mImpl
Definition: NvInferRuntime.h:310
IDimensionExpr const * declareSizeTensor(int32_t outputIndex, IDimensionExpr const &opt, IDimensionExpr const &upper)
Declare a size tensor at the given output index, with the specified auto-tuning formula and upper bou...
Definition: NvInferRuntime.h:338
Class to handle library allocated memory that is accessible to the user.
Definition: NvInferRuntime.h:142
void * data() const noexcept
A pointer to the raw data that is owned by the library.
Definition: NvInferRuntime.h:147
DataType type() const noexcept
The type of the memory that was allocated.
Definition: NvInferRuntime.h:159
std::size_t size() const noexcept
The size in bytes of the data that was allocated.
Definition: NvInferRuntime.h:153
apiv::VHostMemory * mImpl
Definition: NvInferRuntime.h:165
virtual ~IHostMemory() noexcept=default
A virtual base class to find a logger. Allows a plugin to find an instance of a logger if it needs to...
Definition: NvInferRuntime.h:5316
virtual ILogger * findLogger()=0
Get the logger used by the engine or execution context which called the plugin method.
virtual ~ILoggerFinder()=default
Application-implemented logging interface for the builder, refitter and runtime.
Definition: NvInferRuntime.h:1542
virtual ~ILogger()=default
Severity
The severity corresponding to a log message.
Definition: NvInferRuntime.h:1550
virtual void log(Severity severity, AsciiChar const *msg) noexcept=0
A callback implemented by the application to handle logging messages;.
Forward declaration of IEngineInspector for use by other interfaces.
Definition: NvInferRuntime.h:51
INoCopy & operator=(INoCopy &&other)=delete
INoCopy(INoCopy const &other)=delete
INoCopy(INoCopy &&other)=delete
virtual ~INoCopy()=default
INoCopy & operator=(INoCopy const &other)=delete
Optimization profile for dynamic input dimensions and shape tensors.
Definition: NvInferRuntime.h:2618
TRT_DEPRECATED int32_t const * getShapeValues(char const *inputName, OptProfileSelector select) const noexcept
Get the minimum / optimum / maximum values for an input shape tensor.
Definition: NvInferRuntime.h:2739
apiv::VOptimizationProfile * mImpl
Definition: NvInferRuntime.h:2855
Dims getDimensions(char const *inputName, OptProfileSelector select) const noexcept
Get the minimum / optimum / maximum dimensions for a dynamic input tensor.
Definition: NvInferRuntime.h:2659
TRT_DEPRECATED bool setShapeValues(char const *inputName, OptProfileSelector select, int32_t const *values, int32_t nbValues) noexcept
Set the minimum / optimum / maximum values for an input shape tensor.
Definition: NvInferRuntime.h:2711
virtual ~IOptimizationProfile() noexcept=default
float getExtraMemoryTarget() const noexcept
Get the extra memory target that has been defined for this profile.
Definition: NvInferRuntime.h:2769
bool setExtraMemoryTarget(float target) noexcept
Set a target for extra GPU memory that may be used by this profile.
Definition: NvInferRuntime.h:2757
bool setDimensions(char const *inputName, OptProfileSelector select, Dims const &dims) noexcept
Set the minimum / optimum / maximum dimensions for a dynamic input tensor.
Definition: NvInferRuntime.h:2647
bool isValid() const noexcept
Check whether the optimization profile can be passed to an IBuilderConfig object.
Definition: NvInferRuntime.h:2786
int64_t const * getShapeValuesV2(char const *inputName, OptProfileSelector select) const noexcept
Get the minimum / optimum / maximum values for an input shape tensor.
Definition: NvInferRuntime.h:2849
bool setShapeValuesV2(char const *inputName, OptProfileSelector select, int64_t const *values, int32_t nbValues) noexcept
Set the minimum / optimum / maximum values for an input shape tensor.
Definition: NvInferRuntime.h:2836
int32_t getNbShapeValues(char const *inputName) const noexcept
Get the number of values for an input shape tensor.
Definition: NvInferRuntime.h:2725
Single registration point for all plugins in an application. It is used to find plugin implementation...
Definition: NvInferRuntimeCommon.h:56
virtual TRT_DEPRECATED bool registerCreator(IPluginCreator &creator, AsciiChar const *const pluginNamespace) noexcept=0
Register a plugin creator implementing IPluginCreator. Returns false if any plugin creator with the s...
Interface for plugins to access per context resources provided by TensorRT.
Definition: NvInferRuntime.h:767
virtual IErrorRecorder * getErrorRecorder() const noexcept=0
Get the error recorder associated with the resource context.
IPluginResourceContext & operator=(IPluginResourceContext const &) &=default
virtual IGpuAllocator * getGpuAllocator() const noexcept=0
Get the GPU allocator associated with the resource context.
Similar to IPluginV2Ext, but with support for dynamic shapes.
Definition: NvInferRuntime.h:407
IPluginV2DynamicExt * clone() const noexcept override=0
Clone the plugin object. This copies over internal plugin parameters as well and returns a new plugin...
virtual ~IPluginV2DynamicExt() noexcept
Definition: NvInferRuntime.h:558
Plugin class for user-implemented layers.
Definition: NvInferRuntimePlugin.h:474
Updates weights in an engine.
Definition: NvInferRuntime.h:2138
bool refitCudaEngineAsync(cudaStream_t stream) noexcept
Enqueue weights refitting of the associated engine on the given stream.
Definition: NvInferRuntime.h:2540
int32_t getMaxThreads() const noexcept
get the maximum number of threads that can be used by the refitter.
Definition: NvInferRuntime.h:2419
TensorLocation getWeightsLocation(char const *weightsName) const noexcept
Get location for the weights associated with the given name.
Definition: NvInferRuntime.h:2478
bool setNamedWeights(char const *name, Weights weights) noexcept
Specify new weights of given name.
Definition: NvInferRuntime.h:2343
int32_t getAllWeights(int32_t size, char const **weightsNames) noexcept
Get names of all weights that could be refit.
Definition: NvInferRuntime.h:2379
ILogger * getLogger() const noexcept
get the logger with which the refitter was created
Definition: NvInferRuntime.h:2389
bool refitCudaEngine() noexcept
Refits associated engine.
Definition: NvInferRuntime.h:2174
int32_t getMissingWeights(int32_t size, char const **weightsNames) noexcept
Get names of missing weights.
Definition: NvInferRuntime.h:2363
TRT_DEPRECATED float getDynamicRangeMax(char const *tensorName) const noexcept
Get maximum of dynamic range.
Definition: NvInferRuntime.h:2266
int32_t getMissing(int32_t size, char const **layerNames, WeightsRole *roles) noexcept
Get description of missing weights.
Definition: NvInferRuntime.h:2195
Weights getNamedWeights(char const *weightsName) const noexcept
Get weights associated with the given name.
Definition: NvInferRuntime.h:2462
bool unsetNamedWeights(char const *weightsName) noexcept
Unset weights associated with the given name.
Definition: NvInferRuntime.h:2494
Weights getWeightsPrototype(char const *weightsName) const noexcept
Get the Weights prototype associated with the given name.
Definition: NvInferRuntime.h:2558
bool setMaxThreads(int32_t maxThreads) noexcept
Set the maximum number of threads.
Definition: NvInferRuntime.h:2405
TRT_DEPRECATED float getDynamicRangeMin(char const *tensorName) const noexcept
Get minimum of dynamic range.
Definition: NvInferRuntime.h:2250
TRT_DEPRECATED int32_t getTensorsWithDynamicRange(int32_t size, char const **tensorNames) const noexcept
Get names of all tensors that have refittable dynamic ranges.
Definition: NvInferRuntime.h:2284
bool setNamedWeights(char const *name, Weights weights, TensorLocation location) noexcept
Specify new weights on a specified device of given name.
Definition: NvInferRuntime.h:2446
void setWeightsValidation(bool weightsValidation) noexcept
Set whether to validate weights during refitting.
Definition: NvInferRuntime.h:2510
TRT_DEPRECATED bool setDynamicRange(char const *tensorName, float min, float max) noexcept
Definition: NvInferRuntime.h:2234
apiv::VRefitter * mImpl
Definition: NvInferRuntime.h:2564
int32_t getAll(int32_t size, char const **layerNames, WeightsRole *roles) noexcept
Get description of all weights that could be refit.
Definition: NvInferRuntime.h:2212
virtual ~IRefitter() noexcept=default
bool getWeightsValidation() const noexcept
Get whether to validate weights values during refitting.
Definition: NvInferRuntime.h:2518
void setErrorRecorder(IErrorRecorder *recorder) noexcept
Set the ErrorRecorder for this interface.
Definition: NvInferRuntime.h:2303
IErrorRecorder * getErrorRecorder() const noexcept
Get the ErrorRecorder assigned to this interface.
Definition: NvInferRuntime.h:2318
A class for runtime configuration. This class is used during execution context creation.
Definition: NvInferRuntime.h:3074
virtual ~IRuntimeConfig() noexcept=default
apiv::VRuntimeConfig * mImpl
Definition: NvInferRuntime.h:3100
ExecutionContextAllocationStrategy getExecutionContextAllocationStrategy() const noexcept
Get the execution context allocation strategy.
Definition: NvInferRuntime.h:3093
Allows a serialized functionally unsafe engine to be deserialized.
Definition: NvInferRuntime.h:1819
bool setMaxThreads(int32_t maxThreads) noexcept
Set the maximum number of threads.
Definition: NvInferRuntime.h:1983
IRuntime * loadRuntime(char const *path) noexcept
Load IRuntime from the file.
Definition: NvInferRuntime.h:2099
bool getEngineHostCodeAllowed() const noexcept
Get whether the runtime is allowed to deserialize engines with host executable code.
Definition: NvInferRuntime.h:2121
TempfileControlFlags getTempfileControlFlags() const noexcept
Get the tempfile control flags for this runtime.
Definition: NvInferRuntime.h:2071
void setEngineHostCodeAllowed(bool allowed) noexcept
Set whether the runtime is allowed to deserialize engines with host executable code.
Definition: NvInferRuntime.h:2111
virtual ~IRuntime() noexcept=default
void setTemporaryDirectory(char const *path) noexcept
Set the directory that will be used by this runtime for temporary files.
Definition: NvInferRuntime.h:2032
IPluginRegistry & getPluginRegistry() noexcept
Get the local plugin registry that can be used by the runtime.
Definition: NvInferRuntime.h:2081
apiv::VRuntime * mImpl
Definition: NvInferRuntime.h:2127
int32_t getNbDLACores() const noexcept
Returns number of DLA hardware cores accessible or 0 if DLA is unavailable.
Definition: NvInferRuntime.h:1852
ICudaEngine * deserializeCudaEngine(void const *blob, std::size_t size) noexcept
Deserialize an engine from host memory.
Definition: NvInferRuntime.h:1917
void setTempfileControlFlags(TempfileControlFlags flags) noexcept
Set the tempfile control flags for this runtime.
Definition: NvInferRuntime.h:2059
int32_t getDLACore() const noexcept
Get the DLA core that the engine executes on.
Definition: NvInferRuntime.h:1844
void setGpuAllocator(IGpuAllocator *allocator) noexcept
Set the GPU allocator.
Definition: NvInferRuntime.h:1868
IErrorRecorder * getErrorRecorder() const noexcept
get the ErrorRecorder assigned to this interface.
Definition: NvInferRuntime.h:1902
ICudaEngine * deserializeCudaEngine(IStreamReaderV2 &streamReader)
Deserialize an engine from a stream. IStreamReaderV2 is expected to support reading to both host and ...
Definition: NvInferRuntime.h:1958
ILogger * getLogger() const noexcept
get the logger with which the runtime was created
Definition: NvInferRuntime.h:1968
int32_t getMaxThreads() const noexcept
Get the maximum number of threads that can be used by the runtime.
Definition: NvInferRuntime.h:1997
char const * getTemporaryDirectory() const noexcept
Get the directory that will be used by this runtime for temporary files.
Definition: NvInferRuntime.h:2043
TRT_DEPRECATED ICudaEngine * deserializeCudaEngine(IStreamReader &streamReader)
Deserialize an engine from a stream.
Definition: NvInferRuntime.h:1938
void setErrorRecorder(IErrorRecorder *recorder) noexcept
Set the ErrorRecorder for this interface.
Definition: NvInferRuntime.h:1887
Holds properties for configuring an engine to serialize the binary.
Definition: NvInferRuntime.h:2965
virtual ~ISerializationConfig() noexcept=default
bool clearFlag(SerializationFlag serializationFlag) noexcept
clear a serialization flag.
Definition: NvInferRuntime.h:3004
bool setFlag(SerializationFlag serializationFlag) noexcept
Set a serialization flag.
Definition: NvInferRuntime.h:3016
SerializationFlags getFlags() const noexcept
Get the serialization flags for this config.
Definition: NvInferRuntime.h:2992
bool getFlag(SerializationFlag serializationFlag) const noexcept
Returns true if the serialization flag is set.
Definition: NvInferRuntime.h:3028
apiv::VSerializationConfig * mImpl
Definition: NvInferRuntime.h:3034
An Interface class for version control.
Definition: NvInferRuntimeBase.h:274
Version information associated with a TRT interface.
Definition: NvInferRuntimeBase.h:239
Register the plugin creator to the registry The static registry object will be instantiated when the ...
Definition: NvInferRuntime.h:5288
PluginRegistrar()
Definition: NvInferRuntime.h:5290
An array of weights used as a layer parameter.
Definition: NvInferRuntime.h:124
DataType type
The type of the weights.
Definition: NvInferRuntime.h:126
int64_t count
The number of weights in the array.
Definition: NvInferRuntime.h:128
void const * values
The weight values, in a contiguous array.
Definition: NvInferRuntime.h:127
Definition: NvInferRuntime.h:4132
virtual bool processDebugTensor(void const *addr, TensorLocation location, DataType type, Dims const &shape, char const *name, cudaStream_t stream)=0
Callback function that is called when a debug tensor’s value is updated and the debug state of the te...
InterfaceInfo getInterfaceInfo() const noexcept override
Return version information associated with this interface. Applications must not override this method...
Definition: NvInferRuntime.h:4137
~IDebugListener() override=default
Definition: NvInferRuntimeBase.h:411
Definition: NvInferRuntime.h:1610
virtual void * allocateAsync(uint64_t const size, uint64_t const alignment, AllocatorFlags const flags, cudaStream_t) noexcept
A thread-safe callback implemented by the application to handle stream-ordered acquisition of GPU mem...
Definition: NvInferRuntime.h:1732
InterfaceInfo getInterfaceInfo() const noexcept override
Return version information associated with this interface. Applications must not override this method...
Definition: NvInferRuntime.h:1773
virtual TRT_DEPRECATED bool deallocate(void *const memory) noexcept=0
A thread-safe callback implemented by the application to handle release of GPU memory.
~IGpuAllocator() override=default
virtual void * reallocate(void *const, uint64_t, uint64_t) noexcept
A thread-safe callback implemented by the application to resize an existing allocation.
Definition: NvInferRuntime.h:1679
virtual TRT_DEPRECATED void * allocate(uint64_t const size, uint64_t const alignment, AllocatorFlags const flags) noexcept=0
A thread-safe callback implemented by the application to handle acquisition of GPU memory.
virtual bool deallocateAsync(void *const memory, cudaStream_t) noexcept
A thread-safe callback implemented by the application to handle stream-ordered release of GPU memory.
Definition: NvInferRuntime.h:1765
Definition: NvInferRuntime.h:5337
bool deallocateAsync(void *const memory, cudaStream_t) noexcept override=0
A thread-safe callback implemented by the application to handle stream-ordered asynchronous release o...
IGpuAsyncAllocator()=default
void * allocateAsync(uint64_t const size, uint64_t const alignment, AllocatorFlags const flags, cudaStream_t) noexcept override=0
A thread-safe callback implemented by the application to handle stream-ordered asynchronous acquisiti...
TRT_DEPRECATED void * allocate(uint64_t const size, uint64_t const alignment, AllocatorFlags const flags) noexcept override
A thread-safe callback implemented by the application to handle acquisition of GPU memory.
Definition: NvInferRuntime.h:5423
TRT_DEPRECATED bool deallocate(void *const memory) noexcept override
A thread-safe callback implemented by the application to handle release of GPU memory.
Definition: NvInferRuntime.h:5447
InterfaceInfo getInterfaceInfo() const noexcept override
Return version information associated with this interface. Applications must not override this method...
Definition: NvInferRuntime.h:5455
~IGpuAsyncAllocator() override=default
Definition: NvInferRuntime.h:4045
InterfaceInfo getInterfaceInfo() const noexcept override
Return version information associated with this interface. Applications must not override this method...
Definition: NvInferRuntime.h:4050
virtual void * reallocateOutputAsync(char const *tensorName, void *currentMemory, uint64_t size, uint64_t alignment, cudaStream_t)
Return a pointer to memory for an output tensor, or nullptr if memory cannot be allocated....
Definition: NvInferRuntime.h:4102
virtual void notifyShape(char const *tensorName, Dims const &dims) noexcept=0
Called by TensorRT when the shape of the output tensor is known.
virtual TRT_DEPRECATED void * reallocateOutput(char const *tensorName, void *currentMemory, uint64_t size, uint64_t alignment) noexcept
Return a pointer to memory for an output tensor, or nullptr if memory cannot be allocated....
Definition: NvInferRuntime.h:4074
Definition: NvInferPluginBase.h:141
Definition: NvInferPluginBase.h:193
Definition: NvInferRuntime.h:5462
virtual PluginFieldCollection const * getFieldNames() noexcept=0
Return a list of fields that need to be passed to createPlugin() when creating a plugin for use in th...
InterfaceInfo getInterfaceInfo() const noexcept override
Return version information associated with this interface. Applications must not override this method...
Definition: NvInferRuntime.h:5467
virtual IPluginV3 * createPlugin(AsciiChar const *name, PluginFieldCollection const *fc, TensorRTPhase phase) noexcept=0
Return a plugin object. Return nullptr in case of error.
Definition: NvInferPluginBase.h:206
Definition: NvInferRuntime.h:836
virtual int32_t getFormatCombinationLimit() noexcept
Return the maximum number of format combinations that will be timed by TensorRT during the build phas...
Definition: NvInferRuntime.h:1040
virtual int32_t getNbOutputs() const noexcept=0
Get the number of outputs from the plugin.
virtual int32_t configurePlugin(DynamicPluginTensorDesc const *in, int32_t nbInputs, DynamicPluginTensorDesc const *out, int32_t nbOutputs) noexcept=0
Configure the plugin.
virtual int32_t getNbTactics() noexcept
Query for the number of custom tactics the plugin intends to use.
Definition: NvInferRuntime.h:1016
virtual char const * getMetadataString() noexcept
Query for a string representing the configuration of the plugin. May be called anytime after plugin c...
Definition: NvInferRuntime.h:1051
virtual char const * getTimingCacheID() noexcept
Called to query the suffix to use for the timing cache ID. May be called anytime after plugin creatio...
Definition: NvInferRuntime.h:1032
virtual bool supportsFormatCombination(int32_t pos, DynamicPluginTensorDesc const *inOut, int32_t nbInputs, int32_t nbOutputs) noexcept=0
Return true if plugin supports the format and datatype for the input/output indexed by pos.
virtual int32_t getOutputDataTypes(DataType *outputTypes, int32_t nbOutputs, const DataType *inputTypes, int32_t nbInputs) const noexcept=0
Provide the data types of the plugin outputs if the input tensors have the data types provided.
InterfaceInfo getInterfaceInfo() const noexcept override
Return version information associated with this interface. Applications must not override this method...
Definition: NvInferRuntime.h:848
virtual int32_t getOutputShapes(DimsExprs const *inputs, int32_t nbInputs, DimsExprs const *shapeInputs, int32_t nbShapeInputs, DimsExprs *outputs, int32_t nbOutputs, IExprBuilder &exprBuilder) noexcept=0
Provide expressions for computing dimensions of the output tensors from dimensions of the input tenso...
virtual int32_t getValidTactics(int32_t *tactics, int32_t nbTactics) noexcept
Query for any custom tactics that the plugin intends to use.
Definition: NvInferRuntime.h:1008
Definition: NvInferRuntime.h:793
InterfaceInfo getInterfaceInfo() const noexcept override
Return version information associated with this interface. Applications must not override this method...
Definition: NvInferRuntime.h:798
virtual AsciiChar const * getPluginName() const noexcept=0
Return the plugin name. Should match the plugin name returned by the corresponding plugin creator.
Definition: NvInferRuntime.h:1058
InterfaceInfo getInterfaceInfo() const noexcept override
Return version information associated with this interface. Applications must not override this method...
Definition: NvInferRuntime.h:1063
virtual int32_t onShapeChange(PluginTensorDesc const *in, int32_t nbInputs, PluginTensorDesc const *out, int32_t nbOutputs) noexcept=0
Called when a plugin is being prepared for execution for specific dimensions. This could happen multi...
virtual PluginFieldCollection const * getFieldsToSerialize() noexcept=0
Get the plugin fields which should be serialized.
virtual int32_t setTactic(int32_t tactic) noexcept
Set the tactic to be used in the subsequent call to enqueue(). If no custom tactics were advertised,...
Definition: NvInferRuntime.h:1075
virtual int32_t enqueue(PluginTensorDesc const *inputDesc, PluginTensorDesc const *outputDesc, void const *const *inputs, void *const *outputs, void *workspace, cudaStream_t stream) noexcept=0
Execute the layer.
virtual IPluginV3 * attachToContext(IPluginResourceContext *context) noexcept=0
Clone the plugin, attach the cloned plugin object to a execution context and grant the cloned plugin ...
Definition: NvInferRuntime.h:1246
virtual ~IProfiler() noexcept
Definition: NvInferRuntime.h:1257
virtual void reportLayerTime(char const *layerName, float ms) noexcept=0
Layer time reporting callback.
Definition: NvInferRuntime.h:631
~IStreamReader() override=default
IStreamReader & operator=(IStreamReader const &) &=default
IStreamReader & operator=(IStreamReader &&) &=default
virtual int64_t read(void *destination, int64_t nbBytes)=0
Read the next number of bytes in the stream.
IStreamReader(IStreamReader &&)=default
IStreamReader(IStreamReader const &)=default
InterfaceInfo getInterfaceInfo() const noexcept override
Return version information associated with this interface. Applications must not override this method...
Definition: NvInferRuntime.h:643
Definition: NvInferRuntime.h:695
IStreamReaderV2 & operator=(IStreamReaderV2 const &) &=default
InterfaceInfo getInterfaceInfo() const noexcept override
Return version information associated with this interface. Applications must not override this method...
Definition: NvInferRuntime.h:707
IStreamReaderV2(IStreamReaderV2 &&)=default
~IStreamReaderV2() override=default
virtual int64_t read(void *destination, int64_t nbBytes, cudaStream_t stream) noexcept=0
Read the next number of bytes in the stream asynchronously.
IStreamReaderV2()=default
IStreamReaderV2(IStreamReaderV2 const &)=default
virtual bool seek(int64_t offset, SeekPosition where) noexcept=0
Sets the position of the stream to the given offset.
IStreamReaderV2 & operator=(IStreamReaderV2 &&) &=default
Definition: NvInferRuntime.h:1155
virtual int32_t getAliasedInput(int32_t outputIndex) noexcept
Communicates to TensorRT that the output at the specified output index is aliased to the input at the...
Definition: NvInferRuntime.h:1191
InterfaceInfo getInterfaceInfo() const noexcept override
Return version information associated with this interface. Applications must not override this method...
Definition: NvInferRuntime.h:1157
IRefitter * createInferRefitter(ICudaEngine &engine, ILogger &logger) noexcept
Create an instance of an IRefitter class.
Definition: NvInferRuntime.h:5268
IRuntime * createInferRuntime(ILogger &logger) noexcept
Create an instance of an IRuntime class.
Definition: NvInferRuntime.h:5257
The TensorRT API version 1 namespace.
uint32_t TacticSources
Represents a collection of one or more TacticSource values combine using bitwise-OR operations.
Definition: NvInferRuntime.h:2904
v_1_0::IOutputAllocator IOutputAllocator
Definition: NvInferRuntime.h:4127
EngineCapability
List of supported engine capability flows.
Definition: NvInferRuntime.h:76
DimensionOperation
An operation on two IDimensionExpr, which represent integer expressions used in dimension computation...
Definition: NvInferRuntime.h:179
@ kSUB
Substract the second element from the first.
@ kSUM
Sum of the two operands.
@ kPROD
Product of the two operands.
@ kFLOOR_DIV
Floor division of the first element by the second.
@ kEQUAL
1 if operands are equal, 0 otherwise.
@ kMIN
Minimum of the two operands.
@ kLESS
1 if first operand is less than second operand, 0 otherwise.
@ kMAX
Maximum of the two operands.
@ kCEIL_DIV
Division rounding up.
v_1_0::IPluginV3OneCore IPluginV3OneCore
Definition: NvInferRuntime.h:1208
TensorIOMode
Definition of tensor IO Mode.
Definition: NvInferRuntimeBase.h:654
HardwareCompatibilityLevel
Describes requirements of compatibility with GPU architectures other than that of the GPU on which th...
Definition: NvInfer.h:9254
SerializationFlag
List of valid flags that the engine can enable when serializing the bytes.
Definition: NvInferRuntime.h:2945
@ kEXCLUDE_WEIGHTS
Exclude the weights that can be refitted.
v_1_0::IProfiler IProfiler
Definition: NvInferRuntime.h:1272
SeekPosition
Controls the seek mode of IStreamReaderV2.
Definition: NvInferRuntime.h:681
@ kSET
From the beginning of the file.
@ kCUR
From the current position of the file.
@ kEND
From the tail of the file.
v_1_0::IStreamReaderV2 IStreamReaderV2
Definition: NvInferRuntime.h:751
uint32_t TempfileControlFlags
Represents a collection of one or more TempfileControlFlag values combined using bitwise-OR operation...
Definition: NvInferRuntime.h:1350
v_1_0::IGpuAllocator IGpuAllocator
Definition: NvInferRuntime.h:1809
constexpr int32_t EnumMax< WeightsRole >() noexcept
Maximum number of elements in WeightsRole enum.
Definition: NvInferRuntime.h:1293
char_t AsciiChar
Definition: NvInferRuntimeBase.h:115
TensorRTPhase
Indicates a phase of operation of TensorRT.
Definition: NvInferPluginBase.h:116
@ kV2_DYNAMICEXT
IPluginV2DynamicExt.
constexpr int32_t EnumMax< LayerInformationFormat >() noexcept
Definition: NvInferRuntime.h:5081
DataType
The type of weights and tensors.
Definition: NvInferRuntimeBase.h:143
DeviceType
The device that this layer/network will execute on.
Definition: NvInferRuntime.h:1304
@ kCONSTANT
Constant layer.
v_1_0::IDebugListener IDebugListener
Definition: NvInferRuntime.h:4168
TempfileControlFlag
Flags used to control TensorRT's behavior when creating executable temporary files.
Definition: NvInferRuntime.h:1327
@ kALLOW_IN_MEMORY_FILES
Allow creating and loading files in-memory (or unnamed files).
constexpr int32_t EnumMax< OptProfileSelector >() noexcept
Number of different values of OptProfileSelector enum.
Definition: NvInferRuntime.h:2590
WeightsRole
How a layer uses particular Weights.
Definition: NvInferRuntime.h:1282
@ kSHIFT
shift part of IScaleLayer
@ kANY
Any other weights role.
@ kBIAS
bias for IConvolutionLayer or IDeconvolutionLayer
@ kKERNEL
kernel for IConvolutionLayer or IDeconvolutionLayer
constexpr int32_t EnumMax< ProfilingVerbosity >() noexcept
Maximum number of profile verbosity levels in ProfilingVerbosity enum.
Definition: NvInferRuntime.h:2924
ProfilingVerbosity
List of verbosity levels of layer information exposed in NVTX annotations and in IEngineInspector.
Definition: NvInferRuntime.h:2916
@ kLAYER_NAMES_ONLY
Print only the layer names. This is the default setting.
@ kDETAILED
Print detailed layer information including layer names and layer parameters.
TacticSource
List of tactic sources for TensorRT.
Definition: NvInferRuntime.h:2867
@ kEDGE_MASK_CONVOLUTIONS
TensorFormat PluginFormat
PluginFormat is reserved for backward compatibility.
Definition: NvInferRuntimePlugin.h:54
v_1_0::IPluginV3OneRuntime IPluginV3OneRuntime
Definition: NvInferRuntime.h:1232
@ kMIN
Minimum of the two elements.
constexpr int32_t EnumMax< TempfileControlFlag >() noexcept
Maximum number of elements in TempfileControlFlag enum.
Definition: NvInferRuntime.h:1339
uint32_t SerializationFlags
Represents one or more SerializationFlag values using binary OR operations, e.g., 1U << Serialization...
Definition: NvInferRuntime.h:2935
@ kLINEAR
Supports linear (1D), bilinear (2D), and trilinear (3D) interpolation.
v_1_0::IPluginV3OneBuild IPluginV3OneBuild
Definition: NvInferRuntime.h:1220
TensorFormat
Format of the input/output tensors.
Definition: NvInferRuntime.h:1383
ExecutionContextAllocationStrategy
Different memory allocation behaviors for IExecutionContext.
Definition: NvInferRuntime.h:3049
@ kSTATIC
Default static allocation with the maximum size across all profiles.
@ kUSER_MANAGED
The user supplies custom allocation to the execution context.
@ kON_PROFILE_CHANGE
Reallocate for a profile when it's selected.
constexpr int32_t EnumMax< TacticSource >() noexcept
Maximum number of tactic sources in TacticSource enum.
Definition: NvInferRuntime.h:2893
LayerInformationFormat
The format in which the IEngineInspector prints the layer information.
Definition: NvInferRuntime.h:5073
@ kJSON
Print layer information in JSON format.
@ kONELINE
Print layer information in one line per layer.
v_1_0::IStreamReader IStreamReader
Definition: NvInferRuntime.h:674
AllocatorFlag
Allowed type of memory allocation.
Definition: NvInferRuntime.h:1508
@ kRESIZABLE
TensorRT may call realloc() on this allocation.
@ kMAX
Maximum over elements.
constexpr int32_t EnumMax< DeviceType >() noexcept
Maximum number of elements in DeviceType enum.
Definition: NvInferRuntime.h:1311
constexpr int32_t EnumMax< DimensionOperation >() noexcept
Maximum number of elements in DimensionOperation enum.
Definition: NvInferRuntime.h:193
constexpr int32_t EnumMax< ExecutionContextAllocationStrategy >() noexcept
Maximum number of memory allocation strategies in ExecutionContextAllocationStrategy enum.
Definition: NvInferRuntime.h:3061
constexpr int32_t EnumMax< SerializationFlag >() noexcept
Maximum number of serialization flags in SerializationFlag enum.
Definition: NvInferRuntime.h:2952
TensorLocation
The location for tensor data storage, device or host.
Definition: NvInferRuntime.h:204
@ kHOST
Data stored on host.
@ kDEVICE
Data stored on device.
OptProfileSelector
When setting or querying optimization profile parameters (such as shape tensor inputs or dynamic dime...
Definition: NvInferRuntime.h:2578
@ kOPT
This is used to set or get the value that is used in the optimization (kernel selection).
uint32_t AllocatorFlags
Definition: NvInferRuntime.h:1524
Summarizes tensors that a plugin might see for an input or output.
Definition: NvInferRuntime.h:362
Dims min
Lower bounds on tensor’s dimensions.
Definition: NvInferRuntime.h:367
Dims max
Upper bounds on tensor’s dimensions.
Definition: NvInferRuntime.h:370
Dims opt
Optimum value of tensor’s dimensions specified for auto-tuning.
Definition: NvInferRuntime.h:373
PluginTensorDesc desc
Information required to interpret a pointer to tensor data, except that desc.dims has -1 in place of ...
Definition: NvInferRuntime.h:364
Plugin field collection struct.
Definition: NvInferPluginBase.h:103
Fields that a plugin might see for an input or output.
Definition: NvInferRuntimePlugin.h:73
Declaration of EnumMaxImpl struct to store maximum number of elements in an enumeration type.
Definition: NvInferRuntimeBase.h:128