18#ifndef NV_INFER_RUNTIME_H
19#define NV_INFER_RUNTIME_H
33class IExecutionContext;
36class IEngineInspector;
102 static constexpr int32_t kVALUE = 3;
146 return mImpl->data();
150 std::size_t
size() const noexcept
152 return mImpl->size();
158 return mImpl->type();
212 static constexpr int32_t kVALUE = 2;
236 return mImpl->isConstant();
247 return mImpl->getConstantValue();
262 return mImpl->isSizeTensor();
291 return mImpl->constant(value);
303 return mImpl->operation(op, first, second);
337 return mImpl->declareSizeTensor(outputIndex, opt, upper);
433 int32_t outputIndex,
DimsExprs const* inputs, int32_t nbInputs,
IExprBuilder& exprBuilder) noexcept = 0;
438 static constexpr int32_t kFORMAT_COMBINATION_LIMIT = 100;
472 virtual
bool supportsFormatCombination(
473 int32_t pos,
PluginTensorDesc const* inOut, int32_t nbInputs, int32_t nbOutputs) noexcept = 0;
525 int32_t nbOutputs) const noexcept = 0;
540 void const* const* inputs,
void* const* outputs,
void* workspace, cudaStream_t stream) noexcept = 0;
550 int32_t getTensorRTVersion() const noexcept
override
564 bool const*,
PluginFormat, int32_t)
noexcept override final
579 Dims getOutputDimensions(int32_t,
Dims const*, int32_t)
noexcept override final
591 TRT_DEPRECATED bool isOutputBroadcastAcrossBatch(int32_t,
bool const*, int32_t)
const noexcept override final
603 TRT_DEPRECATED bool canBroadcastInputAcrossBatch(int32_t)
const noexcept override final
611 size_t getWorkspaceSize(int32_t)
const noexcept override final
619 int32_t enqueue(int32_t,
void const*
const*,
void*
const*,
void*, cudaStream_t)
noexcept override final
769 virtual
AsciiChar const* getPluginVersion() const noexcept = 0;
780 virtual
AsciiChar const* getPluginNamespace() const noexcept = 0;
791 static constexpr int32_t kDEFAULT_FORMAT_COMBINATION_LIMIT = 100;
838 DataType* outputTypes, int32_t nbOutputs,
const DataType* inputTypes, int32_t nbInputs)
const noexcept = 0;
861 int32_t nbShapeInputs,
DimsExprs* outputs, int32_t nbOutputs,
IExprBuilder& exprBuilder)
noexcept = 0;
987 return kDEFAULT_FORMAT_COMBINATION_LIMIT;
1060 void const*
const* inputs,
void*
const* outputs,
void* workspace, cudaStream_t stream)
noexcept = 0;
1173 virtual
AsciiChar const* getPluginName() const noexcept = 0;
1181 virtual
AsciiChar const* getPluginVersion() const noexcept = 0;
1189 virtual
AsciiChar const* getPluginNamespace() const noexcept = 0;
1344 void setDLACore(int32_t dlaCore) noexcept
1346 mImpl->setDLACore(dlaCore);
1356 return mImpl->getDLACore();
1364 return mImpl->getNbDLACores();
1379 mImpl->setGpuAllocator(allocator);
1398 mImpl->setErrorRecorder(recorder);
1413 return mImpl->getErrorRecorder();
1428 return mImpl->deserializeCudaEngine(blob, size);
1446 return mImpl->deserializeCudaEngine(streamReader);
1456 return mImpl->getLogger();
1471 return mImpl->setMaxThreads(maxThreads);
1485 return mImpl->getMaxThreads();
1520 return mImpl->setTemporaryDirectory(path);
1531 return mImpl->getTemporaryDirectory();
1547 return mImpl->setTempfileControlFlags(flags);
1559 return mImpl->getTempfileControlFlags();
1569 return mImpl->getPluginRegistry();
1599 return mImpl->setEngineHostCodeAllowed(allowed);
1609 return mImpl->getEngineHostCodeAllowed();
1645 return mImpl->setWeights(layerName, role, weights);
1662 return mImpl->refitCudaEngine();
1683 return mImpl->getMissing(size, layerNames, roles);
1700 return mImpl->getAll(size, layerNames, roles);
1722 return mImpl->setDynamicRange(tensorName, min, max);
1738 return mImpl->getDynamicRangeMin(tensorName);
1754 return mImpl->getDynamicRangeMax(tensorName);
1772 return mImpl->getTensorsWithDynamicRange(size, tensorNames);
1791 mImpl->setErrorRecorder(recorder);
1806 return mImpl->getErrorRecorder();
1831 return mImpl->setNamedWeights(name, weights);
1851 return mImpl->getMissingWeights(size, weightsNames);
1867 return mImpl->getAllWeights(size, weightsNames);
1877 return mImpl->getLogger();
1893 return mImpl->setMaxThreads(maxThreads);
1907 return mImpl->getMaxThreads();
1934 return mImpl->setNamedWeightsWithLocation(name, weights, location);
1950 return mImpl->getNamedWeights(weightsName);
1966 return mImpl->getWeightsLocation(weightsName);
1982 return mImpl->unsetNamedWeights(weightsName);
1998 return mImpl->setWeightsValidation(weightsValidation);
2006 return mImpl->getWeightsValidation();
2028 return mImpl->refitCudaEngineAsync(stream);
2046 return mImpl->getWeightsPrototype(weightsName);
2135 return mImpl->setDimensions(inputName, select, dims);
2147 return mImpl->getDimensions(inputName, select);
2194 char const* inputName,
OptProfileSelector select, int32_t
const* values, int32_t nbValues)
noexcept
2196 return mImpl->setShapeValues(inputName, select, values, nbValues);
2209 return mImpl->getNbShapeValues(inputName);
2221 return mImpl->getShapeValues(inputName, select);
2239 return mImpl->setExtraMemoryTarget(target);
2251 return mImpl->getExtraMemoryTarget();
2268 return mImpl->isValid();
2399 return mImpl->setFlags(serializationFlags);
2411 return mImpl->getFlags();
2423 return mImpl->clearFlag(serializationFlag);
2435 return mImpl->setFlag(serializationFlag);
2447 return mImpl->getFlag(serializationFlag);
2505 Dims getTensorShape(
char const* tensorName) const noexcept
2507 return mImpl->getTensorShape(tensorName);
2522 return mImpl->getTensorDataType(tensorName);
2536 return mImpl->getNbLayers();
2550 return mImpl->serialize();
2568 return mImpl->createExecutionContext(strategy);
2585 return mImpl->getTensorLocation(tensorName);
2605 return mImpl->isShapeInferenceIO(tensorName);
2619 return mImpl->getTensorIOMode(tensorName);
2631 return mImpl->createExecutionContextWithoutDeviceMemory();
2643 return mImpl->getDeviceMemorySize();
2655 return mImpl->getDeviceMemorySizeForProfile(profileIndex);
2671 return mImpl->getDeviceMemorySizeV2();
2687 return mImpl->getDeviceMemorySizeForProfileV2(profileIndex);
2697 return mImpl->isRefittable();
2718 return mImpl->getTensorBytesPerComponent(tensorName);
2736 return mImpl->getTensorBytesPerComponentV2(tensorName, profileIndex);
2757 return mImpl->getTensorComponentsPerElement(tensorName);
2775 return mImpl->getTensorComponentsPerElementV2(tensorName, profileIndex);
2790 return mImpl->getTensorFormat(tensorName);
2804 return mImpl->getTensorFormatV2(tensorName, profileIndex);
2828 return mImpl->getTensorFormatDesc(tensorName);
2851 return mImpl->getTensorFormatDescV2(tensorName, profileIndex);
2868 return mImpl->getTensorVectorizedDim(tensorName);
2884 return mImpl->getTensorVectorizedDimV2(tensorName, profileIndex);
2899 return mImpl->getName();
2910 return mImpl->getNbOptimizationProfiles();
2930 return mImpl->getProfileShape(tensorName, profileIndex, select);
2953 return mImpl->getProfileTensorValues(tensorName, profileIndex, select);
2968 return mImpl->getEngineCapability();
2987 return mImpl->setErrorRecorder(recorder);
3002 return mImpl->getErrorRecorder();
3016 return mImpl->hasImplicitBatchDimension();
3032 return mImpl->getTacticSources();
3044 return mImpl->getProfilingVerbosity();
3054 return mImpl->createEngineInspector();
3067 return mImpl->getNbIOTensors();
3079 return mImpl->getIOTensorName(index);
3092 return mImpl->getHardwareCompatibilityLevel();
3107 return mImpl->getNbAuxStreams();
3117 return mImpl->createSerializationConfig();
3133 return mImpl->serializeWithConfig(config);
3178 return mImpl->setWeightStreamingBudget(gpuMemoryBudget);
3198 return mImpl->getWeightStreamingBudget();
3221 return mImpl->getMinimumWeightStreamingBudget();
3237 return mImpl->getStreamableWeightsSize();
3279 return mImpl->setWeightStreamingBudgetV2(gpuMemoryBudget);
3297 return mImpl->getWeightStreamingBudgetV2();
3317 return mImpl->getWeightStreamingAutomaticBudget();
3345 return mImpl->getWeightStreamingScratchMemorySize();
3359 return mImpl->isDebugTensor(name);
3376 return {
"IOutputAllocator", 1, 0};
3399 char const* tensorName,
void* currentMemory, uint64_t size, uint64_t alignment)
noexcept
3427 char const* tensorName,
void* currentMemory, uint64_t size, uint64_t alignment, cudaStream_t )
3429 return reallocateOutput(tensorName, currentMemory, size, alignment);
3463 return {
"IDebugListener", 1, 0};
3480 char const* name, cudaStream_t stream)
3517 void setDebugSync(
bool sync) noexcept
3519 mImpl->setDebugSync(sync);
3529 return mImpl->getDebugSync();
3539 mImpl->setProfiler(profiler);
3549 return mImpl->getProfiler();
3559 return mImpl->getEngine();
3573 mImpl->setName(name);
3583 return mImpl->getName();
3610 mImpl->setDeviceMemory(memory);
3632 return mImpl->setDeviceMemoryV2(memory, size);
3653 return mImpl->getTensorStrides(tensorName);
3668 return mImpl->getOptimizationProfile();
3686 return mImpl->setInputShape(tensorName, dims);
3723 return mImpl->getTensorShape(tensorName);
3739 return mImpl->allInputDimensionsSpecified();
3756 return mImpl->allInputShapesSpecified();
3775 mImpl->setErrorRecorder(recorder);
3790 return mImpl->getErrorRecorder();
3807 return mImpl->executeV2(bindings);
3851 return mImpl->setOptimizationProfileAsync(profileIndex, stream);
3867 mImpl->setEnqueueEmitsProfile(enqueueEmitsProfile);
3879 return mImpl->getEnqueueEmitsProfile();
3909 return mImpl->reportToProfiler();
3950 return mImpl->setTensorAddress(tensorName, data);
3967 return mImpl->getTensorAddress(tensorName);
3990 return mImpl->setOutputTensorAddress(tensorName, data);
4012 return mImpl->setInputTensorAddress(tensorName, data);
4031 return mImpl->getOutputTensorAddress(tensorName);
4062 int32_t
inferShapes(int32_t nbMaxNames,
char const** tensorNames)
noexcept
4064 return mImpl->inferShapes(nbMaxNames, tensorNames);
4081 return mImpl->updateDeviceMemorySizeForShapes();
4097 return mImpl->setInputConsumedEvent(event);
4107 return mImpl->getInputConsumedEvent();
4126 return mImpl->setOutputAllocator(tensorName, outputAllocator);
4139 return mImpl->getOutputAllocator(tensorName);
4157 return mImpl->getMaxOutputSize(tensorName);
4177 return mImpl->setTemporaryStorageAllocator(allocator);
4187 return mImpl->getTemporaryStorageAllocator();
4211 return mImpl->enqueueV3(stream);
4227 mImpl->setPersistentCacheLimit(size);
4238 return mImpl->getPersistentCacheLimit();
4262 return mImpl->setNvtxVerbosity(verbosity);
4274 return mImpl->getNvtxVerbosity();
4305 mImpl->setAuxStreams(auxStreams, nbStreams);
4317 return mImpl->setDebugListener(listener);
4327 return mImpl->getDebugListener();
4346 return mImpl->setTensorDebugState(name, flag);
4359 return mImpl->setAllTensorsDebugState(flag);
4369 return mImpl->getDebugState(name);
4431 return mImpl->setExecutionContext(context);
4443 return mImpl->getExecutionContext();
4468 return mImpl->getLayerInformation(layerIndex, format);
4491 return mImpl->getEngineInformation(format);
4510 mImpl->setErrorRecorder(recorder);
4525 return mImpl->getErrorRecorder();
4538extern "C" TENSORRTAPI void* createInferRuntime_INTERNAL(
void* logger, int32_t version)
noexcept;
4544extern "C" TENSORRTAPI void* createInferRefitter_INTERNAL(
void* engine,
void* logger, int32_t version)
noexcept;
4596template <
typename T>
4612#define REGISTER_TENSORRT_PLUGIN(name) \
4613 static nvinfer1::PluginRegistrar<name> pluginRegistrar##name {}
4682 cudaStream_t )
noexcept override = 0;
4734 uint64_t
const size, uint64_t
const alignment,
AllocatorFlags const flags)
noexcept override
4736 return allocateAsync(size, alignment, flags,
nullptr);
4759 return deallocateAsync(memory,
nullptr);
4767 return {
"IGpuAllocator", 1, 0};
nvinfer1::ILogger * getLogger() noexcept
Return the logger object.
int32_t getInferLibMajorVersion() noexcept
Return the library major version number.
nvinfer1::IPluginRegistry * getPluginRegistry() noexcept
Return the plugin registry.
int32_t getInferLibPatchVersion() noexcept
Return the library patch version number.
int32_t getInferLibMinorVersion() noexcept
Return the library minor version number.
int32_t getInferLibBuildVersion() noexcept
Return the library build version number.
#define TENSORRTAPI
Definition: NvInferRuntimeBase.h:59
#define NV_TENSORRT_VERSION
Definition: NvInferRuntimeBase.h:93
#define TRT_DEPRECATED
Definition: NvInferRuntimeBase.h:45
#define TRT_DEPRECATED_ENUM
Definition: NvInferRuntimeBase.h:46
Structure to define the dimensions of a tensor.
Definition: NvInferRuntimeBase.h:202
static constexpr int32_t MAX_DIMS
The maximum rank (number of dimensions) supported for a tensor.
Definition: NvInferRuntimeBase.h:205
Analog of class Dims with expressions instead of constants for the dimensions.
Definition: NvInferRuntime.h:347
IDimensionExpr const * d[Dims::MAX_DIMS]
The extent of each dimension.
Definition: NvInferRuntime.h:350
int32_t nbDims
The number of dimensions.
Definition: NvInferRuntime.h:349
An engine for executing inference on a built network, with functionally unsafe features.
Definition: NvInferRuntime.h:2491
int32_t getTensorBytesPerComponent(char const *tensorName) const noexcept
Return the number of bytes per component of an element, or -1 if the tensor is not vectorized or prov...
Definition: NvInferRuntime.h:2716
ISerializationConfig * createSerializationConfig() noexcept
Create a serialization configuration object.
Definition: NvInferRuntime.h:3115
TRT_DEPRECATED int64_t getWeightStreamingBudget() const noexcept
Returns the current weight streaming device memory budget in bytes.
Definition: NvInferRuntime.h:3196
int32_t const * getProfileTensorValues(char const *tensorName, int32_t profileIndex, OptProfileSelector select) const noexcept
Get the minimum / optimum / maximum values (not dimensions) for an input tensor given its name under ...
Definition: NvInferRuntime.h:2950
char const * getIOTensorName(int32_t index) const noexcept
Return name of an IO tensor.
Definition: NvInferRuntime.h:3077
int64_t getWeightStreamingBudgetV2() const noexcept
Returns the current weight streaming device memory budget in bytes.
Definition: NvInferRuntime.h:3295
EngineCapability getEngineCapability() const noexcept
Determine what execution capability this engine has.
Definition: NvInferRuntime.h:2966
IErrorRecorder * getErrorRecorder() const noexcept
Get the ErrorRecorder assigned to this interface.
Definition: NvInferRuntime.h:3000
TensorFormat getTensorFormat(char const *tensorName, int32_t profileIndex) const noexcept
Return the tensor format of given profile, or TensorFormat::kLINEAR if the provided name does not map...
Definition: NvInferRuntime.h:2802
TRT_DEPRECATED bool hasImplicitBatchDimension() const noexcept
Query whether the engine was built with an implicit batch dimension.
Definition: NvInferRuntime.h:3014
apiv::VCudaEngine * mImpl
Definition: NvInferRuntime.h:3363
TRT_DEPRECATED size_t getDeviceMemorySizeForProfile(int32_t profileIndex) const noexcept
Return the maximum device memory required by the context for a profile.
Definition: NvInferRuntime.h:2653
IExecutionContext * createExecutionContext(ExecutionContextAllocationStrategy strategy=ExecutionContextAllocationStrategy::kSTATIC) noexcept
Create an execution context and specify the strategy for allocating internal activation memory.
Definition: NvInferRuntime.h:2565
char const * getTensorFormatDesc(char const *tensorName) const noexcept
Return the human readable description of the tensor format, or empty string if the provided name does...
Definition: NvInferRuntime.h:2826
Dims getProfileShape(char const *tensorName, int32_t profileIndex, OptProfileSelector select) const noexcept
Get the minimum / optimum / maximum dimensions for an input tensor given its name under an optimizati...
Definition: NvInferRuntime.h:2928
bool setWeightStreamingBudgetV2(int64_t gpuMemoryBudget) noexcept
Limit the maximum amount of GPU memory usable for network weights in bytes.
Definition: NvInferRuntime.h:3277
int32_t getNbAuxStreams() const noexcept
Return the number of auxiliary streams used by this engine.
Definition: NvInferRuntime.h:3105
int64_t getStreamableWeightsSize() const noexcept
Get the total size in bytes of all streamable weights.
Definition: NvInferRuntime.h:3235
DataType getTensorDataType(char const *tensorName) const noexcept
Determine the required data type for a buffer from its tensor name.
Definition: NvInferRuntime.h:2520
void setErrorRecorder(IErrorRecorder *recorder) noexcept
Set the ErrorRecorder for this interface.
Definition: NvInferRuntime.h:2985
TacticSources getTacticSources() const noexcept
Return the tactic sources required by this engine.
Definition: NvInferRuntime.h:3030
IHostMemory * serializeWithConfig(ISerializationConfig &config) const noexcept
Serialize the network to a stream with the provided SerializationConfig.
Definition: NvInferRuntime.h:3131
virtual ~ICudaEngine() noexcept=default
int64_t getWeightStreamingAutomaticBudget() const noexcept
TensorRT automatically determines an ideal budget for the model to run.
Definition: NvInferRuntime.h:3315
bool isDebugTensor(char const *name) const noexcept
Check if a tensor is marked as a debug tensor.
Definition: NvInferRuntime.h:3357
int32_t getTensorVectorizedDim(char const *tensorName, int32_t profileIndex) const noexcept
Return the dimension index that the buffer is vectorized of given profile, or -1 if the provided name...
Definition: NvInferRuntime.h:2882
char const * getName() const noexcept
Returns the name of the network associated with the engine.
Definition: NvInferRuntime.h:2897
ProfilingVerbosity getProfilingVerbosity() const noexcept
Return the ProfilingVerbosity the builder config was set to when the engine was built.
Definition: NvInferRuntime.h:3042
bool isShapeInferenceIO(char const *tensorName) const noexcept
True if tensor is required as input for shape calculations or is output from shape calculations.
Definition: NvInferRuntime.h:2603
int64_t getWeightStreamingScratchMemorySize() const noexcept
Returns the size of the scratch memory required by the current weight streaming budget.
Definition: NvInferRuntime.h:3343
TRT_DEPRECATED bool setWeightStreamingBudget(int64_t gpuMemoryBudget) noexcept
Limit the maximum amount of GPU memory usable for network weights in bytes.
Definition: NvInferRuntime.h:3176
int64_t getDeviceMemorySizeV2() const noexcept
Return the maximum device memory required by the context over all profiles.
Definition: NvInferRuntime.h:2669
int32_t getTensorVectorizedDim(char const *tensorName) const noexcept
Return the dimension index that the buffer is vectorized, or -1 if the provided name does not map to ...
Definition: NvInferRuntime.h:2866
TRT_DEPRECATED size_t getDeviceMemorySize() const noexcept
Return the maximum device memory required by the context over all profiles.
Definition: NvInferRuntime.h:2641
int32_t getTensorComponentsPerElement(char const *tensorName, int32_t profileIndex) const noexcept
Return the number of components included in one element of given profile, or -1 if tensor is not vect...
Definition: NvInferRuntime.h:2773
int64_t getDeviceMemorySizeForProfileV2(int32_t profileIndex) const noexcept
Return the maximum device memory required by the context for a profile.
Definition: NvInferRuntime.h:2685
TensorFormat getTensorFormat(char const *tensorName) const noexcept
Return the tensor format, or TensorFormat::kLINEAR if the provided name does not map to an input or o...
Definition: NvInferRuntime.h:2788
IHostMemory * serialize() const noexcept
Serialize the network to a stream.
Definition: NvInferRuntime.h:2548
TensorLocation getTensorLocation(char const *tensorName) const noexcept
Get whether an input or output tensor must be on GPU or CPU.
Definition: NvInferRuntime.h:2583
IEngineInspector * createEngineInspector() const noexcept
Create a new engine inspector which prints the layer information in an engine or an execution context...
Definition: NvInferRuntime.h:3052
int32_t getTensorBytesPerComponent(char const *tensorName, int32_t profileIndex) const noexcept
Return the number of bytes per component of an element given of given profile, or -1 if the tensor is...
Definition: NvInferRuntime.h:2734
HardwareCompatibilityLevel getHardwareCompatibilityLevel() const noexcept
Return the hardware compatibility level of this engine.
Definition: NvInferRuntime.h:3090
int32_t getNbOptimizationProfiles() const noexcept
Get the number of optimization profiles defined for this engine.
Definition: NvInferRuntime.h:2908
TRT_DEPRECATED IExecutionContext * createExecutionContextWithoutDeviceMemory() noexcept
Create an execution context without any device memory allocated.
Definition: NvInferRuntime.h:2629
char const * getTensorFormatDesc(char const *tensorName, int32_t profileIndex) const noexcept
Return the human readable description of the tensor format of given profile, or empty string if the p...
Definition: NvInferRuntime.h:2849
TRT_DEPRECATED int64_t getMinimumWeightStreamingBudget() const noexcept
The minimum number of bytes of GPU memory required by network weights for successful weight streaming...
Definition: NvInferRuntime.h:3219
TensorIOMode getTensorIOMode(char const *tensorName) const noexcept
Determine whether a tensor is an input or output tensor.
Definition: NvInferRuntime.h:2617
int32_t getNbLayers() const noexcept
Get the number of layers in the network.
Definition: NvInferRuntime.h:2534
int32_t getNbIOTensors() const noexcept
Return number of IO tensors.
Definition: NvInferRuntime.h:3065
int32_t getTensorComponentsPerElement(char const *tensorName) const noexcept
Return the number of components included in one element, or -1 if tensor is not vectorized or if the ...
Definition: NvInferRuntime.h:2755
bool isRefittable() const noexcept
Return true if an engine can be refit.
Definition: NvInferRuntime.h:2695
An IDimensionExpr represents an integer expression constructed from constants, input dimensions,...
Definition: NvInferRuntime.h:229
bool isConstant() const noexcept
Return true if expression is a build-time constant.
Definition: NvInferRuntime.h:234
bool isSizeTensor() const noexcept
Return true if this denotes the value of a size tensor.
Definition: NvInferRuntime.h:260
virtual ~IDimensionExpr() noexcept=default
apiv::VDimensionExpr * mImpl
Definition: NvInferRuntime.h:251
int64_t getConstantValue() const noexcept
Get the value of the constant.
Definition: NvInferRuntime.h:245
An engine inspector which prints out the layer information of an engine or an execution context.
Definition: NvInferRuntime.h:4413
char const * getLayerInformation(int32_t layerIndex, LayerInformationFormat format) const noexcept
Get a string describing the information about a specific layer in the current engine or the execution...
Definition: NvInferRuntime.h:4466
IErrorRecorder * getErrorRecorder() const noexcept
Get the ErrorRecorder assigned to this interface.
Definition: NvInferRuntime.h:4523
void setErrorRecorder(IErrorRecorder *recorder) noexcept
Set the ErrorRecorder for this interface.
Definition: NvInferRuntime.h:4508
IExecutionContext const * getExecutionContext() const noexcept
Get the context currently being inspected.
Definition: NvInferRuntime.h:4441
apiv::VEngineInspector * mImpl
Definition: NvInferRuntime.h:4529
virtual ~IEngineInspector() noexcept=default
char const * getEngineInformation(LayerInformationFormat format) const noexcept
Get a string describing the information about all the layers in the current engine or the execution c...
Definition: NvInferRuntime.h:4489
Context for executing inference using an engine, with functionally unsafe features.
Definition: NvInferRuntime.h:3505
IOutputAllocator * getOutputAllocator(char const *tensorName) const noexcept
Get output allocator associated with output tensor of given name, or nullptr if the provided name doe...
Definition: NvInferRuntime.h:4137
IErrorRecorder * getErrorRecorder() const noexcept
Get the ErrorRecorder assigned to this interface.
Definition: NvInferRuntime.h:3788
bool reportToProfiler() const noexcept
Calculate layer timing info for the current optimization profile in IExecutionContext and update the ...
Definition: NvInferRuntime.h:3907
virtual ~IExecutionContext() noexcept=default
void setDeviceMemory(void *memory) noexcept
Set the device memory for use by this execution context.
Definition: NvInferRuntime.h:3608
TRT_DEPRECATED bool allInputShapesSpecified() const noexcept
Whether all input shape bindings have been specified.
Definition: NvInferRuntime.h:3754
bool setTensorDebugState(char const *name, bool flag) noexcept
Set debug state of tensor given the tensor name.
Definition: NvInferRuntime.h:4344
char const * getName() const noexcept
Return the name of the execution context.
Definition: NvInferRuntime.h:3581
IGpuAllocator * getTemporaryStorageAllocator() const noexcept
Get allocator set by setTemporaryStorageAllocator.
Definition: NvInferRuntime.h:4185
void setEnqueueEmitsProfile(bool enqueueEmitsProfile) noexcept
Set whether enqueue emits layer timing to the profiler.
Definition: NvInferRuntime.h:3865
Dims getTensorShape(char const *tensorName) const noexcept
Return the shape of the given input or output.
Definition: NvInferRuntime.h:3721
bool getDebugState(char const *name) const noexcept
Definition: NvInferRuntime.h:4367
bool setInputShape(char const *tensorName, Dims const &dims) noexcept
Set shape of given input.
Definition: NvInferRuntime.h:3684
bool executeV2(void *const *bindings) noexcept
Synchronously execute a network.
Definition: NvInferRuntime.h:3805
bool getEnqueueEmitsProfile() const noexcept
Get the enqueueEmitsProfile state.
Definition: NvInferRuntime.h:3877
void const * getTensorAddress(char const *tensorName) const noexcept
Get memory address bound to given input or output tensor, or nullptr if the provided name does not ma...
Definition: NvInferRuntime.h:3965
bool setOutputAllocator(char const *tensorName, IOutputAllocator *outputAllocator) noexcept
Set output allocator to use for output tensor of given name. Pass nullptr to outputAllocator to unset...
Definition: NvInferRuntime.h:4124
bool setOptimizationProfileAsync(int32_t profileIndex, cudaStream_t stream) noexcept
Select an optimization profile for the current context with async semantics.
Definition: NvInferRuntime.h:3849
apiv::VExecutionContext * mImpl
Definition: NvInferRuntime.h:4373
bool setOutputTensorAddress(char const *tensorName, void *data) noexcept
Set the memory address for a given output tensor.
Definition: NvInferRuntime.h:3988
void setPersistentCacheLimit(size_t size) noexcept
Set the maximum size for persistent cache usage.
Definition: NvInferRuntime.h:4225
size_t getPersistentCacheLimit() const noexcept
Get the maximum size for persistent cache usage.
Definition: NvInferRuntime.h:4236
bool setAllTensorsDebugState(bool flag) noexcept
Definition: NvInferRuntime.h:4357
ICudaEngine const & getEngine() const noexcept
Get the associated engine.
Definition: NvInferRuntime.h:3557
ProfilingVerbosity getNvtxVerbosity() const noexcept
Get the NVTX verbosity of the execution context.
Definition: NvInferRuntime.h:4272
size_t updateDeviceMemorySizeForShapes() noexcept
Recompute the internal activation buffer sizes based on the current input shapes, and return the tota...
Definition: NvInferRuntime.h:4079
void setAuxStreams(cudaStream_t *auxStreams, int32_t nbStreams) noexcept
Set the auxiliary streams that TensorRT should launch kernels on in the next enqueueV3() call.
Definition: NvInferRuntime.h:4303
int64_t getMaxOutputSize(char const *tensorName) const noexcept
Get upper bound on an output tensor's size, in bytes, based on the current optimization profile and i...
Definition: NvInferRuntime.h:4155
int32_t inferShapes(int32_t nbMaxNames, char const **tensorNames) noexcept
Run shape calculations.
Definition: NvInferRuntime.h:4062
bool setDebugListener(IDebugListener *listener) noexcept
Set DebugListener for this execution context.
Definition: NvInferRuntime.h:4315
bool setTensorAddress(char const *tensorName, void *data) noexcept
Set memory address for given input or output tensor.
Definition: NvInferRuntime.h:3948
bool setTemporaryStorageAllocator(IGpuAllocator *allocator) noexcept
Specify allocator to use for internal temporary storage.
Definition: NvInferRuntime.h:4175
void * getOutputTensorAddress(char const *tensorName) const noexcept
Get memory address for given output.
Definition: NvInferRuntime.h:4029
bool enqueueV3(cudaStream_t stream) noexcept
Enqueue inference on a stream.
Definition: NvInferRuntime.h:4209
IDebugListener * getDebugListener() noexcept
Get the DebugListener of this execution context.
Definition: NvInferRuntime.h:4325
int32_t getOptimizationProfile() const noexcept
Get the index of the currently selected optimization profile.
Definition: NvInferRuntime.h:3666
bool setInputTensorAddress(char const *tensorName, void const *data) noexcept
Set memory address for given input.
Definition: NvInferRuntime.h:4010
bool getDebugSync() const noexcept
Get the debug sync flag.
Definition: NvInferRuntime.h:3527
bool setInputConsumedEvent(cudaEvent_t event) noexcept
Mark input as consumed.
Definition: NvInferRuntime.h:4095
Dims getTensorStrides(char const *tensorName) const noexcept
Return the strides of the buffer for the given tensor name.
Definition: NvInferRuntime.h:3651
bool setNvtxVerbosity(ProfilingVerbosity verbosity) noexcept
Set the verbosity of the NVTX markers in the execution context.
Definition: NvInferRuntime.h:4260
IProfiler * getProfiler() const noexcept
Get the profiler.
Definition: NvInferRuntime.h:3547
void setErrorRecorder(IErrorRecorder *recorder) noexcept
Set the ErrorRecorder for this interface.
Definition: NvInferRuntime.h:3773
void setDeviceMemoryV2(void *memory, int64_t size) noexcept
Set the device memory and its corresponding size for use by this execution context.
Definition: NvInferRuntime.h:3630
bool allInputDimensionsSpecified() const noexcept
Whether all dynamic dimensions of input tensors have been specified.
Definition: NvInferRuntime.h:3737
void setProfiler(IProfiler *profiler) noexcept
Set the profiler.
Definition: NvInferRuntime.h:3537
void setName(char const *name) noexcept
Set the name of the execution context.
Definition: NvInferRuntime.h:3571
cudaEvent_t getInputConsumedEvent() const noexcept
The event associated with consuming the input.
Definition: NvInferRuntime.h:4105
Object for constructing IDimensionExpr.
Definition: NvInferRuntime.h:284
IDimensionExpr const * operation(DimensionOperation op, IDimensionExpr const &first, IDimensionExpr const &second) noexcept
Get the operation.
Definition: NvInferRuntime.h:300
virtual ~IExprBuilder() noexcept=default
IDimensionExpr const * constant(int64_t value) noexcept
Return pointer to IDimensionExp for given value.
Definition: NvInferRuntime.h:289
apiv::VExprBuilder * mImpl
Definition: NvInferRuntime.h:307
IDimensionExpr const * declareSizeTensor(int32_t outputIndex, IDimensionExpr const &opt, IDimensionExpr const &upper)
Declare a size tensor at the given output index, with the specified auto-tuning formula and upper bou...
Definition: NvInferRuntime.h:335
Class to handle library allocated memory that is accessible to the user.
Definition: NvInferRuntime.h:139
void * data() const noexcept
A pointer to the raw data that is owned by the library.
Definition: NvInferRuntime.h:144
DataType type() const noexcept
The type of the memory that was allocated.
Definition: NvInferRuntime.h:156
std::size_t size() const noexcept
The size in bytes of the data that was allocated.
Definition: NvInferRuntime.h:150
apiv::VHostMemory * mImpl
Definition: NvInferRuntime.h:162
virtual ~IHostMemory() noexcept=default
A virtual base class to find a logger. Allows a plugin to find an instance of a logger if it needs to...
Definition: NvInferRuntime.h:4626
virtual ILogger * findLogger()=0
Get the logger used by the engine or execution context which called the plugin method.
virtual ~ILoggerFinder()=default
Application-implemented logging interface for the builder, refitter and runtime.
Definition: NvInferRuntimeBase.h:683
Forward declaration of IEngineInspector for use by other interfaces.
Definition: NvInferRuntime.h:48
INoCopy & operator=(INoCopy &&other)=delete
INoCopy(INoCopy const &other)=delete
INoCopy(INoCopy &&other)=delete
virtual ~INoCopy()=default
INoCopy & operator=(INoCopy const &other)=delete
Optimization profile for dynamic input dimensions and shape tensors.
Definition: NvInferRuntime.h:2104
int32_t const * getShapeValues(char const *inputName, OptProfileSelector select) const noexcept
Get the minimum / optimum / maximum values for an input shape tensor.
Definition: NvInferRuntime.h:2219
apiv::VOptimizationProfile * mImpl
Definition: NvInferRuntime.h:2272
Dims getDimensions(char const *inputName, OptProfileSelector select) const noexcept
Get the minimum / optimum / maximum dimensions for a dynamic input tensor.
Definition: NvInferRuntime.h:2145
virtual ~IOptimizationProfile() noexcept=default
float getExtraMemoryTarget() const noexcept
Get the extra memory target that has been defined for this profile.
Definition: NvInferRuntime.h:2249
bool setExtraMemoryTarget(float target) noexcept
Set a target for extra GPU memory that may be used by this profile.
Definition: NvInferRuntime.h:2237
bool setDimensions(char const *inputName, OptProfileSelector select, Dims const &dims) noexcept
Set the minimum / optimum / maximum dimensions for a dynamic input tensor.
Definition: NvInferRuntime.h:2133
bool setShapeValues(char const *inputName, OptProfileSelector select, int32_t const *values, int32_t nbValues) noexcept
Set the minimum / optimum / maximum values for an input shape tensor.
Definition: NvInferRuntime.h:2193
bool isValid() const noexcept
Check whether the optimization profile can be passed to an IBuilderConfig object.
Definition: NvInferRuntime.h:2266
int32_t getNbShapeValues(char const *inputName) const noexcept
Get the number of values for an input shape tensor.
Definition: NvInferRuntime.h:2207
Single registration point for all plugins in an application. It is used to find plugin implementation...
Definition: NvInferRuntimeCommon.h:56
virtual TRT_DEPRECATED bool registerCreator(IPluginCreator &creator, AsciiChar const *const pluginNamespace) noexcept=0
Register a plugin creator implementing IPluginCreator. Returns false if any plugin creator with the s...
Interface for plugins to access per context resources provided by TensorRT.
Definition: NvInferRuntime.h:639
virtual IErrorRecorder * getErrorRecorder() const noexcept=0
Get the error recorder associated with the resource context.
virtual IGpuAllocator * getGpuAllocator() const noexcept=0
Get the GPU allocator associated with the resource context.
Similar to IPluginV2Ext, but with support for dynamic shapes.
Definition: NvInferRuntime.h:404
IPluginV2DynamicExt * clone() const noexcept override=0
Clone the plugin object. This copies over internal plugin parameters as well and returns a new plugin...
virtual ~IPluginV2DynamicExt() noexcept
Definition: NvInferRuntime.h:555
Plugin class for user-implemented layers.
Definition: NvInferRuntimePlugin.h:464
Updates weights in an engine.
Definition: NvInferRuntime.h:1624
bool refitCudaEngineAsync(cudaStream_t stream) noexcept
Enqueue weights refitting of the associated engine on the given stream.
Definition: NvInferRuntime.h:2026
int32_t getMaxThreads() const noexcept
Get the maximum number of threads that can be used by the refitter.
Definition: NvInferRuntime.h:1905
TensorLocation getWeightsLocation(char const *weightsName) const noexcept
Get location for the weights associated with the given name.
Definition: NvInferRuntime.h:1964
bool setNamedWeights(char const *name, Weights weights) noexcept
Specify new weights of given name.
Definition: NvInferRuntime.h:1829
int32_t getAllWeights(int32_t size, char const **weightsNames) noexcept
Get names of all weights that could be refit.
Definition: NvInferRuntime.h:1865
ILogger * getLogger() const noexcept
Get the logger with which the refitter was created.
Definition: NvInferRuntime.h:1875
bool refitCudaEngine() noexcept
Refits associated engine.
Definition: NvInferRuntime.h:1660
int32_t getMissingWeights(int32_t size, char const **weightsNames) noexcept
Get names of missing weights.
Definition: NvInferRuntime.h:1849
TRT_DEPRECATED float getDynamicRangeMax(char const *tensorName) const noexcept
Get maximum of dynamic range.
Definition: NvInferRuntime.h:1752
int32_t getMissing(int32_t size, char const **layerNames, WeightsRole *roles) noexcept
Get description of missing weights.
Definition: NvInferRuntime.h:1681
Weights getNamedWeights(char const *weightsName) const noexcept
Get weights associated with the given name.
Definition: NvInferRuntime.h:1948
bool unsetNamedWeights(char const *weightsName) noexcept
Unset weights associated with the given name.
Definition: NvInferRuntime.h:1980
Weights getWeightsPrototype(char const *weightsName) const noexcept
Get the Weights prototype associated with the given name.
Definition: NvInferRuntime.h:2044
bool setMaxThreads(int32_t maxThreads) noexcept
Set the maximum number of threads.
Definition: NvInferRuntime.h:1891
TRT_DEPRECATED float getDynamicRangeMin(char const *tensorName) const noexcept
Get minimum of dynamic range.
Definition: NvInferRuntime.h:1736
TRT_DEPRECATED int32_t getTensorsWithDynamicRange(int32_t size, char const **tensorNames) const noexcept
Get names of all tensors that have refittable dynamic ranges.
Definition: NvInferRuntime.h:1770
bool setNamedWeights(char const *name, Weights weights, TensorLocation location) noexcept
Specify new weights on a specified device of given name.
Definition: NvInferRuntime.h:1932
void setWeightsValidation(bool weightsValidation) noexcept
Set whether to validate weights during refitting.
Definition: NvInferRuntime.h:1996
TRT_DEPRECATED bool setDynamicRange(char const *tensorName, float min, float max) noexcept
Definition: NvInferRuntime.h:1720
apiv::VRefitter * mImpl
Definition: NvInferRuntime.h:2050
int32_t getAll(int32_t size, char const **layerNames, WeightsRole *roles) noexcept
Get description of all weights that could be refit.
Definition: NvInferRuntime.h:1698
virtual ~IRefitter() noexcept=default
bool getWeightsValidation() const noexcept
Get whether to validate weights values during refitting.
Definition: NvInferRuntime.h:2004
void setErrorRecorder(IErrorRecorder *recorder) noexcept
Set the ErrorRecorder for this interface.
Definition: NvInferRuntime.h:1789
IErrorRecorder * getErrorRecorder() const noexcept
Get the ErrorRecorder assigned to this interface.
Definition: NvInferRuntime.h:1804
Allows a serialized functionally unsafe engine to be deserialized.
Definition: NvInferRuntime.h:1329
bool setMaxThreads(int32_t maxThreads) noexcept
Set the maximum number of threads.
Definition: NvInferRuntime.h:1469
IRuntime * loadRuntime(char const *path) noexcept
Load IRuntime from the file.
Definition: NvInferRuntime.h:1585
ICudaEngine * deserializeCudaEngine(IStreamReader &streamReader)
Deserialize an engine from a stream.
Definition: NvInferRuntime.h:1444
bool getEngineHostCodeAllowed() const noexcept
Get whether the runtime is allowed to deserialize engines with host executable code.
Definition: NvInferRuntime.h:1607
TempfileControlFlags getTempfileControlFlags() const noexcept
Get the tempfile control flags for this runtime.
Definition: NvInferRuntime.h:1557
void setEngineHostCodeAllowed(bool allowed) noexcept
Set whether the runtime is allowed to deserialize engines with host executable code.
Definition: NvInferRuntime.h:1597
virtual ~IRuntime() noexcept=default
void setTemporaryDirectory(char const *path) noexcept
Set the directory that will be used by this runtime for temporary files.
Definition: NvInferRuntime.h:1518
IPluginRegistry & getPluginRegistry() noexcept
Get the local plugin registry that can be used by the runtime.
Definition: NvInferRuntime.h:1567
apiv::VRuntime * mImpl
Definition: NvInferRuntime.h:1613
int32_t getNbDLACores() const noexcept
Returns number of DLA hardware cores accessible or 0 if DLA is unavailable.
Definition: NvInferRuntime.h:1362
ICudaEngine * deserializeCudaEngine(void const *blob, std::size_t size) noexcept
Deserialize an engine from host memory.
Definition: NvInferRuntime.h:1426
void setTempfileControlFlags(TempfileControlFlags flags) noexcept
Set the tempfile control flags for this runtime.
Definition: NvInferRuntime.h:1545
int32_t getDLACore() const noexcept
Get the DLA core that the engine executes on.
Definition: NvInferRuntime.h:1354
void setGpuAllocator(IGpuAllocator *allocator) noexcept
Set the GPU allocator.
Definition: NvInferRuntime.h:1377
IErrorRecorder * getErrorRecorder() const noexcept
Get the ErrorRecorder assigned to this interface.
Definition: NvInferRuntime.h:1411
ILogger * getLogger() const noexcept
Get the logger with which the runtime was created.
Definition: NvInferRuntime.h:1454
int32_t getMaxThreads() const noexcept
Get the maximum number of threads that can be used by the runtime.
Definition: NvInferRuntime.h:1483
char const * getTemporaryDirectory() const noexcept
Get the directory that will be used by this runtime for temporary files.
Definition: NvInferRuntime.h:1529
void setErrorRecorder(IErrorRecorder *recorder) noexcept
Set the ErrorRecorder for this interface.
Definition: NvInferRuntime.h:1396
Holds properties for configuring an engine to serialize the binary.
Definition: NvInferRuntime.h:2382
virtual ~ISerializationConfig() noexcept=default
bool clearFlag(SerializationFlag serializationFlag) noexcept
Clear a serialization flag.
Definition: NvInferRuntime.h:2421
bool setFlag(SerializationFlag serializationFlag) noexcept
Set a serialization flag.
Definition: NvInferRuntime.h:2433
SerializationFlags getFlags() const noexcept
Get the serialization flags for this config.
Definition: NvInferRuntime.h:2409
bool getFlag(SerializationFlag serializationFlag) const noexcept
Returns true if the serialization flag is set.
Definition: NvInferRuntime.h:2445
apiv::VSerializationConfig * mImpl
Definition: NvInferRuntime.h:2451
An Interface class for version control.
Definition: NvInferRuntimeBase.h:400
Version information associated with a TRT interface.
Definition: NvInferRuntimeBase.h:365
Register the plugin creator to the registry The static registry object will be instantiated when the ...
Definition: NvInferRuntime.h:4598
PluginRegistrar()
Definition: NvInferRuntime.h:4600
An array of weights used as a layer parameter.
Definition: NvInferRuntime.h:121
DataType type
The type of the weights.
Definition: NvInferRuntime.h:123
int64_t count
The number of weights in the array.
Definition: NvInferRuntime.h:125
void const * values
The weight values, in a contiguous array.
Definition: NvInferRuntime.h:124
Definition: NvInferRuntime.h:3456
virtual bool processDebugTensor(void const *addr, TensorLocation location, DataType type, Dims const &shape, char const *name, cudaStream_t stream)=0
Callback function that is called when a debug tensor’s value is updated and the debug state of the te...
InterfaceInfo getInterfaceInfo() const noexcept override
Return version information associated with this interface. Applications must not override this method...
Definition: NvInferRuntime.h:3461
~IDebugListener() override=default
Definition: NvInferRuntimeBase.h:857
Definition: NvInferRuntimeBase.h:469
Definition: NvInferRuntime.h:4647
bool deallocateAsync(void *const memory, cudaStream_t) noexcept override=0
A thread-safe callback implemented by the application to handle stream-ordered asynchronous release o...
IGpuAsyncAllocator()=default
void * allocateAsync(uint64_t const size, uint64_t const alignment, AllocatorFlags const flags, cudaStream_t) noexcept override=0
A thread-safe callback implemented by the application to handle stream-ordered asynchronous acquisiti...
TRT_DEPRECATED void * allocate(uint64_t const size, uint64_t const alignment, AllocatorFlags const flags) noexcept override
A thread-safe callback implemented by the application to handle acquisition of GPU memory.
Definition: NvInferRuntime.h:4733
TRT_DEPRECATED bool deallocate(void *const memory) noexcept override
A thread-safe callback implemented by the application to handle release of GPU memory.
Definition: NvInferRuntime.h:4757
InterfaceInfo getInterfaceInfo() const noexcept override
Return version information associated with this interface. Applications must not override this method...
Definition: NvInferRuntime.h:4765
~IGpuAsyncAllocator() override=default
Definition: NvInferRuntime.h:3369
InterfaceInfo getInterfaceInfo() const noexcept override
Return version information associated with this interface. Applications must not override this method...
Definition: NvInferRuntime.h:3374
virtual void * reallocateOutputAsync(char const *tensorName, void *currentMemory, uint64_t size, uint64_t alignment, cudaStream_t)
Return a pointer to memory for an output tensor, or nullptr if memory cannot be allocated....
Definition: NvInferRuntime.h:3426
virtual void notifyShape(char const *tensorName, Dims const &dims) noexcept=0
Called by TensorRT when the shape of the output tensor is known.
virtual TRT_DEPRECATED void * reallocateOutput(char const *tensorName, void *currentMemory, uint64_t size, uint64_t alignment) noexcept
Return a pointer to memory for an output tensor, or nullptr if memory cannot be allocated....
Definition: NvInferRuntime.h:3398
Definition: NvInferRuntime.h:665
Definition: NvInferRuntimePlugin.h:935
Definition: NvInferRuntime.h:1134
virtual PluginFieldCollection const * getFieldNames() noexcept=0
Return a list of fields that need to be passed to createPlugin() when creating a plugin for use in th...
InterfaceInfo getInterfaceInfo() const noexcept override
Return version information associated with this interface. Applications must not override this method...
Definition: NvInferRuntime.h:1139
virtual IPluginV3 * createPlugin(AsciiChar const *name, PluginFieldCollection const *fc, TensorRTPhase phase) noexcept=0
Return a plugin object. Return nullptr in case of error.
Definition: NvInferRuntime.h:687
virtual IPluginV3 * clone() noexcept=0
Clone the plugin object. This copies over internal plugin parameters and returns a new plugin object ...
virtual IPluginCapability * getCapabilityInterface(PluginCapabilityType type) noexcept=0
Return a pointer to plugin object implementing the specified PluginCapabilityType.
InterfaceInfo getInterfaceInfo() const noexcept override
Return version information associated with this interface. Applications must not override this method...
Definition: NvInferRuntime.h:692
Definition: NvInferRuntime.h:784
virtual int32_t getFormatCombinationLimit() noexcept
Return the maximum number of format combinations that will be timed by TensorRT during the build phas...
Definition: NvInferRuntime.h:985
virtual int32_t getNbOutputs() const noexcept=0
Get the number of outputs from the plugin.
virtual int32_t configurePlugin(DynamicPluginTensorDesc const *in, int32_t nbInputs, DynamicPluginTensorDesc const *out, int32_t nbOutputs) noexcept=0
Configure the plugin.
virtual int32_t getNbTactics() noexcept
Query for the number of custom tactics the plugin intends to use.
Definition: NvInferRuntime.h:961
virtual char const * getMetadataString() noexcept
Query for a string representing the configuration of the plugin. May be called anytime after plugin c...
Definition: NvInferRuntime.h:996
virtual char const * getTimingCacheID() noexcept
Called to query the suffix to use for the timing cache ID. May be called anytime after plugin creatio...
Definition: NvInferRuntime.h:977
virtual bool supportsFormatCombination(int32_t pos, DynamicPluginTensorDesc const *inOut, int32_t nbInputs, int32_t nbOutputs) noexcept=0
Return true if plugin supports the format and datatype for the input/output indexed by pos.
virtual int32_t getOutputDataTypes(DataType *outputTypes, int32_t nbOutputs, const DataType *inputTypes, int32_t nbInputs) const noexcept=0
Provide the data types of the plugin outputs if the input tensors have the data types provided.
InterfaceInfo getInterfaceInfo() const noexcept override
Return version information associated with this interface. Applications must not override this method...
Definition: NvInferRuntime.h:796
virtual int32_t getOutputShapes(DimsExprs const *inputs, int32_t nbInputs, DimsExprs const *shapeInputs, int32_t nbShapeInputs, DimsExprs *outputs, int32_t nbOutputs, IExprBuilder &exprBuilder) noexcept=0
Provide expressions for computing dimensions of the output tensors from dimensions of the input tenso...
virtual int32_t getValidTactics(int32_t *tactics, int32_t nbTactics) noexcept
Query for any custom tactics that the plugin intends to use.
Definition: NvInferRuntime.h:953
Definition: NvInferRuntime.h:741
InterfaceInfo getInterfaceInfo() const noexcept override
Return version information associated with this interface. Applications must not override this method...
Definition: NvInferRuntime.h:746
virtual AsciiChar const * getPluginName() const noexcept=0
Return the plugin name. Should match the plugin name returned by the corresponding plugin creator.
Definition: NvInferRuntime.h:1003
InterfaceInfo getInterfaceInfo() const noexcept override
Return version information associated with this interface. Applications must not override this method...
Definition: NvInferRuntime.h:1008
virtual int32_t onShapeChange(PluginTensorDesc const *in, int32_t nbInputs, PluginTensorDesc const *out, int32_t nbOutputs) noexcept=0
Called when a plugin is being prepared for execution for specific dimensions. This could happen multi...
virtual PluginFieldCollection const * getFieldsToSerialize() noexcept=0
Get the plugin fields which should be serialized.
virtual int32_t setTactic(int32_t tactic) noexcept
Set the tactic to be used in the subsequent call to enqueue(). If no custom tactics were advertised,...
Definition: NvInferRuntime.h:1020
virtual int32_t enqueue(PluginTensorDesc const *inputDesc, PluginTensorDesc const *outputDesc, void const *const *inputs, void *const *outputs, void *workspace, cudaStream_t stream) noexcept=0
Execute the layer.
virtual IPluginV3 * attachToContext(IPluginResourceContext *context) noexcept=0
Clone the plugin, attach the cloned plugin object to a execution context and grant the cloned plugin ...
Definition: NvInferRuntime.h:1215
virtual ~IProfiler() noexcept
Definition: NvInferRuntime.h:1226
virtual void reportLayerTime(char const *layerName, float ms) noexcept=0
Layer time reporting callback.
Definition: NvInferRuntimeBase.h:1114
IRefitter * createInferRefitter(ICudaEngine &engine, ILogger &logger) noexcept
Create an instance of an IRefitter class.
Definition: NvInferRuntime.h:4578
IRuntime * createInferRuntime(ILogger &logger) noexcept
Create an instance of an IRuntime class.
Definition: NvInferRuntime.h:4568
The TensorRT API version 1 namespace.
uint32_t TacticSources
Represents a collection of one or more TacticSource values combined using bitwise-OR operations.
Definition: NvInferRuntime.h:2321
v_1_0::IPluginCapability IPluginCapability
Definition: NvInferRuntime.h:682
v_1_0::IOutputAllocator IOutputAllocator
Definition: NvInferRuntime.h:3451
EngineCapability
List of supported engine capability flows.
Definition: NvInferRuntime.h:73
DimensionOperation
An operation on two IDimensionExpr, which represent integer expressions used in dimension computation...
Definition: NvInferRuntime.h:176
@ kSUB
Subtract the second element from the first.
@ kSUM
Sum of the two operands.
@ kPROD
Product of the two operands.
@ kFLOOR_DIV
Floor division of the first element by the second.
@ kEQUAL
1 if operands are equal, 0 otherwise.
@ kMIN
Minimum of the two operands.
@ kLESS
1 if first operand is less than second operand, 0 otherwise.
@ kMAX
Maximum of the two operands.
@ kCEIL_DIV
Division rounding up.
TensorIOMode
Definition of tensor IO Mode.
Definition: NvInferRuntimeBase.h:1100
HardwareCompatibilityLevel
Describes requirements of compatibility with GPU architectures other than that of the GPU on which th...
Definition: NvInfer.h:8491
SerializationFlag
List of valid flags that the engine can enable when serializing the bytes.
Definition: NvInferRuntime.h:2362
@ kEXCLUDE_WEIGHTS
Exclude the weights that can be refitted.
v_1_0::IProfiler IProfiler
Definition: NvInferRuntime.h:1241
uint32_t TempfileControlFlags
Represents a collection of one or more TempfileControlFlag values combined using bitwise-OR operation...
Definition: NvInferRuntime.h:1319
PluginCapabilityType
Enumerates the different capability types a IPluginV3 object may have.
Definition: NvInferRuntimePlugin.h:910
constexpr int32_t EnumMax< WeightsRole >() noexcept
Maximum number of elements in WeightsRole enum.
Definition: NvInferRuntime.h:1262
char_t AsciiChar
Definition: NvInferRuntimeBase.h:107
TensorRTPhase
Indicates a phase of operation of TensorRT.
Definition: NvInferRuntimePlugin.h:925
@ kV2_DYNAMICEXT
IPluginV2DynamicExt.
constexpr int32_t EnumMax< LayerInformationFormat >() noexcept
Definition: NvInferRuntime.h:4392
DataType
The type of weights and tensors.
Definition: NvInferRuntimeBase.h:135
DeviceType
The device that this layer/network will execute on.
Definition: NvInferRuntime.h:1273
@ kCONSTANT
Constant layer.
v_1_0::IDebugListener IDebugListener
Definition: NvInferRuntime.h:3492
TempfileControlFlag
Flags used to control TensorRT's behavior when creating executable temporary files.
Definition: NvInferRuntime.h:1296
@ kALLOW_IN_MEMORY_FILES
Allow creating and loading files in-memory (or unnamed files).
constexpr int32_t EnumMax< OptProfileSelector >() noexcept
Number of different values of OptProfileSelector enum.
Definition: NvInferRuntime.h:2076
WeightsRole
How a layer uses particular Weights.
Definition: NvInferRuntime.h:1251
@ kSHIFT
shift part of IScaleLayer
@ kANY
Any other weights role.
@ kBIAS
bias for IConvolutionLayer or IDeconvolutionLayer
@ kKERNEL
kernel for IConvolutionLayer or IDeconvolutionLayer
constexpr int32_t EnumMax< ProfilingVerbosity >() noexcept
Maximum number of profile verbosity levels in ProfilingVerbosity enum.
Definition: NvInferRuntime.h:2341
ProfilingVerbosity
List of verbosity levels of layer information exposed in NVTX annotations and in IEngineInspector.
Definition: NvInferRuntime.h:2333
@ kLAYER_NAMES_ONLY
Print only the layer names. This is the default setting.
@ kDETAILED
Print detailed layer information including layer names and layer parameters.
TacticSource
List of tactic sources for TensorRT.
Definition: NvInferRuntime.h:2284
@ kEDGE_MASK_CONVOLUTIONS
TensorFormat PluginFormat
PluginFormat is reserved for backward compatibility.
Definition: NvInferRuntimePlugin.h:48
@ kMIN
Minimum of the two elements.
constexpr int32_t EnumMax< TempfileControlFlag >() noexcept
Maximum number of elements in TempfileControlFlag enum.
Definition: NvInferRuntime.h:1308
uint32_t SerializationFlags
Represents one or more SerializationFlag values using binary OR operations, e.g., 1U << Serialization...
Definition: NvInferRuntime.h:2352
TensorFormat
Format of the input/output tensors.
Definition: NvInferRuntimeBase.h:250
ExecutionContextAllocationStrategy
Different memory allocation behaviors for IExecutionContext.
Definition: NvInferRuntime.h:2466
@ kSTATIC
Default static allocation with the maximum size across all profiles.
@ kUSER_MANAGED
The user supplies custom allocation to the execution context.
@ kON_PROFILE_CHANGE
Reallocate for a profile when it's selected.
constexpr int32_t EnumMax< TacticSource >() noexcept
Maximum number of tactic sources in TacticSource enum.
Definition: NvInferRuntime.h:2310
LayerInformationFormat
The format in which the IEngineInspector prints the layer information.
Definition: NvInferRuntime.h:4384
@ kJSON
Print layer information in JSON format.
@ kONELINE
Print layer information in one line per layer.
v_1_0::IGpuAsyncAllocator IGpuAsyncAllocator
Definition: NvInferRuntime.h:4785
@ kMAX
Maximum over elements.
constexpr int32_t EnumMax< DeviceType >() noexcept
Maximum number of elements in DeviceType enum.
Definition: NvInferRuntime.h:1280
constexpr int32_t EnumMax< DimensionOperation >() noexcept
Maximum number of elements in DimensionOperation enum.
Definition: NvInferRuntime.h:190
constexpr int32_t EnumMax< ExecutionContextAllocationStrategy >() noexcept
Maximum number of memory allocation strategies in ExecutionContextAllocationStrategy enum.
Definition: NvInferRuntime.h:2478
constexpr int32_t EnumMax< SerializationFlag >() noexcept
Maximum number of serialization flags in SerializationFlag enum.
Definition: NvInferRuntime.h:2369
TensorLocation
The location for tensor data storage, device or host.
Definition: NvInferRuntime.h:201
@ kHOST
Data stored on host.
@ kDEVICE
Data stored on device.
OptProfileSelector
When setting or querying optimization profile parameters (such as shape tensor inputs or dynamic dime...
Definition: NvInferRuntime.h:2064
@ kOPT
This is used to set or get the value that is used in the optimization (kernel selection).
uint32_t AllocatorFlags
Definition: NvInferRuntimeBase.h:461
Summarizes tensors that a plugin might see for an input or output.
Definition: NvInferRuntime.h:359
Dims min
Lower bounds on tensor’s dimensions.
Definition: NvInferRuntime.h:364
Dims max
Upper bounds on tensor’s dimensions.
Definition: NvInferRuntime.h:367
Dims opt
Optimum value of tensor’s dimensions specified for auto-tuning.
Definition: NvInferRuntime.h:370
PluginTensorDesc desc
Information required to interpret a pointer to tensor data, except that desc.dims has -1 in place of ...
Definition: NvInferRuntime.h:361
Plugin field collection struct.
Definition: NvInferRuntimePlugin.h:897
Fields that a plugin might see for an input or output.
Definition: NvInferRuntimePlugin.h:67
Declaration of EnumMaxImpl struct to store maximum number of elements in an enumeration type.
Definition: NvInferRuntimeBase.h:120