TensorRT 10.8.0
|
Context for executing inference using an engine, with functionally unsafe features. More...
#include <NvInferRuntime.h>
Public Member Functions | |
virtual | ~IExecutionContext () noexcept=default |
void | setDebugSync (bool sync) noexcept |
Set the debug sync flag. More... | |
bool | getDebugSync () const noexcept |
Get the debug sync flag. More... | |
void | setProfiler (IProfiler *profiler) noexcept |
Set the profiler. More... | |
IProfiler * | getProfiler () const noexcept |
Get the profiler. More... | |
ICudaEngine const & | getEngine () const noexcept |
Get the associated engine. More... | |
void | setName (char const *name) noexcept |
Set the name of the execution context. More... | |
char const * | getName () const noexcept |
Return the name of the execution context. More... | |
void | setDeviceMemory (void *memory) noexcept |
Set the device memory for use by this execution context. More... | |
void | setDeviceMemoryV2 (void *memory, int64_t size) noexcept |
Set the device memory and its corresponding size for use by this execution context. More... | |
Dims | getTensorStrides (char const *tensorName) const noexcept |
Return the strides of the buffer for the given tensor name. More... | |
int32_t | getOptimizationProfile () const noexcept |
Get the index of the currently selected optimization profile. More... | |
bool | setInputShape (char const *tensorName, Dims const &dims) noexcept |
Set shape of given input. More... | |
Dims | getTensorShape (char const *tensorName) const noexcept |
Return the shape of the given input or output. More... | |
bool | allInputDimensionsSpecified () const noexcept |
Whether all dynamic dimensions of input tensors have been specified. More... | |
TRT_DEPRECATED bool | allInputShapesSpecified () const noexcept |
Whether all input shape bindings have been specified. More... | |
void | setErrorRecorder (IErrorRecorder *recorder) noexcept |
Set the ErrorRecorder for this interface. More... | |
IErrorRecorder * | getErrorRecorder () const noexcept |
Get the ErrorRecorder assigned to this interface. More... | |
bool | executeV2 (void *const *bindings) noexcept |
Synchronously execute a network. More... | |
bool | setOptimizationProfileAsync (int32_t profileIndex, cudaStream_t stream) noexcept |
Select an optimization profile for the current context with async semantics. More... | |
void | setEnqueueEmitsProfile (bool enqueueEmitsProfile) noexcept |
Set whether enqueue emits layer timing to the profiler. More... | |
bool | getEnqueueEmitsProfile () const noexcept |
Get the enqueueEmitsProfile state. More... | |
bool | reportToProfiler () const noexcept |
Calculate layer timing info for the current optimization profile in IExecutionContext and update the profiler after one iteration of inference launch. More... | |
bool | setTensorAddress (char const *tensorName, void *data) noexcept |
Set memory address for given input or output tensor. More... | |
void const * | getTensorAddress (char const *tensorName) const noexcept |
Get memory address bound to given input or output tensor, or nullptr if the provided name does not map to an input or output tensor. More... | |
bool | setOutputTensorAddress (char const *tensorName, void *data) noexcept |
Set the memory address for a given output tensor. More... | |
bool | setInputTensorAddress (char const *tensorName, void const *data) noexcept |
Set memory address for given input. More... | |
void * | getOutputTensorAddress (char const *tensorName) const noexcept |
Get memory address for given output. More... | |
int32_t | inferShapes (int32_t nbMaxNames, char const **tensorNames) noexcept |
Run shape calculations. More... | |
size_t | updateDeviceMemorySizeForShapes () noexcept |
Recompute the internal activation buffer sizes based on the current input shapes, and return the total amount of memory required. More... | |
bool | setInputConsumedEvent (cudaEvent_t event) noexcept |
Mark input as consumed. More... | |
cudaEvent_t | getInputConsumedEvent () const noexcept |
The event associated with consuming the input. More... | |
bool | setOutputAllocator (char const *tensorName, IOutputAllocator *outputAllocator) noexcept |
Set output allocator to use for output tensor of given name. Pass nullptr to outputAllocator to unset. The allocator is called by enqueueV3(). More... | |
IOutputAllocator * | getOutputAllocator (char const *tensorName) const noexcept |
Get output allocator associated with output tensor of given name, or nullptr if the provided name does not map to an output tensor. More... | |
int64_t | getMaxOutputSize (char const *tensorName) const noexcept |
Get upper bound on an output tensor's size, in bytes, based on the current optimization profile and input dimensions. More... | |
bool | setTemporaryStorageAllocator (IGpuAllocator *allocator) noexcept |
Specify allocator to use for internal temporary storage. More... | |
IGpuAllocator * | getTemporaryStorageAllocator () const noexcept |
Get allocator set by setTemporaryStorageAllocator. More... | |
bool | enqueueV3 (cudaStream_t stream) noexcept |
Enqueue inference on a stream. More... | |
void | setPersistentCacheLimit (size_t size) noexcept |
Set the maximum size for persistent cache usage. More... | |
size_t | getPersistentCacheLimit () const noexcept |
Get the maximum size for persistent cache usage. More... | |
bool | setNvtxVerbosity (ProfilingVerbosity verbosity) noexcept |
Set the verbosity of the NVTX markers in the execution context. More... | |
ProfilingVerbosity | getNvtxVerbosity () const noexcept |
Get the NVTX verbosity of the execution context. More... | |
void | setAuxStreams (cudaStream_t *auxStreams, int32_t nbStreams) noexcept |
Set the auxiliary streams that TensorRT should launch kernels on in the next enqueueV3() call. More... | |
bool | setDebugListener (IDebugListener *listener) noexcept |
Set DebugListener for this execution context. More... | |
IDebugListener * | getDebugListener () noexcept |
Get the DebugListener of this execution context. More... | |
bool | setTensorDebugState (char const *name, bool flag) noexcept |
Set debug state of tensor given the tensor name. More... | |
bool | setAllTensorsDebugState (bool flag) noexcept |
bool | getDebugState (char const *name) const noexcept |
Protected Attributes | |
apiv::VExecutionContext * | mImpl |
Additional Inherited Members | |
![]() | |
INoCopy ()=default | |
virtual | ~INoCopy ()=default |
INoCopy (INoCopy const &other)=delete | |
INoCopy & | operator= (INoCopy const &other)=delete |
INoCopy (INoCopy &&other)=delete | |
INoCopy & | operator= (INoCopy &&other)=delete |
Context for executing inference using an engine, with functionally unsafe features.
Multiple execution contexts may exist for one ICudaEngine instance, allowing the same engine to be used for the execution of multiple batches simultaneously. If the engine supports dynamic shapes, each execution context in concurrent use must use a separate optimization profile.
|
virtualdefaultnoexcept |
|
inlinenoexcept |
Whether all dynamic dimensions of input tensors have been specified.
Trivially true if network has no dynamically shaped input tensors.
Does not work with name-based interfaces, e.g. IExecutionContext::setInputShape(). Use IExecutionContext::inferShapes() instead.
|
inlinenoexcept |
Whether all input shape bindings have been specified.
Trivially true if network has no input shape bindings.
Does not work with name-based interfaces, e.g. IExecutionContext::setInputShape(). Use IExecutionContext::inferShapes() instead.
|
inlinenoexcept |
Enqueue inference on a stream.
stream | A CUDA stream on which the inference kernels will be enqueued. |
Modifying or releasing memory that has been registered for the tensors before stream synchronization, or before the event passed to setInputConsumedEvent has been triggered, results in undefined behavior. Input tensors can be released after the event passed to setInputConsumedEvent has been triggered, whereas output tensors require stream synchronization.
|
inlinenoexcept |
Synchronously execute a network.
This method requires an array of input and output buffers. The mapping from indices to tensor names can be queried using ICudaEngine::getIOTensorName().
bindings | An array of pointers to input and output buffers for the network. |
|
inlinenoexcept |
Get the DebugListener of this execution context.
|
inlinenoexcept |
Get the debug state.
|
inlinenoexcept |
Get the debug sync flag.
|
inlinenoexcept |
Get the associated engine.
|
inlinenoexcept |
Get the enqueueEmitsProfile state.
|
inlinenoexcept |
Get the ErrorRecorder assigned to this interface.
Retrieves the assigned error recorder object for the given class. A nullptr will be returned if an error handler has not been set.
|
inlinenoexcept |
The event associated with consuming the input.
|
inlinenoexcept |
Get upper bound on an output tensor's size, in bytes, based on the current optimization profile and input dimensions.
If the profile or input dimensions are not yet set, or the provided name does not map to an output, returns -1.
tensorName | The name of an output tensor. |
|
inlinenoexcept |
Return the name of the execution context.
|
inlinenoexcept |
Get the NVTX verbosity of the execution context.
|
inlinenoexcept |
Get the index of the currently selected optimization profile.
If the profile index has not been set yet (implicitly to 0 if no other execution context has been set to profile 0, or explicitly for all subsequent contexts), an invalid value of -1 will be returned and all calls to enqueueV3()/executeV2() will fail until a valid profile index has been set. This behavior is deprecated in TensorRT 8.6; all profiles will default to optimization profile 0 and -1 will no longer be returned.
|
inlinenoexcept |
Get output allocator associated with output tensor of given name, or nullptr if the provided name does not map to an output tensor.
|
inlinenoexcept |
Get memory address for given output.
tensorName | The name of an output tensor. |
If only a (void const*) pointer is needed, an alternative is to call method getTensorAddress().
|
inlinenoexcept |
Get the maximum size for persistent cache usage.
|
inlinenoexcept |
Get the profiler.
|
inlinenoexcept |
Get allocator set by setTemporaryStorageAllocator.
Returns a nullptr if a nullptr was passed with setTemporaryStorageAllocator().
|
inlinenoexcept |
Get memory address bound to given input or output tensor, or nullptr if the provided name does not map to an input or output tensor.
tensorName | The name of an input or output tensor. |
Use method getOutputTensorAddress() if a non-const pointer for an output tensor is required.
|
inlinenoexcept |
Return the shape of the given input or output.
tensorName | The name of an input or output tensor. |
Return Dims{-1, {}} if the provided name does not map to an input or output tensor. Otherwise return the shape of the input or output tensor.
A dimension in an input tensor will have a -1 wildcard value if all the following are true:
A dimension in an output tensor will have a -1 wildcard value if the dimension depends on values of execution tensors OR if all the following are true:
An output tensor may also have -1 wildcard dimensions if its shape depends on values of tensors supplied to enqueueV3().
If the request is for the shape of an output tensor with runtime dimensions, all input tensors with isShapeInferenceIO() = true should have their value already set, since these values might be needed to compute the output shape.
Examples of an input dimension that is implicitly constrained to a single value:
|
inlinenoexcept |
Return the strides of the buffer for the given tensor name.
The strides are in units of elements, not components or bytes. For example, for TensorFormat::kHWC8, a stride of one spans 8 scalars.
Note that strides can be different for different execution contexts with dynamic shapes.
If the provided name does not map to an input or output tensor, or there are dynamic dimensions that have not been set yet, return Dims{-1, {}}.
tensorName | The name of an input or output tensor. |
|
inlinenoexcept |
Run shape calculations.
nbMaxNames | Maximum number of names to write to tensorNames. When the return value is a positive value n and tensorNames != nullptr, the names of min(n,nbMaxNames) insufficiently specified input tensors are written to tensorNames. |
tensorNames | Buffer in which to place names of insufficiently specified input tensors. |
An input tensor is insufficiently specified if either of the following is true:
If an output tensor has isShapeInferenceIO(t)=true and its address has been specified, then its value is written.
Returns -1 if tensorNames == nullptr and nbMaxNames != 0. Returns -1 if nbMaxNames < 0. Returns -1 if a tensor's dimensions are invalid, e.g. a tensor ends up with a negative dimension.
|
inlinenoexcept |
Calculate layer timing info for the current optimization profile in IExecutionContext and update the profiler after one iteration of inference launch.
If IExecutionContext::getEnqueueEmitsProfile() returns true, the enqueue function will calculate layer timing implicitly if a profiler is provided. This function returns true and does nothing.
If IExecutionContext::getEnqueueEmitsProfile() returns false, the enqueue function will record the CUDA event timers if a profiler is provided. But it will not perform the layer timing calculation. IExecutionContext::reportToProfiler() needs to be called explicitly to calculate layer timing for the previous inference launch.
In the CUDA graph launch scenario, it will record the same set of CUDA events as in regular enqueue functions if the graph is captured from an IExecutionContext with profiler enabled. This function needs to be called after graph launch to report the layer timing info to the profiler.
|
inlinenoexcept |
Turn the debug state of all debug tensors on or off.
flag | true if turning on debug state, false if turning off debug state. |
The default is off.
|
inlinenoexcept |
Set the auxiliary streams that TensorRT should launch kernels on in the next enqueueV3() call.
If set, TensorRT will launch the kernels that are supposed to run on the auxiliary streams using the streams provided by the user with this API. If this API is not called before the enqueueV3() call, then TensorRT will use the auxiliary streams created by TensorRT internally.
TensorRT will always insert event synchronizations between the main stream provided via enqueueV3() call and the auxiliary streams:
auxStreams | The pointer to an array of cudaStream_t with the array length equal to nbStreams. |
nbStreams | The number of auxiliary streams provided. If nbStreams is greater than engine->getNbAuxStreams() , then only the first engine->getNbAuxStreams() streams will be used. If nbStreams is less than engine->getNbAuxStreams() , such as setting nbStreams to 0, then TensorRT will use the provided streams for the first nbStreams auxiliary streams, and will create additional streams internally for the rest of the auxiliary streams. |
|
inlinenoexcept |
Set DebugListener for this execution context.
listener | DebugListener for this execution context. |
|
inlinenoexcept |
Set the debug sync flag.
If this flag is set to true, the engine will log the successful execution for each kernel during executeV2(). It has no effect when using enqueueV3().
|
inlinenoexcept |
Set the device memory for use by this execution context.
The memory must be aligned with CUDA memory alignment property (using cudaGetDeviceProperties()), and its size must be large enough for performing inference with the given network inputs. getDeviceMemorySize() and getDeviceMemorySizeForProfile() report upper bounds of the size. Setting memory to nullptr is acceptable if the reported size is 0. If using enqueueV3() to run the network, the memory is in use from the invocation of enqueueV3() until network execution is complete. If using executeV2(), it is in use until executeV2() returns. Releasing or otherwise using the memory for other purposes, including using it in another execution context running in parallel, during this time will result in undefined behavior.
|
inlinenoexcept |
Set the device memory and its corresponding size for use by this execution context.
The memory must be aligned with CUDA memory alignment property (using cudaGetDeviceProperties()), and its size must be large enough for performing inference with the given network inputs. getDeviceMemorySize() and getDeviceMemorySizeForProfile() report upper bounds of the size. Setting memory to nullptr is acceptable if the reported size is 0. If using enqueueV3() to run the network, the memory is in use from the invocation of enqueueV3() until network execution is complete. If using executeV2(), it is in use until executeV2() returns. Releasing or otherwise using the memory for other purposes, including using it in another execution context running in parallel, during this time will result in undefined behavior.
|
inlinenoexcept |
Set whether enqueue emits layer timing to the profiler.
If set to true (default), enqueue is synchronous and does layer timing profiling implicitly if there is a profiler attached. If set to false, enqueue will be asynchronous if there is a profiler attached. An extra method reportToProfiler() needs to be called to obtain the profiling data and report to the profiler attached.
|
inlinenoexcept |
Set the ErrorRecorder for this interface.
Assigns the ErrorRecorder to this interface. The ErrorRecorder will track all errors during execution. This function will call incRefCount of the registered ErrorRecorder at least once. Setting recorder to nullptr unregisters the recorder with the interface, resulting in a call to decRefCount if a recorder has been registered.
If an error recorder is not set, messages will be sent to the global log stream.
recorder | The error recorder to register with this interface. |
|
inlinenoexcept |
Mark input as consumed.
event | The CUDA event that is triggered after all input tensors have been consumed. |
Passing event==nullptr removes whatever event was set, if any.
|
inlinenoexcept |
Set shape of given input.
tensorName | The name of an input tensor. |
dims | The shape of an input tensor. |
Each dimension must agree with the network dimension unless the latter was -1.
|
inlinenoexcept |
Set memory address for given input.
tensorName | The name of an input tensor. |
data | The pointer (void const*) to the const data owned by the user. |
Input addresses can also be set using method setTensorAddress, which requires a (void*).
See description of method setTensorAddress() for alignment and data type constraints.
|
inlinenoexcept |
Set the name of the execution context.
This method copies the name string.
|
inlinenoexcept |
Set the verbosity of the NVTX markers in the execution context.
Building with kDETAILED verbosity will generally increase latency in enqueueV3(). Call this method to select NVTX verbosity in this execution context at runtime.
The default is the verbosity with which the engine was built, and the verbosity may not be raised above that level.
This function does not affect how IEngineInspector interacts with the engine.
verbosity | The verbosity of the NVTX markers. |
|
inlinenoexcept |
Select an optimization profile for the current context with async semantics.
profileIndex | Index of the profile. The value must lie between 0 and getEngine().getNbOptimizationProfiles() - 1 |
stream | A CUDA stream on which the cudaMemcpyAsyncs may be enqueued |
When an optimization profile is switched via this API, TensorRT may require that data is copied via cudaMemcpyAsync. It is the application’s responsibility to guarantee that synchronization between the profile sync stream and the enqueue stream occurs.
The selected profile will be used in subsequent calls to executeV2()/enqueueV3(). If the associated CUDA engine has inputs with dynamic shapes, the optimization profile must be set with its corresponding profileIndex before calling execute or enqueue. The newly created execution context will be assigned optimization profile 0.
If the associated CUDA engine does not have inputs with dynamic shapes, this method need not be called, in which case the default profile index of 0 will be used.
setOptimizationProfileAsync() must be called before calling setInputShape() for all dynamic input tensors or input shape tensors, which in turn must be called before executeV2()/enqueueV3().
|
inlinenoexcept |
Set output allocator to use for output tensor of given name. Pass nullptr to outputAllocator to unset. The allocator is called by enqueueV3().
tensorName | The name of an output tensor. |
outputAllocator | IOutputAllocator for the tensors. |
|
inlinenoexcept |
Set the memory address for a given output tensor.
tensorName | The name of an output tensor. |
data | The pointer to the buffer to which to write the output. |
Output addresses can also be set using method setTensorAddress. This method is provided for applications which prefer to use different methods for setting input and output tensors.
See setTensorAddress() for alignment and data type constraints.
|
inlinenoexcept |
Set the maximum size for persistent cache usage.
This function sets the maximum persistent L2 cache that this execution context may use for activation caching. Activation caching is not supported on all architectures - see "How TensorRT uses Memory" in the developer guide for details.
size | the size of persistent cache limitation in bytes. The default is 0 Bytes. |
|
inlinenoexcept |
Set the profiler.
|
inlinenoexcept |
Specify allocator to use for internal temporary storage.
This allocator is used only by enqueueV3() for temporary storage whose size cannot be predicted ahead of enqueueV3(). It is not used for output tensors, because memory for those is allocated by the allocator set by setOutputAllocator(). All memory allocated is freed by the time enqueueV3() returns.
allocator | pointer to allocator to use. Pass nullptr to revert to using TensorRT's default allocator. |
|
inlinenoexcept |
Set memory address for given input or output tensor.
tensorName | The name of an input or output tensor. |
data | The pointer (void*) to the data owned by the user. |
An address defaults to nullptr. Pass data=nullptr to reset to the default state.
Return false if the provided name does not map to an input or output tensor.
If an input pointer has type (void const*), use setInputTensorAddress() instead.
Before calling enqueueV3(), each input must have a non-null address and each output must have a non-null address or an IOutputAllocator to set it later.
If the TensorLocation of the tensor is kHOST:
If the TensorLocation of the tensor is kDEVICE:
If getTensorShape(name) reports a -1 for any dimension of an output after all input shapes have been set, use setOutputAllocator() to associate an IOutputAllocator to which the dimensions will be reported when known.
Calling both setTensorAddress and setOutputAllocator() for the same output is allowed, and can be useful for preallocating memory, and then reallocating if it's not big enough.
The pointer must have at least 256-byte alignment.
|
inlinenoexcept |
Set debug state of tensor given the tensor name.
Turn the debug state of a tensor on or off. A tensor with the parameter tensor name must exist in the network, and the tensor must have been marked as a debug tensor during build time. Otherwise, an error is thrown.
name | Name of target tensor. |
flag | True if turning on debug state, false if turning off debug state of tensor. The default is off. |
|
inlinenoexcept |
Recompute the internal activation buffer sizes based on the current input shapes, and return the total amount of memory required.
Users can allocate the device memory based on the size returned and provide the memory to TRT with IExecutionContext::setDeviceMemory(). Must specify all input shapes and the optimization profile to use before calling this function, otherwise the partition will be invalidated.
|
protected |
Copyright © 2024 NVIDIA Corporation
Privacy Policy |
Manage My Privacy |
Do Not Sell or Share My Data |
Terms of Service |
Accessibility |
Corporate Policies |
Product Security |
Contact