An engine for executing inference on a built network, with functionally unsafe features. More...

#include <NvInferRuntime.h>

Inheritance diagram for nvinfer1::ICudaEngine:

Public Member Functions
virtual	~ICudaEngine () noexcept=default

Dims	getTensorShape (char const *tensorName) const noexcept
	Get shape of an input or output tensor. More...

DataType	getTensorDataType (char const *tensorName) const noexcept
	Determine the required data type for a buffer from its tensor name. More...

int32_t	getNbLayers () const noexcept
	Get the number of layers in the network. More...

IHostMemory *	serialize () const noexcept
	Serialize the network to a stream. More...

IExecutionContext *	createExecutionContext (ExecutionContextAllocationStrategy strategy=ExecutionContextAllocationStrategy::kSTATIC) noexcept
	Create an execution context and specify the strategy for allocating internal activation memory. More...

TensorLocation	getTensorLocation (char const *tensorName) const noexcept
	Get whether an input or output tensor must be on GPU or CPU. More...

bool	isShapeInferenceIO (char const *tensorName) const noexcept
	True if tensor is required as input for shape calculations or is output from shape calculations. More...

TensorIOMode	getTensorIOMode (char const *tensorName) const noexcept
	Determine whether a tensor is an input or output tensor. More...

TRT_DEPRECATED IExecutionContext *	createExecutionContextWithoutDeviceMemory () noexcept
	Create an execution context without any device memory allocated. More...

TRT_DEPRECATED size_t	getDeviceMemorySize () const noexcept
	Return the maximum device memory required by the context over all profiles. More...

TRT_DEPRECATED size_t	getDeviceMemorySizeForProfile (int32_t profileIndex) const noexcept
	Return the maximum device memory required by the context for a profile. More...

int64_t	getDeviceMemorySizeV2 () const noexcept
	Return the maximum device memory required by the context over all profiles. More...

int64_t	getDeviceMemorySizeForProfileV2 (int32_t profileIndex) const noexcept
	Return the maximum device memory required by the context for a profile. More...

bool	isRefittable () const noexcept
	Return true if an engine can be refit. More...

int32_t	getTensorBytesPerComponent (char const *tensorName) const noexcept
	Return the number of bytes per component of an element, or -1 if the tensor is not vectorized or provided name does not map to an input or output tensor. More...

int32_t	getTensorBytesPerComponent (char const *tensorName, int32_t profileIndex) const noexcept
	Return the number of bytes per component of an element for the given profile, or -1 if the tensor is not vectorized or the provided name does not map to an input or output tensor. More...

int32_t	getTensorComponentsPerElement (char const *tensorName) const noexcept
	Return the number of components included in one element, or -1 if tensor is not vectorized or if the provided name does not map to an input or output tensor. More...

int32_t	getTensorComponentsPerElement (char const *tensorName, int32_t profileIndex) const noexcept
	Return the number of components included in one element of given profile, or -1 if tensor is not vectorized or the provided name does not map to an input or output tensor. More...

TensorFormat	getTensorFormat (char const *tensorName) const noexcept
	Return the tensor format, or TensorFormat::kLINEAR if the provided name does not map to an input or output tensor. More...

TensorFormat	getTensorFormat (char const *tensorName, int32_t profileIndex) const noexcept
	Return the tensor format of given profile, or TensorFormat::kLINEAR if the provided name does not map to an input or output tensor. More...

char const *	getTensorFormatDesc (char const *tensorName) const noexcept
	Return the human readable description of the tensor format, or empty string if the provided name does not map to an input or output tensor. More...

char const *	getTensorFormatDesc (char const *tensorName, int32_t profileIndex) const noexcept
	Return the human readable description of the tensor format of given profile, or empty string if the provided name does not map to an input or output tensor. More...

int32_t	getTensorVectorizedDim (char const *tensorName) const noexcept
	Return the dimension index along which the buffer is vectorized, or -1 if the provided name does not map to an input or output tensor. More...

int32_t	getTensorVectorizedDim (char const *tensorName, int32_t profileIndex) const noexcept
	Return the dimension index along which the buffer is vectorized for the given profile, or -1 if the provided name does not map to an input or output tensor. More...

char const *	getName () const noexcept
	Returns the name of the network associated with the engine. More...

int32_t	getNbOptimizationProfiles () const noexcept
	Get the number of optimization profiles defined for this engine. More...

Dims	getProfileShape (char const *tensorName, int32_t profileIndex, OptProfileSelector select) const noexcept
	Get the minimum / optimum / maximum dimensions for an input tensor given its name under an optimization profile. More...

int32_t const *	getProfileTensorValues (char const *tensorName, int32_t profileIndex, OptProfileSelector select) const noexcept
	Get the minimum / optimum / maximum values (not dimensions) for an input tensor given its name under an optimization profile. These correspond to the values set using IOptimizationProfile::setShapeValues when the engine was built. More...

EngineCapability	getEngineCapability () const noexcept
	Determine what execution capability this engine has. More...

void	setErrorRecorder (IErrorRecorder *recorder) noexcept
	Set the ErrorRecorder for this interface. More...

IErrorRecorder *	getErrorRecorder () const noexcept
	Get the ErrorRecorder assigned to this interface. More...

TRT_DEPRECATED bool	hasImplicitBatchDimension () const noexcept
	Query whether the engine was built with an implicit batch dimension. More...

TacticSources	getTacticSources () const noexcept
	Return the tactic sources required by this engine. More...

ProfilingVerbosity	getProfilingVerbosity () const noexcept
	Return the ProfilingVerbosity the builder config was set to when the engine was built. More...

IEngineInspector *	createEngineInspector () const noexcept
	Create a new engine inspector which prints the layer information in an engine or an execution context. More...

int32_t	getNbIOTensors () const noexcept
	Return number of IO tensors. More...

char const *	getIOTensorName (int32_t index) const noexcept
	Return name of an IO tensor. More...

HardwareCompatibilityLevel	getHardwareCompatibilityLevel () const noexcept
	Return the hardware compatibility level of this engine. More...

int32_t	getNbAuxStreams () const noexcept
	Return the number of auxiliary streams used by this engine. More...

ISerializationConfig *	createSerializationConfig () noexcept
	Create a serialization configuration object. More...

IHostMemory *	serializeWithConfig (ISerializationConfig &config) const noexcept
	Serialize the network to a stream with the provided SerializationConfig. More...

TRT_DEPRECATED bool	setWeightStreamingBudget (int64_t gpuMemoryBudget) noexcept
	Limit the maximum amount of GPU memory usable for network weights in bytes. More...

TRT_DEPRECATED int64_t	getWeightStreamingBudget () const noexcept
	Returns the current weight streaming device memory budget in bytes. More...

TRT_DEPRECATED int64_t	getMinimumWeightStreamingBudget () const noexcept
	The minimum number of bytes of GPU memory required by network weights for successful weight streaming. More...

int64_t	getStreamableWeightsSize () const noexcept
	Get the total size in bytes of all streamable weights. More...

bool	setWeightStreamingBudgetV2 (int64_t gpuMemoryBudget) noexcept
	Limit the maximum amount of GPU memory usable for network weights in bytes. More...

int64_t	getWeightStreamingBudgetV2 () const noexcept
	Returns the current weight streaming device memory budget in bytes. More...

int64_t	getWeightStreamingAutomaticBudget () const noexcept
	TensorRT automatically determines an ideal budget for the model to run. More...

int64_t	getWeightStreamingScratchMemorySize () const noexcept
	Returns the size of the scratch memory required by the current weight streaming budget. More...

bool	isDebugTensor (char const *name) const noexcept
	Check if a tensor is marked as a debug tensor. More...

Protected Attributes
apiv::VCudaEngine *	mImpl

Additional Inherited Members
Protected Member Functions inherited from nvinfer1::INoCopy
	INoCopy ()=default

virtual	~INoCopy ()=default

	INoCopy (INoCopy const &other)=delete

INoCopy &	operator= (INoCopy const &other)=delete

	INoCopy (INoCopy &&other)=delete

INoCopy &	operator= (INoCopy &&other)=delete

Detailed Description

An engine for executing inference on a built network, with functionally unsafe features.

Warning: Do not inherit from this class, as doing so will break forward-compatibility of the API and ABI.

Constructor & Destructor Documentation

◆ ~ICudaEngine()

virtual nvinfer1::ICudaEngine::~ICudaEngine ( )

virtualdefaultnoexcept

Member Function Documentation

◆ createEngineInspector()

IEngineInspector * nvinfer1::ICudaEngine::createEngineInspector ( ) const

inlinenoexcept

Create a new engine inspector which prints the layer information in an engine or an execution context.

See also: IEngineInspector.

◆ createExecutionContext()

IExecutionContext * nvinfer1::ICudaEngine::createExecutionContext ( ExecutionContextAllocationStrategy strategy = ExecutionContextAllocationStrategy::kSTATIC )

inlinenoexcept

Create an execution context and specify the strategy for allocating internal activation memory.

The default value for the allocation strategy is ExecutionContextAllocationStrategy::kSTATIC, which means the context will pre-allocate a block of device memory that is sufficient for all profiles. The newly created execution context will be assigned optimization profile 0. If an error recorder has been set for the engine, it will also be passed to the execution context.

See also: IExecutionContext; IExecutionContext::setOptimizationProfileAsync(); ExecutionContextAllocationStrategy

◆ createExecutionContextWithoutDeviceMemory()

TRT_DEPRECATED IExecutionContext * nvinfer1::ICudaEngine::createExecutionContextWithoutDeviceMemory ( )

inlinenoexcept

Create an execution context without any device memory allocated.

The memory for execution of this device context must be supplied by the application.

Deprecated: Deprecated in TensorRT 10.0. Superseded by createExecutionContext() with an ExecutionContextAllocationStrategy parameter.

◆ createSerializationConfig()

ISerializationConfig * nvinfer1::ICudaEngine::createSerializationConfig ( )

inlinenoexcept

Create a serialization configuration object.

See also: ISerializationConfig

◆ getDeviceMemorySize()

TRT_DEPRECATED size_t nvinfer1::ICudaEngine::getDeviceMemorySize ( ) const

inlinenoexcept

Return the maximum device memory required by the context over all profiles.

Deprecated: Deprecated in TensorRT 10.1. Superseded by getDeviceMemorySizeV2().

See also: IExecutionContext::setDeviceMemory()

◆ getDeviceMemorySizeForProfile()

TRT_DEPRECATED size_t nvinfer1::ICudaEngine::getDeviceMemorySizeForProfile ( int32_t profileIndex ) const

inlinenoexcept

Return the maximum device memory required by the context for a profile.

Deprecated: Deprecated in TensorRT 10.1. Superseded by getDeviceMemorySizeForProfileV2(int32_t).

See also: IExecutionContext::setDeviceMemoryV2()

◆ getDeviceMemorySizeForProfileV2()

int64_t nvinfer1::ICudaEngine::getDeviceMemorySizeForProfileV2 ( int32_t profileIndex ) const

inlinenoexcept

Return the maximum device memory required by the context for a profile.

This API is stateful, so its call returns different values based on the following calls: setWeightStreamingBudget() and setWeightStreamingBudgetV2().

See also: IExecutionContext::setDeviceMemoryV2(); setWeightStreamingBudget(); setWeightStreamingBudgetV2()

◆ getDeviceMemorySizeV2()

int64_t nvinfer1::ICudaEngine::getDeviceMemorySizeV2 ( ) const

inlinenoexcept

Return the maximum device memory required by the context over all profiles.

This API is stateful, so its call returns different values based on the following calls: setWeightStreamingBudget() and setWeightStreamingBudgetV2().

See also: IExecutionContext::setDeviceMemoryV2(); setWeightStreamingBudget(); setWeightStreamingBudgetV2()

◆ getEngineCapability()

EngineCapability nvinfer1::ICudaEngine::getEngineCapability ( ) const

inlinenoexcept

Determine what execution capability this engine has.

If the engine has EngineCapability::kSTANDARD, then all engine functionality is valid. If the engine has EngineCapability::kSAFETY, then only the functionality in safe engine is valid. If the engine has EngineCapability::kDLA_STANDALONE, then only serialize, destroy, and const-accessor functions are valid.

Returns: The EngineCapability flag that the engine was built for.

◆ getErrorRecorder()

IErrorRecorder * nvinfer1::ICudaEngine::getErrorRecorder ( ) const

inlinenoexcept

Get the ErrorRecorder assigned to this interface.

Retrieves the assigned error recorder object for the given class. A nullptr will be returned if an error handler has not been set.

Returns: A pointer to the IErrorRecorder object that has been registered.

See also: setErrorRecorder()

◆ getHardwareCompatibilityLevel()

HardwareCompatibilityLevel nvinfer1::ICudaEngine::getHardwareCompatibilityLevel ( ) const

inlinenoexcept

Return the hardware compatibility level of this engine.

Returns: The level of hardware compatibility.

This is only supported for Ampere and newer architectures.

◆ getIOTensorName()

char const * nvinfer1::ICudaEngine::getIOTensorName ( int32_t index ) const

inlinenoexcept

Return name of an IO tensor.

Parameters

index value between 0 and getNbIOTensors()-1

See also: getNbIOTensors()

◆ getMinimumWeightStreamingBudget()

TRT_DEPRECATED int64_t nvinfer1::ICudaEngine::getMinimumWeightStreamingBudget ( ) const

inlinenoexcept

The minimum number of bytes of GPU memory required by network weights for successful weight streaming.

This is a positive integer for engines with streamable weights because a staging buffer on the GPU is required to temporarily hold the streamed weights. The size of the staging buffer is determined by TensorRT and must be at least as large as the size of the largest streamable weight in the network.

Warning: BuilderFlag::kWEIGHT_STREAMING must be set during engine building.

Returns: The minimum number of bytes of GPU memory required for streaming.

Deprecated: Deprecated in TensorRT 10.1. The minimum budget is 0 in the V2 APIs.

See also: setWeightStreamingBudget()

◆ getName()

char const * nvinfer1::ICudaEngine::getName ( ) const

inlinenoexcept

Returns the name of the network associated with the engine.

The name is set during network creation and is retrieved after building or deserialization.

See also: INetworkDefinition::setName(), INetworkDefinition::getName()

Returns: A null-terminated C-style string representing the name of the network.

◆ getNbAuxStreams()

int32_t nvinfer1::ICudaEngine::getNbAuxStreams ( ) const

inlinenoexcept

Return the number of auxiliary streams used by this engine.

This number will be less than or equal to the maximum allowed number of auxiliary streams set by IBuilderConfig::setMaxAuxStreams() API call when the engine was built.

Returns: the number of auxiliary streams used by this engine.

See also: IBuilderConfig::setMaxAuxStreams(), IExecutionContext::setAuxStreams()

◆ getNbIOTensors()

int32_t nvinfer1::ICudaEngine::getNbIOTensors ( ) const

inlinenoexcept

Return number of IO tensors.

It is the number of input and output tensors for the network from which the engine was built. The names of the IO tensors can be discovered by calling getIOTensorName(i) for i in 0 to getNbIOTensors()-1.

See also: getIOTensorName()

◆ getNbLayers()

int32_t nvinfer1::ICudaEngine::getNbLayers ( ) const

inlinenoexcept

Get the number of layers in the network.

The number of layers in the network is not necessarily the number in the original network definition, as layers may be combined or eliminated as the engine is optimized. This value can be useful when building per-layer tables, such as when aggregating profiling data over a number of executions.

Returns: The number of layers in the network.

◆ getNbOptimizationProfiles()

int32_t nvinfer1::ICudaEngine::getNbOptimizationProfiles ( ) const

inlinenoexcept

Get the number of optimization profiles defined for this engine.

Returns: Number of optimization profiles. It is always at least 1.

See also: IExecutionContext::setOptimizationProfileAsync()

◆ getProfileShape()

Dims nvinfer1::ICudaEngine::getProfileShape	(	char const *	tensorName,
		int32_t	profileIndex,
		OptProfileSelector	select
	)		const

inlinenoexcept

Get the minimum / optimum / maximum dimensions for an input tensor given its name under an optimization profile.

Parameters

tensorName	The name of an input tensor.
profileIndex	The profile index, which must be between 0 and getNbOptimizationProfiles()-1.
select	Whether to query the minimum, optimum, or maximum dimensions for this input tensor.

Returns: The minimum / optimum / maximum dimensions for an input tensor in this profile. If the profileIndex is invalid or provided name does not map to an input tensor, return Dims{-1, {}}

Warning: The string tensorName must be null-terminated, and be at most 4096 bytes including the terminator.

◆ getProfileTensorValues()

int32_t const * nvinfer1::ICudaEngine::getProfileTensorValues	(	char const *	tensorName,
		int32_t	profileIndex,
		OptProfileSelector	select
	)		const

inlinenoexcept

Get the minimum / optimum / maximum values (not dimensions) for an input tensor given its name under an optimization profile. These correspond to the values set using IOptimizationProfile::setShapeValues when the engine was built.

Parameters

tensorName	The name of an input tensor.
profileIndex	The profile index, which must be between 0 and getNbOptimizationProfiles()-1.
select	Whether to query the minimum, optimum, or maximum values for this input tensor.

Returns: The minimum / optimum / maximum values for an input tensor in this profile. If the profileIndex is invalid or the provided name does not map to an input tensor, or the tensor is not a shape binding, return nullptr.

Warning: The string tensorName must be null-terminated, and be at most 4096 bytes including the terminator.

◆ getProfilingVerbosity()

ProfilingVerbosity nvinfer1::ICudaEngine::getProfilingVerbosity ( ) const

inlinenoexcept

Return the ProfilingVerbosity the builder config was set to when the engine was built.

Returns: the profiling verbosity the builder config was set to when the engine was built.

See also: IBuilderConfig::setProfilingVerbosity()

◆ getStreamableWeightsSize()

int64_t nvinfer1::ICudaEngine::getStreamableWeightsSize ( ) const

inlinenoexcept

Get the total size in bytes of all streamable weights.

The set of streamable weights is a subset of all network weights. The total size may exceed free GPU memory.

Returns: The total size in bytes of all streamable weights. Returns 0 if BuilderFlag::kWEIGHT_STREAMING is unset during engine building.

See also: setWeightStreamingBudget()

◆ getTacticSources()

TacticSources nvinfer1::ICudaEngine::getTacticSources ( ) const

inlinenoexcept

Return the tactic sources required by this engine.

The value returned is equal to zero or more tactic sources set at build time via setTacticSources() in IBuilderConfig. Sources set by the latter but not returned by ICudaEngine::getTacticSources do not reduce overall engine execution time, and can be removed from future builds to reduce build time.

See also: IBuilderConfig::setTacticSources()

◆ getTensorBytesPerComponent() [1/2]

int32_t nvinfer1::ICudaEngine::getTensorBytesPerComponent ( char const * tensorName ) const

inlinenoexcept

Return the number of bytes per component of an element, or -1 if the tensor is not vectorized or provided name does not map to an input or output tensor.

The vector component size is returned if getTensorVectorizedDim(tensorName) != -1.

Parameters

tensorName The name of an input or output tensor.

Warning: The string tensorName must be null-terminated, and be at most 4096 bytes including the terminator. This function can only return the result of profile 0, and issues a warning message when there are multiple profiles in the engine; use getTensorBytesPerComponent with profileIndex when there are multiple profiles.

See also: getTensorVectorizedDim(); getTensorBytesPerComponent(tensorName, profileIndex)

◆ getTensorBytesPerComponent() [2/2]

int32_t nvinfer1::ICudaEngine::getTensorBytesPerComponent	(	char const *	tensorName,
		int32_t	profileIndex
	)		const

inlinenoexcept

Return the number of bytes per component of an element for the given profile, or -1 if the tensor is not vectorized or the provided name does not map to an input or output tensor.

The vector component size is returned if getTensorVectorizedDim(tensorName, profileIndex) != -1.

Parameters

tensorName	The name of an input or output tensor.
profileIndex	The profile index to query

Warning: The string tensorName must be null-terminated, and be at most 4096 bytes including the terminator.

See also: getTensorVectorizedDim(tensorName, profileIndex)

◆ getTensorComponentsPerElement() [1/2]

int32_t nvinfer1::ICudaEngine::getTensorComponentsPerElement ( char const * tensorName ) const

inlinenoexcept

Return the number of components included in one element, or -1 if tensor is not vectorized or if the provided name does not map to an input or output tensor.

The number of elements in the vectors is returned if getTensorVectorizedDim(tensorName) != -1.

Parameters

tensorName The name of an input or output tensor.

Warning: The string tensorName must be null-terminated, and be at most 4096 bytes including the terminator. This function can only return the result of profile 0, and issues a warning message when there are multiple profiles in the engine; use getTensorComponentsPerElement with profileIndex when there are multiple profiles.

See also: getTensorVectorizedDim(); getTensorComponentsPerElement(tensorName, profileIndex)

◆ getTensorComponentsPerElement() [2/2]

int32_t nvinfer1::ICudaEngine::getTensorComponentsPerElement	(	char const *	tensorName,
		int32_t	profileIndex
	)		const

inlinenoexcept

Return the number of components included in one element of given profile, or -1 if tensor is not vectorized or the provided name does not map to an input or output tensor.

The number of elements in the vectors is returned if getTensorVectorizedDim(tensorName, profileIndex) != -1.

Parameters

tensorName	The name of an input or output tensor.
profileIndex	The profile index to query

Warning: The string tensorName must be null-terminated, and be at most 4096 bytes including the terminator.

See also: getTensorVectorizedDim(tensorName, profileIndex)

◆ getTensorDataType()

DataType nvinfer1::ICudaEngine::getTensorDataType ( char const * tensorName ) const

inlinenoexcept

Determine the required data type for a buffer from its tensor name.

Parameters

tensorName The name of an input or output tensor.

Returns: The type of the data in the buffer, or DataType::kFLOAT if the provided name does not map to an input or output tensor.

Warning: The string tensorName must be null-terminated, and be at most 4096 bytes including the terminator.

◆ getTensorFormat() [1/2]

TensorFormat nvinfer1::ICudaEngine::getTensorFormat ( char const * tensorName ) const

inlinenoexcept

Return the tensor format, or TensorFormat::kLINEAR if the provided name does not map to an input or output tensor.

Warning: The string tensorName must be null-terminated, and be at most 4096 bytes including the terminator. This API can only return the tensor format of profile 0, and issues a warning message when there are multiple profiles in the engine; use getTensorFormat with profileIndex when there are multiple profiles.

See also: getTensorFormat(tensorName, profileIndex)

◆ getTensorFormat() [2/2]

TensorFormat nvinfer1::ICudaEngine::getTensorFormat	(	char const *	tensorName,
		int32_t	profileIndex
	)		const

inlinenoexcept

Return the tensor format of given profile, or TensorFormat::kLINEAR if the provided name does not map to an input or output tensor.

Parameters

tensorName	The name of an input or output tensor.
profileIndex	The profile index to query the format for.

Warning: The string tensorName must be null-terminated, and be at most 4096 bytes including the terminator.

◆ getTensorFormatDesc() [1/2]

char const * nvinfer1::ICudaEngine::getTensorFormatDesc ( char const * tensorName ) const

inlinenoexcept

Return the human readable description of the tensor format, or empty string if the provided name does not map to an input or output tensor.

The description includes the order, vectorization, data type, and strides. Examples are shown as follows. Example 1: kCHW + FP32 gives "Row-major linear FP32 format". Example 2: kCHW2 + FP16 gives "Two-wide channel vectorized row-major FP16 format". Example 3: kHWC8 + FP16 + Line Stride = 32 gives "Channel major FP16 format where C % 8 == 0 and H Stride % 32 == 0".

Parameters

tensorName The name of an input or output tensor.

Warning: The string tensorName must be null-terminated, and be at most 4096 bytes including the terminator. This function can only return the result of profile 0, and issues a warning message when there are multiple profiles in the engine; use getTensorFormatDesc with profileIndex when there are multiple profiles.

◆ getTensorFormatDesc() [2/2]

char const * nvinfer1::ICudaEngine::getTensorFormatDesc	(	char const *	tensorName,
		int32_t	profileIndex
	)		const

inlinenoexcept

Return the human readable description of the tensor format of given profile, or empty string if the provided name does not map to an input or output tensor.

The description includes the order, vectorization, data type, and strides. Examples are shown as follows. Example 1: kCHW + FP32 gives "Row-major linear FP32 format". Example 2: kCHW2 + FP16 gives "Two-wide channel vectorized row-major FP16 format". Example 3: kHWC8 + FP16 + Line Stride = 32 gives "Channel major FP16 format where C % 8 == 0 and H Stride % 32 == 0".

Parameters

tensorName	The name of an input or output tensor.
profileIndex	The profile index to query the format for.

Warning: The string tensorName must be null-terminated, and be at most 4096 bytes including the terminator.

◆ getTensorIOMode()

TensorIOMode nvinfer1::ICudaEngine::getTensorIOMode ( char const * tensorName ) const

inlinenoexcept

Determine whether a tensor is an input or output tensor.

Parameters

tensorName The name of an input or output tensor.

Returns: kINPUT if tensorName is an input, kOUTPUT if tensorName is an output, or kNONE if neither.

Warning: The string tensorName must be null-terminated, and be at most 4096 bytes including the terminator.

◆ getTensorLocation()

TensorLocation nvinfer1::ICudaEngine::getTensorLocation ( char const * tensorName ) const

inlinenoexcept

Get whether an input or output tensor must be on GPU or CPU.

Parameters

tensorName The name of an input or output tensor.

Returns: TensorLocation::kDEVICE if tensorName must be on GPU, or TensorLocation::kHOST if on CPU, or TensorLocation::kDEVICE if the provided name does not map to an input or output tensor.

The location is established at build time. For example, shape tensor inputs are typically required to be on the CPU.

Warning: The string tensorName must be null-terminated, and be at most 4096 bytes including the terminator.

◆ getTensorShape()

Dims nvinfer1::ICudaEngine::getTensorShape ( char const * tensorName ) const

inlinenoexcept

Get shape of an input or output tensor.

Parameters

tensorName The name of an input or output tensor.

Returns: shape of the tensor, with -1 in place of each dynamic runtime dimension, or Dims{-1, {}} if the provided name does not map to an input or output tensor.

Warning: The string tensorName must be null-terminated, and be at most 4096 bytes including the terminator.

◆ getTensorVectorizedDim() [1/2]

int32_t nvinfer1::ICudaEngine::getTensorVectorizedDim ( char const * tensorName ) const

inlinenoexcept

Return the dimension index along which the buffer is vectorized, or -1 if the provided name does not map to an input or output tensor.

Specifically -1 is returned if scalars per vector is 1.

Parameters

tensorName The name of an input or output tensor.

Warning: The string tensorName must be null-terminated, and be at most 4096 bytes including the terminator. This function can only return the result of profile 0, and issues a warning message when there are multiple profiles in the engine; use getTensorVectorizedDim with profileIndex when there are multiple profiles.

◆ getTensorVectorizedDim() [2/2]

int32_t nvinfer1::ICudaEngine::getTensorVectorizedDim	(	char const *	tensorName,
		int32_t	profileIndex
	)		const

inlinenoexcept

Return the dimension index along which the buffer is vectorized for the given profile, or -1 if the provided name does not map to an input or output tensor.

Specifically -1 is returned if scalars per vector is 1.

Parameters

tensorName	The name of an input or output tensor.
profileIndex	The profile index to query the format for.

Warning: The string tensorName must be null-terminated, and be at most 4096 bytes including the terminator.

◆ getWeightStreamingAutomaticBudget()

int64_t nvinfer1::ICudaEngine::getWeightStreamingAutomaticBudget ( ) const

inlinenoexcept

TensorRT automatically determines an ideal budget for the model to run.

Warning: BuilderFlag::kWEIGHT_STREAMING must be set during engine building. The return value may change between TensorRT minor versions. Setting the returned budget with V1 APIs (setWeightStreamingBudget()) will lead to undefined behavior. Please use V2 APIs.

Returns: The weight streaming budget in bytes. Please set with setWeightStreamingBudgetV2().

See also: BuilderFlag::kWEIGHT_STREAMING; setWeightStreamingBudgetV2()

◆ getWeightStreamingBudget()

TRT_DEPRECATED int64_t nvinfer1::ICudaEngine::getWeightStreamingBudget ( ) const

inlinenoexcept

Returns the current weight streaming device memory budget in bytes.

Warning: BuilderFlag::kWEIGHT_STREAMING must be set during engine building.

Returns: The weight streaming budget in bytes. Please see setWeightStreamingBudget() for the possible values.

Deprecated: Deprecated in TensorRT 10.1. Superseded by getWeightStreamingBudgetV2().

See also: BuilderFlag::kWEIGHT_STREAMING; setWeightStreamingBudget(); getMinimumWeightStreamingBudget(); getStreamableWeightsSize()

◆ getWeightStreamingBudgetV2()

int64_t nvinfer1::ICudaEngine::getWeightStreamingBudgetV2 ( ) const

inlinenoexcept

Returns the current weight streaming device memory budget in bytes.

Warning: BuilderFlag::kWEIGHT_STREAMING must be set during engine building.

Returns: The weight streaming budget in bytes. Please see setWeightStreamingBudgetV2() for the possible return values. Returns getStreamableWeightsSize() if weight streaming is disabled.

See also: BuilderFlag::kWEIGHT_STREAMING; setWeightStreamingBudget(); getMinimumWeightStreamingBudget(); getStreamableWeightsSize()

◆ getWeightStreamingScratchMemorySize()

int64_t nvinfer1::ICudaEngine::getWeightStreamingScratchMemorySize ( ) const

inlinenoexcept

Returns the size of the scratch memory required by the current weight streaming budget.

Weight streaming requires small amounts of scratch memory on the GPU to stage CPU weights right before execution. This value is typically much smaller than the total streamable weights size. Each IExecutionContext will then allocate this additional memory or the user can provide the additional memory through getDeviceMemorySizeV2() and IExecutionContext::setDeviceMemoryV2().

The return value of this call depends on the current weight streaming budget (see setWeightStreamingBudgetV2()).

Warning: BuilderFlag::kWEIGHT_STREAMING must be set during engine building.

Returns: The weight streaming scratch memory in bytes. Returns 0 if weight streaming is disabled.

See also: BuilderFlag::kWEIGHT_STREAMING; setWeightStreamingBudgetV2(); getStreamableWeightsSize(); getDeviceMemorySizeV2(); getDeviceMemorySizeForProfileV2(); IExecutionContext::setDeviceMemoryV2()

◆ hasImplicitBatchDimension()

TRT_DEPRECATED bool nvinfer1::ICudaEngine::hasImplicitBatchDimension ( ) const

inlinenoexcept

Query whether the engine was built with an implicit batch dimension.

Returns: Always false since TensorRT 10.0 does not support an implicit batch dimension.

See also: createNetworkV2

Deprecated: Deprecated in TensorRT 10.0. Implicit batch is not supported since TensorRT 10.0.

◆ isDebugTensor()

bool nvinfer1::ICudaEngine::isDebugTensor ( char const * name ) const

inline noexcept

Check if a tensor is marked as a debug tensor.

Determine whether the given name corresponds to a debug tensor.

Returns: True if tensor is a debug tensor, false otherwise.

See also: INetworkDefinition::markDebug

◆ isRefittable()

bool nvinfer1::ICudaEngine::isRefittable ( ) const

inline noexcept

Return true if an engine can be refit.

See also: nvinfer1::createInferRefitter()

◆ isShapeInferenceIO()

bool nvinfer1::ICudaEngine::isShapeInferenceIO ( char const * tensorName ) const

inline noexcept

True if tensor is required as input for shape calculations or is output from shape calculations.

Return true for either of the following conditions:

The tensor is a network input, and its value is required for IExecutionContext::getTensorShape() to return the shape of a network output.
The tensor is a network output, and IExecutionContext::inferShapes() will compute its values.

For example, if a network uses an input tensor "foo" as an addend to an IElementWiseLayer that computes the "reshape dimensions" for IShuffleLayer, then isShapeInferenceIO("foo") == true. If the network copies said input tensor "foo" to an output "bar", then isShapeInferenceIO("bar") == true and IExecutionContext::inferShapes() will write to "bar".

◆ serialize()

IHostMemory * nvinfer1::ICudaEngine::serialize ( ) const

inline noexcept

Serialize the network to a stream.

Returns: An IHostMemory object that contains the serialized engine.

The network may be deserialized with IRuntime::deserializeCudaEngine().

See also: IRuntime::deserializeCudaEngine()

◆ serializeWithConfig()

IHostMemory * nvinfer1::ICudaEngine::serializeWithConfig ( ISerializationConfig & config ) const

inline noexcept

Serialize the network to a stream with the provided SerializationConfig.

Returns: An IHostMemory object that contains the serialized engine.

The network may be deserialized with IRuntime::deserializeCudaEngine(). Serializing a plan file with SerializationFlag::kEXCLUDE_WEIGHTS requires building the engine with kREFIT or kREFIT_IDENTICAL.

See also: IRuntime::deserializeCudaEngine()

◆ setErrorRecorder()

void nvinfer1::ICudaEngine::setErrorRecorder ( IErrorRecorder * recorder )

inline noexcept

Set the ErrorRecorder for this interface.

Assigns the ErrorRecorder to this interface. The ErrorRecorder will track all errors during execution. This function will call incRefCount of the registered ErrorRecorder at least once. Setting recorder to nullptr unregisters the recorder with the interface, resulting in a call to decRefCount if a recorder has been registered.

If an error recorder is not set, messages will be sent to the global log stream.

Parameters

recorder The error recorder to register with this interface.

See also: getErrorRecorder()

◆ setWeightStreamingBudget()

TRT_DEPRECATED bool nvinfer1::ICudaEngine::setWeightStreamingBudget ( int64_t gpuMemoryBudget )

inline noexcept

Limit the maximum amount of GPU memory usable for network weights in bytes.

Parameters

gpuMemoryBudget

This parameter may take on 3 types of values:

-1: Allows TensorRT to choose the budget according to the streamable weights size. Free CUDA memory will be queried at createExecutionContext() and accordingly:
    If streamable weights all fit: weight streaming is not required and disabled.
    Otherwise: Budget is set to getMinimumWeightStreamingBudget().
0: (default) Disables weight streaming. The execution may fail if the network is too large for GPU memory.
>0: The maximum bytes of GPU memory that weights can occupy. It must be bounded by [getMinimumWeightStreamingBudget(), free GPU memory].

By setting a weight limit, users can expect a GPU memory usage reduction of (total bytes for network weights) - gpuMemoryBudget bytes. Maximum memory savings occur when gpuMemoryBudget is set to getMinimumWeightStreamingBudget(). Creating additional IExecutionContexts will increase memory usage by O(getMinimumWeightStreamingBudget()).

Streaming larger amounts of memory will likely result in lower performance except in some boundary cases where streaming weights allows the user to run larger batch sizes. The higher throughput offsets the increased latency in these cases. Tuning the value of the memory limit is recommended for best performance.

Warning: GPU memory for the weights is allocated in this call and will be deallocated by enabling weight streaming or destroying the ICudaEngine. BuilderFlag::kWEIGHT_STREAMING must be set during engine building. The weights streaming budget cannot be modified while there are active IExecutionContexts.

Returns: true if the memory limit is valid and the call was successful, false otherwise.

Deprecated: Deprecated in TensorRT 10.1. Superseded by setWeightStreamingBudgetV2().

See also: BuilderFlag::kWEIGHT_STREAMING; getWeightStreamingBudget(); getMinimumWeightStreamingBudget(); getStreamableWeightsSize()

◆ setWeightStreamingBudgetV2()

bool nvinfer1::ICudaEngine::setWeightStreamingBudgetV2 ( int64_t gpuMemoryBudget )

inline noexcept

Limit the maximum amount of GPU memory usable for network weights in bytes.

Parameters

gpuMemoryBudget This parameter must be a non-negative value.
0: Only small amounts of scratch memory will be required to run the model.
>= getStreamableWeightsSize() (default): Disables weight streaming. The execution may fail if the network is too large for GPU memory.

By setting a weight limit, users can expect a GPU memory usage reduction on the order of (total bytes for network weights) - gpuMemoryBudget bytes. Maximum memory savings occur when gpuMemoryBudget is set to 0. Each IExecutionContext will require getWeightStreamingScratchMemorySize() bytes of additional device memory if the engine is streaming its weights (budget < getStreamableWeightsSize()).

Streaming larger amounts of memory will likely result in lower performance except in some boundary cases where streaming weights allows the user to run larger batch sizes. The higher throughput offsets the increased latency in these cases. Tuning the value of the memory limit is recommended for best performance.

Warning: GPU memory for the weights is allocated in this call and will be deallocated by enabling weight streaming or destroying the ICudaEngine. BuilderFlag::kWEIGHT_STREAMING must be set during engine building. The weights streaming budget cannot be modified while there are active IExecutionContexts. Using the V2 weight streaming APIs with V1 APIs (setWeightStreamingBudget(), getWeightStreamingBudget(), getMinimumWeightStreamingBudget()) leads to undefined behavior.

Returns: true if the memory limit is valid and the call was successful, false otherwise.

See also: BuilderFlag::kWEIGHT_STREAMING; getWeightStreamingBudgetV2(); getWeightStreamingScratchMemorySize(); getWeightStreamingAutomaticBudget(); getStreamableWeightsSize()

Member Data Documentation

◆ mImpl

apiv::VCudaEngine* nvinfer1::ICudaEngine::mImpl

protected

The documentation for this class was generated from the following file:

NvInferRuntime.h

Public Member Functions

Protected Attributes

Additional Inherited Members

Detailed Description

Constructor & Destructor Documentation

◆ ~ICudaEngine()

Member Function Documentation

◆ createEngineInspector()

◆ createExecutionContext()

◆ createExecutionContextWithoutDeviceMemory()

◆ createSerializationConfig()

◆ getDeviceMemorySize()

◆ getDeviceMemorySizeForProfile()

◆ getDeviceMemorySizeForProfileV2()

◆ getDeviceMemorySizeV2()

◆ getEngineCapability()

◆ getErrorRecorder()

◆ getHardwareCompatibilityLevel()

◆ getIOTensorName()

◆ getMinimumWeightStreamingBudget()

◆ getName()

◆ getNbAuxStreams()

◆ getNbIOTensors()

◆ getNbLayers()

◆ getNbOptimizationProfiles()

◆ getProfileShape()

◆ getProfileTensorValues()

◆ getProfilingVerbosity()

◆ getStreamableWeightsSize()

◆ getTacticSources()

◆ getTensorBytesPerComponent() [1/2]

◆ getTensorBytesPerComponent() [2/2]

◆ getTensorComponentsPerElement() [1/2]

◆ getTensorComponentsPerElement() [2/2]

◆ getTensorDataType()

◆ getTensorFormat() [1/2]

◆ getTensorFormat() [2/2]

◆ getTensorFormatDesc() [1/2]

◆ getTensorFormatDesc() [2/2]

◆ getTensorIOMode()

◆ getTensorLocation()

◆ getTensorShape()

◆ getTensorVectorizedDim() [1/2]

◆ getTensorVectorizedDim() [2/2]

◆ getWeightStreamingAutomaticBudget()

◆ getWeightStreamingBudget()

◆ getWeightStreamingBudgetV2()

◆ getWeightStreamingScratchMemorySize()

◆ hasImplicitBatchDimension()

◆ isDebugTensor()

◆ isRefittable()

◆ isShapeInferenceIO()

◆ serialize()

◆ serializeWithConfig()

◆ setErrorRecorder()

◆ setWeightStreamingBudget()

◆ setWeightStreamingBudgetV2()

Member Data Documentation

◆ mImpl