TensorRT
7.2.3.4
|
Builds an engine from a network definition. More...
#include <NvInfer.h>
Public Member Functions | |
virtual TRT_DEPRECATED nvinfer1::INetworkDefinition * | createNetwork ()=0 |
Create a network definition object where all tensors have an implicit batch dimension. More... | |
virtual void | setMaxBatchSize (int32_t batchSize)=0 |
Set the maximum batch size. More... | |
virtual int32_t | getMaxBatchSize () const =0 |
Get the maximum batch size. More... | |
virtual TRT_DEPRECATED void | setMaxWorkspaceSize (std::size_t workspaceSize)=0 |
Set the maximum workspace size. More... | |
virtual TRT_DEPRECATED std::size_t | getMaxWorkspaceSize () const =0 |
Get the maximum workspace size. More... | |
virtual TRT_DEPRECATED void | setHalf2Mode (bool mode)=0 |
Set whether half2 mode is used. More... | |
virtual TRT_DEPRECATED bool | getHalf2Mode () const =0 |
Query whether half2 mode is used. More... | |
virtual TRT_DEPRECATED void | setDebugSync (bool sync)=0 |
Set whether the builder should use debug synchronization. More... | |
virtual TRT_DEPRECATED bool | getDebugSync () const =0 |
Query whether the builder will use debug synchronization. More... | |
virtual TRT_DEPRECATED void | setMinFindIterations (int32_t minFind)=0 |
Set the number of minimization iterations used when timing layers. More... | |
virtual TRT_DEPRECATED int32_t | getMinFindIterations () const =0 |
Query the number of minimization iterations. More... | |
virtual TRT_DEPRECATED void | setAverageFindIterations (int32_t avgFind)=0 |
Set the number of averaging iterations used when timing layers. More... | |
virtual TRT_DEPRECATED int32_t | getAverageFindIterations () const =0 |
Query the number of averaging iterations. More... | |
virtual TRT_DEPRECATED nvinfer1::ICudaEngine * | buildCudaEngine (nvinfer1::INetworkDefinition &network)=0 |
Build a CUDA engine from a network definition. More... | |
virtual bool | platformHasFastFp16 () const =0 |
Determine whether the platform has fast native fp16. | |
virtual bool | platformHasFastInt8 () const =0 |
Determine whether the platform has fast native int8. | |
virtual void | destroy ()=0 |
Destroy this object. | |
virtual TRT_DEPRECATED void | setInt8Mode (bool mode)=0 |
Set whether or not quantized 8-bit kernels are permitted. More... | |
virtual TRT_DEPRECATED bool | getInt8Mode () const =0 |
Query whether Int8 mode is used. More... | |
virtual TRT_DEPRECATED void | setInt8Calibrator (IInt8Calibrator *calibrator)=0 |
Set Int8 Calibration interface. More... | |
virtual TRT_DEPRECATED void | setDeviceType (ILayer *layer, DeviceType deviceType)=0 |
Set the device that this layer must execute on. More... | |
virtual TRT_DEPRECATED DeviceType | getDeviceType (const ILayer *layer) const =0 |
Get the device that this layer executes on. More... | |
virtual TRT_DEPRECATED bool | isDeviceTypeSet (const ILayer *layer) const =0 |
Query whether the DeviceType has been explicitly set for this layer. More... | |
virtual TRT_DEPRECATED void | resetDeviceType (ILayer *layer)=0 |
Reset the DeviceType for this layer. More... | |
virtual TRT_DEPRECATED bool | canRunOnDLA (const ILayer *layer) const =0 |
Checks if a layer can run on DLA. More... | |
virtual TRT_DEPRECATED void | setDefaultDeviceType (DeviceType deviceType)=0 |
Sets the default DeviceType to be used by the builder. It ensures that all the layers that can run on this device will run on it, unless setDeviceType is used to override the default DeviceType for a layer. More... | |
virtual TRT_DEPRECATED DeviceType | getDefaultDeviceType () const =0 |
Get the default DeviceType which was set by setDefaultDeviceType. More... | |
virtual int32_t | getMaxDLABatchSize () const =0 |
Get the maximum batch size DLA can support. For any tensor, the total volume of index dimensions combined (dimensions other than CHW) with the requested batch size should not exceed the value returned by this function. More... | |
virtual TRT_DEPRECATED void | allowGPUFallback (bool setFallBackMode)=0 |
Sets the builder to use GPU if a layer that was supposed to run on DLA cannot run on DLA. More... | |
virtual int32_t | getNbDLACores () const =0 |
Return the number of DLA engines available to this builder. | |
virtual TRT_DEPRECATED void | setDLACore (int32_t dlaCore)=0 |
Set the DLA core that the engine must execute on. More... | |
virtual TRT_DEPRECATED int32_t | getDLACore () const =0 |
Get the DLA core that the engine executes on. More... | |
virtual TRT_DEPRECATED void | reset (nvinfer1::INetworkDefinition &network)=0 |
Resets the builder state. More... | |
virtual void | setGpuAllocator (IGpuAllocator *allocator)=0 |
Set the GPU allocator. More... | |
virtual TRT_DEPRECATED void | setFp16Mode (bool mode)=0 |
Set whether or not 16-bit kernels are permitted. More... | |
virtual TRT_DEPRECATED bool | getFp16Mode () const =0 |
Query whether 16-bit kernels are permitted. More... | |
virtual TRT_DEPRECATED void | setStrictTypeConstraints (bool mode)=0 |
Set whether or not type constraints are strict. More... | |
virtual TRT_DEPRECATED bool | getStrictTypeConstraints () const =0 |
Query whether or not type constraints are strict. More... | |
virtual TRT_DEPRECATED void | setRefittable (bool canRefit)=0 |
Set whether engines will be refittable. More... | |
virtual TRT_DEPRECATED bool | getRefittable () const =0 |
Query whether or not engines will be refittable. More... | |
virtual TRT_DEPRECATED void | setEngineCapability (EngineCapability capability)=0 |
Configure the builder to target specified EngineCapability flow. More... | |
virtual TRT_DEPRECATED EngineCapability | getEngineCapability () const =0 |
Query EngineCapability flow configured for the builder. More... | |
virtual nvinfer1::IBuilderConfig * | createBuilderConfig ()=0 |
Create a builder configuration object. More... | |
virtual nvinfer1::ICudaEngine * | buildEngineWithConfig (INetworkDefinition &network, IBuilderConfig &config)=0 |
Builds an engine for the given INetworkDefinition and given IBuilderConfig. More... | |
virtual nvinfer1::INetworkDefinition * | createNetworkV2 (NetworkDefinitionCreationFlags flags)=0 |
Create a network definition object. More... | |
virtual nvinfer1::IOptimizationProfile * | createOptimizationProfile () noexcept=0 |
Create a new optimization profile. More... | |
virtual void | setErrorRecorder (IErrorRecorder *recorder)=0 |
Set the ErrorRecorder for this interface. More... | |
virtual IErrorRecorder * | getErrorRecorder () const =0 |
Get the ErrorRecorder assigned to this interface. More... | |
virtual void | reset ()=0 |
Resets the builder state to default values. | |
virtual bool | platformHasTf32 () const =0 |
Determine whether the platform has TF32 support. | |
Builds an engine from a network definition.
|
pure virtual |
Sets the builder to use GPU if a layer that was supposed to run on DLA cannot run on DLA.
setFallBackMode | Allows fallback if setFallBackMode is true; otherwise disables the fallback option. |
|
pure virtual |
Build a CUDA engine from a network definition.
|
pure virtual |
Builds an engine for the given INetworkDefinition and given IBuilderConfig.
It enables the builder to build multiple engines based on the same network definition, but with different builder configurations.
|
pure virtual |
Checks if a layer can run on DLA.
|
pure virtual |
Create a builder configuration object.
|
pure virtual |
Create a network definition object where all tensors have an implicit batch dimension.
This method is equivalent to createNetworkV2(0U), and is retained for compatibility with earlier versions of TensorRT. The network does not support dynamic shapes or explicit batch sizes.
|
pure virtual |
Create a network definition object.
Creates a network definition object with immutable properties specified using the flags parameter. Providing the kDEFAULT flag as parameter mimics the behaviour of createNetwork(). createNetworkV2() supports dynamic shapes and explicit batch dimensions when used with the NetworkDefinitionCreationFlag::kEXPLICIT_BATCH flag.
flags | Bitset of NetworkDefinitionCreationFlags specifying network properties combined with bitwise OR. e.g., 1U << NetworkDefinitionCreationFlag::kEXPLICIT_BATCH |
|
pure virtual noexcept |
Create a new optimization profile.
If the network has any dynamic input tensors, the appropriate calls to setDimensions() must be made. Likewise, if there are any shape input tensors, the appropriate calls to setShapeValues() are required. The builder retains ownership of the created optimization profile and returns a raw pointer, i.e. the users must not attempt to delete the returned pointer.
|
pure virtual |
Query the number of averaging iterations.
|
pure virtual |
Query whether the builder will use debug synchronization.
|
pure virtual |
Get the default DeviceType which was set by setDefaultDeviceType.
|
pure virtual |
Get the device that this layer executes on.
|
pure virtual |
Get the DLA core that the engine executes on.
|
pure virtual |
Query EngineCapability flow configured for the builder.
|
pure virtual |
Get the ErrorRecorder assigned to this interface.
Retrieves the assigned error recorder object for the given class. A default error recorder does not exist, so a nullptr will be returned if setErrorRecorder has not been called.
|
pure virtual |
Query whether 16-bit kernels are permitted.
|
pure virtual |
Query whether half2 mode is used.
|
pure virtual |
Query whether Int8 mode is used.
|
pure virtual |
Get the maximum batch size.
|
pure virtual |
Get the maximum batch size DLA can support. For any tensor, the total volume of index dimensions combined (dimensions other than CHW) with the requested batch size should not exceed the value returned by this function.
|
pure virtual |
Get the maximum workspace size.
|
pure virtual |
Query the number of minimization iterations.
|
pure virtual |
Query whether or not engines will be refittable.
|
pure virtual |
Query whether or not type constraints are strict.
|
pure virtual |
Query whether the DeviceType has been explicitly set for this layer.
|
pure virtual |
Resets the builder state.
|
pure virtual |
Reset the DeviceType for this layer.
|
pure virtual |
Set the number of averaging iterations used when timing layers.
When timing layers, the builder minimizes over a set of average times for layer execution. This parameter controls the number of iterations used in averaging.
|
pure virtual |
Set whether the builder should use debug synchronization.
If this flag is true, the builder will synchronize after timing each layer, and report the layer name. It can be useful when diagnosing issues at build time.
|
pure virtual |
Sets the default DeviceType to be used by the builder. It ensures that all the layers that can run on this device will run on it, unless setDeviceType is used to override the default DeviceType for a layer.
|
pure virtual |
Set the device that this layer must execute on.
deviceType | The DeviceType that this layer must execute on. If DeviceType is not set or is reset, TensorRT will use the default DeviceType set in the builder. |
|
pure virtual |
Set the DLA core that the engine must execute on.
dlaCore | The DLA core to execute the engine on (0 to N-1, where N is the maximum number of DLA cores present on the device). Default value is 0. DLA Core is not a property of the engine that is preserved by serialization: when the engine is deserialized it will be associated with the DLA core which is configured for the runtime. |
|
pure virtual |
Configure the builder to target specified EngineCapability flow.
|
pure virtual |
Set the ErrorRecorder for this interface.
Assigns the ErrorRecorder to this interface. The ErrorRecorder will track all errors during execution. This function will call incRefCount of the registered ErrorRecorder at least once. Setting recorder to nullptr unregisters the recorder with the interface, resulting in a call to decRefCount if a recorder has been registered.
recorder | The error recorder to register with this interface. |
|
pure virtual |
Set whether or not 16-bit kernels are permitted.
During engine build, fp16 kernels will also be tried when this mode is enabled.
mode | Whether 16-bit kernels are permitted. |
|
pure virtual |
Set the GPU allocator.
allocator | Set the GPU allocator to be used by the builder. All GPU memory acquired will use this allocator. If NULL is passed, the default allocator will be used. |
Default: uses cudaMalloc/cudaFree.
|
pure virtual |
Set whether half2 mode is used.
half2 mode is a paired-image mode that is significantly faster for batch sizes greater than one on platforms with fp16 support.
mode | Whether half2 mode is used. |
|
pure virtual |
Set Int8 Calibration interface.
|
pure virtual |
Set whether or not quantized 8-bit kernels are permitted.
During engine build, int8 kernels will also be tried when this mode is enabled.
mode | Whether quantized 8-bit kernels are permitted. |
|
pure virtual |
Set the maximum batch size.
batchSize | The maximum batch size which can be used at execution time, and also the batch size for which the engine will be optimized. |
|
pure virtual |
Set the maximum workspace size.
workspaceSize | The maximum GPU temporary memory which the engine can use at execution time. |
|
pure virtual |
Set the number of minimization iterations used when timing layers.
When timing layers, the builder minimizes over a set of average times for layer execution. This parameter controls the number of iterations used in minimization.
|
pure virtual |
Set whether engines will be refittable.
|
pure virtual |
Set whether or not type constraints are strict.
When strict type constraints are in use, TensorRT will always choose a layer implementation that conforms to the type constraints specified, if one exists. If this flag is not set, a higher-precision implementation may be chosen if it results in higher performance.
If no conformant layer exists, TensorRT will choose a non-conformant layer if available regardless of the setting of this flag.
See the developer guide for the definition of strictness.
mode | Whether type constraints are strict |