56 #define NV_TENSORRT_MAJOR 4
57 #define NV_TENSORRT_MINOR 0
58 #define NV_TENSORRT_PATCH 1
59 #define NV_TENSORRT_BUILD 3
61 #define NV_TENSORRT_SONAME_MAJOR 4
62 #define NV_TENSORRT_SONAME_MINOR 1
63 #define NV_TENSORRT_SONAME_PATCH 2
65 #if __cplusplus > 201103L
66 #define _TENSORRT_FINAL final
68 #define _TENSORRT_FINAL
72 #ifdef TENSORRT_BUILD_LIB
73 #define TENSORRTAPI __attribute__ ((visibility ("default")))
111 template <
typename T>
220 :
Dims2(height, width)
230 int&
h() {
return d[0]; }
237 int h()
const {
return d[0]; }
244 int&
w() {
return d[1]; }
251 int w()
const {
return d[1]; }
267 d[0] =
d[1] =
d[2] = 0;
311 :
Dims3(channels, height, width)
322 int&
c() {
return d[0]; }
329 int c()
const {
return d[0]; }
336 int&
h() {
return d[1]; }
343 int h()
const {
return d[1]; }
350 int&
w() {
return d[2]; }
357 int w()
const {
return d[2]; }
373 d[0] =
d[1] =
d[2] =
d[3] = 0;
384 Dims4(
int d0,
int d1,
int d2,
int d3)
420 DimsNCHW(
int batchSize,
int channels,
int height,
int width)
421 :
Dims4(batchSize, channels, height, width)
433 int&
n() {
return d[0]; }
440 int n()
const {
return d[0]; }
447 int&
c() {
return d[1]; }
454 int c()
const {
return d[1]; }
461 int&
h() {
return d[2]; }
468 int h()
const {
return d[2]; }
475 int&
w() {
return d[3]; }
482 int w()
const {
return d[3]; }
512 virtual void*
data()
const = 0;
513 virtual std::size_t
size()
const = 0;
595 virtual void setName(
const char* name) = 0;
604 virtual const char*
getName()
const = 0;
739 virtual void setName(
const char* name) = 0;
746 virtual const char*
getName()
const = 0;
1062 kMAX_AVERAGE_BLEND = 2
1214 virtual void setAlpha(
float alpha) = 0;
1221 virtual float getAlpha()
const = 0;
1229 virtual void setBeta(
float beta) = 0;
1236 virtual float getBeta()
const = 0;
1244 virtual void setK(
float k) = 0;
1251 virtual float getK()
const = 0;
1387 virtual void setAxes(uint32_t axes) = 0;
1394 virtual uint32_t
getAxes()
const = 0;
1420 virtual void setAxis(
int axis) = 0;
1427 virtual int getAxis()
const = 0;
2147 virtual int32_t getLayerCount()
const = 0;
2148 virtual int32_t getHiddenSize()
const = 0;
2149 virtual int32_t getMaxSeqLength()
const = 0;
2150 virtual int32_t getDataLength()
const = 0;
2394 virtual void configure(
const Dims* inputDims,
int nbInputs,
const Dims* outputDims,
int nbOutputs,
int maxBatchSize) = 0;
2429 virtual int enqueue(
int batchSize,
const void*
const* inputs,
void** outputs,
void* workspace,
cudaStream_t stream) = 0;
2445 virtual void serialize(
void* buffer) = 0;
2468 return NV_TENSORRT_VERSION;
2841 virtual void setK(
int k) = 0;
2848 virtual int getK()
const = 0;
3599 virtual bool execute(
int batchSize,
void** bindings) = 0;
3669 virtual void setName(
const char* name) = 0;
3676 virtual const char*
getName()
const = 0;
3854 kLEGACY_CALIBRATION = 0,
3855 kENTROPY_CALIBRATION = 1
3898 virtual bool getBatch(
void* bindings[],
const char* names[],
int nbBindings) = 0;
3943 virtual CalibrationAlgoType getAlgorithm() {
return CalibrationAlgoType::kENTROPY_CALIBRATION; }
4020 virtual void*
allocate(uint64_t size, uint64_t alignment, uint32_t flags) = 0;
4029 virtual void free(
void* memory) = 0;
4250 virtual IPlugin*
createPlugin(
const char* layerName,
const void* serialData,
size_t serialLength) = 0;
4322 virtual void log(
Severity severity,
const char* msg) = 0;
4329 inline int EnumMax<ILogger::Severity>()
4360 inline IBuilder* createInferBuilder(ILogger& logger)
4370 inline IRuntime* createInferRuntime(ILogger& logger)
DimensionType type[MAX_DIMS]
The type of each dimension.
Definition: NvInfer.h:164
int EnumMax< PluginFormat >()
Maximum number of elements in PluginFormat enum.
Definition: NvInfer.h:2343
virtual bool enqueue(int batchSize, void **bindings, cudaStream_t stream, cudaEvent_t *inputConsumed)=0
Asynchronously execute inference on a batch.
int w() const
Get the width.
Definition: NvInfer.h:482
int n() const
Get the index count.
Definition: NvInfer.h:440
An engine for executing inference on a built network.
Definition: NvInfer.h:3697
Subtract the second element from the first.
Perform the normal matrix multiplication in the first recurrent layer.
DataType
The type of weights and tensors.
Definition: NvInfer.h:118
virtual void setAverageCountExcludesPadding(bool exclusive)=0
Set whether average pooling uses as a denominator the overlap area between the window and the unpadde...
virtual bool isNetworkInput() const =0
Whether the tensor is a network input.
virtual DimsHW getDilation() const =0
Get the dilation for a convolution.
Severity
Definition: NvInfer.h:4308
virtual void setMinFindIterations(int minFind)=0
Set the number of minimization iterations used when timing layers.
virtual int getMaxBatchSize() const =0
Get the maximum batch size.
int EnumMax< TensorLocation >()
Maximum number of elements in TensorLocation enum.
Definition: NvInfer.h:570
virtual ITensor * getCellState() const =0
Get the initial cell state of the RNN.
virtual IPlugin * createPlugin(const char *layerName, const void *serialData, size_t serialLength)=0
Create a plugin from serialized data.
virtual ITensor * addInput(const char *name, DataType type, Dims dimensions)=0
Add an input tensor to the network.
virtual uint32_t getAxes() const =0
Get the axis along which softmax occurs.
virtual void setBias(Weights bias)=0
Set the bias parameters for the RNN.
DimsNCHW(int batchSize, int channels, int height, int width)
Construct a DimsNCHW given batch size, channel count, height and width.
Definition: NvInfer.h:420
virtual void setKernelWeights(Weights weights)=0
Set the kernel weights for the deconvolution.
virtual ReduceOperation getOperation() const =0
Get the reduce operation for the layer.
#define NV_TENSORRT_MAJOR
TensorRT major version.
Definition: NvInfer.h:56
virtual IExecutionContext * createExecutionContext()=0
Create an execution context.
virtual int getBatchSize() const =0
Get the batch size used for calibration batches.
RNNOperation
Enumerates the RNN operations that may be performed by an RNN layer.
Definition: NvInfer.h:1719
virtual void setOperation(ElementWiseOperation type)=0
Set the binary operation for the layer.
virtual void setBroadcastAcrossBatch(bool broadcastAcrossBatch)=0
Set whether to enable broadcast of tensor across the batch.
A Softmax layer in a network definition.
Definition: NvInfer.h:1367
virtual int getNbGroups() const =0
Get the number of groups for a convolution.
virtual Weights getPower() const =0
Get the power value.
virtual void setWeightsForGate(int layerIndex, RNNGateType gate, bool isW, Weights weights)=0
Set the weight parameters for an individual gate in the RNN.
Definition: NvInfer.h:2707
virtual double getQuantile() const =0
The quantile (between 0 and 1) that will be used to select the region maximum when the quantile metho...
virtual std::size_t getWorkspaceSize() const =0
Get the amount of workspace the engine uses.
An application error has occurred.
An application error has been discovered, but TensorRT has recovered or fallen back to a default...
virtual Weights getBias() const =0
Get the bias parameter vector for the RNN.
virtual void destroy()=0
Destroy the allocated memory.
virtual IMatrixMultiplyLayer * addMatrixMultiply(ITensor &input0, bool transpose0, ITensor &input1, bool transpose1)=0
Add a MatrixMultiply layer to the network.
virtual bool bindingIsInput(int bindingIndex) const =0
Determine whether a binding is an input binding.
virtual int getGatherAxis() const =0
Get the non-batch dimension axis to gather on.
virtual void setInputMode(RNNInputMode op)=0
Set the input mode of the RNN layer.
virtual const char * getName() const =0
Return the name of a layer.
virtual Weights getScale() const =0
Get the scale value.
virtual DimsHW getKernelSize() const =0
Get the HW kernel size of the deconvolution.
Layer that represents an unary operation.
Definition: NvInfer.h:2558
virtual void destroy()=0
Destroy this object.
virtual DimsHW getStride() const =0
Get the stride of the deconvolution.
Rectified linear activation.
virtual bool getDebugSync() const =0
Get the debug sync flag.
virtual bool execute(int batchSize, void **bindings)=0
Synchronously execute inference on a batch.
virtual void destroy()=0
Destroy this INetworkDefinition object.
An Activation layer in a network definition.
Definition: NvInfer.h:1032
TENSORRTAPI void * createInferRuntime_INTERNAL(void *logger, int version)
Internal C entry point for creating IRuntime.
int w() const
Get the width.
Definition: NvInfer.h:357
virtual Dims getOutputDimensions(int index, const Dims *inputs, int nbInputDims)=0
Get the dimension of an output tensor.
RNNDirection
Enumerates the RNN direction that may be performed by an RNN layer.
Definition: NvInfer.h:1740
int EnumMax< DataType >()
Maximum number of elements in DataType enum.
Definition: NvInfer.h:127
Layer that represents a Matrix Multiplication.
Definition: NvInfer.h:2887
virtual DimsHW getKernelSize() const =0
Get the HW kernel size of the convolution.
virtual DataType type() const =0
The type of the memory that was allocated.
virtual int getAverageFindIterations() const =0
Query the number of averaging iterations.
DimsCHW()
Construct an empty DimsCHW object.
Definition: NvInfer.h:296
No operation is performed on the first recurrent layer.
const void * values
The weight values, in a contiguous array.
Definition: NvInfer.h:497
int c() const
Get the channel count.
Definition: NvInfer.h:454
virtual bool getDebugSync() const =0
Query whether the builder will use debug synchronization.
virtual void setAlpha(float alpha)=0
Set the LRN alpha value.
struct CUevent_st * cudaEvent_t
Forward declaration of cudaEvent_t.
Definition: NvInfer.h:99
virtual Dims getBindingDimensions(int bindingIndex) const =0
Get the dimensions of a binding.
int h() const
Get the height.
Definition: NvInfer.h:468
A convolution layer in a network definition.
Definition: NvInfer.h:789
virtual ITopKLayer * addTopK(ITensor &input, TopKOperation op, int k, uint32_t reduceAxes)=0
Add a TopK layer to the network.
Allows a serialized engine to be deserialized.
Definition: NvInfer.h:4258
virtual Dims getReshapeDimensions() const =0
Get the reshaped dimensions.
virtual void writeHistogramCache(const void *ptr, std::size_t length)=0
Save a histogram cache.
NCHW with 2-element packed channels.
virtual void setPoolingOutputDimensionsFormula(IOutputDimensionsFormula *formula)=0
Set the pooling output dimensions formula.
A RaggedSoftmax layer in a network definition.
Definition: NvInfer.h:2921
virtual unsigned getLayerCount() const =0
Get the number of layers in the RNN.
virtual int getNbOutputs() const =0
Get the number of outputs from the layer.
virtual void configure(const Dims *inputDims, int nbInputs, const Dims *outputDims, int nbOutputs, int maxBatchSize)=0
Configure the layer.
ScaleMode
Controls how scale is applied in a Scale layer.
Definition: NvInfer.h:1262
Plugin class for user-implemented layers.
Definition: NvInfer.h:2355
Layer that represents a constant value.
Definition: NvInfer.h:2931
virtual ILayer * getLayer(int index) const =0
Get the layer specified by the given index.
A Scale layer in a network definition.
Definition: NvInfer.h:1295
virtual bool getBatch(void *bindings[], const char *names[], int nbBindings)=0
Get a batch of input for calibration.
virtual IRaggedSoftMaxLayer * addRaggedSoftMax(ITensor &input, ITensor &bounds)=0
Add a RaggedSoftMax layer to the network.
virtual int getNbGroups() const =0
Get the number of groups for a deconvolution.
NHWC with 8-element packed channels (C must be a multiple of 8).
virtual void setNbOutputMaps(int nbOutputMaps)=0
Set the number of output maps for the convolution.
virtual void log(Severity severity, const char *msg)=0
virtual const char * getBindingName(int bindingIndex) const =0
Retrieve the name corresponding to a binding index.
virtual IRNNv2Layer * addRNNv2(ITensor &input, int32_t layerCount, int32_t hiddenSize, int32_t maxSeqLen, RNNOperation op)=0
Add a layerCount deep RNN layer to the network with hiddenSize internal states that can take a batch...
virtual bool getFp16Mode() const =0
Query whether 16-bit kernels are permitted.
virtual IScaleLayer * addScale(ITensor &input, ScaleMode mode, Weights shift, Weights scale, Weights power)=0
Add a Scale layer to the network.
virtual void setHiddenState(ITensor &hidden)=0
Set the initial hidden state of the RNN with the provided hidden ITensor.
int EnumMax< RNNOperation >()
Maximum number of elements in RNNOperation enum.
Definition: NvInfer.h:1728
int EnumMax< LayerType >()
Maximum number of elements in LayerType enum.
Definition: NvInfer.h:554
virtual void setAxes(uint32_t axes)=0
Set the axis along which softmax is computed. Currently, only one axis can be set.
static const int MAX_DIMS
The maximum number of dimensions supported for a tensor.
Definition: NvInfer.h:161
virtual void setPrePadding(DimsHW padding)=0
Set the padding that is applied at the start of the tensor.
virtual DataType getBindingDataType(int bindingIndex) const =0
Determine the required data type for a buffer from its binding index.
A fully connected layer in a network definition. This layer expects an input tensor of three or more ...
Definition: NvInfer.h:954
virtual ITensor * getSequenceLengths() const =0
Get the sequence lengths specified for the RNN.
virtual DataType getType() const =0
Get the data type of a tensor.
PluginFormat
Definition: NvInfer.h:2335
TENSORRTAPI void * createInferBuilder_INTERNAL(void *logger, int version)
Internal C entry point for creating IBuilder.
virtual void setKeepDimensions(bool keepDimensions)=0
Set the boolean that specifies whether or not to keep the reduced dimensions for the layer...
virtual void setBeta(float beta)=0
Set the LRN beta value.
int w() const
Get the width.
Definition: NvInfer.h:251
ReduceOperation
Enumerates the reduce operations that may be performed by a Reduce layer.
Definition: NvInfer.h:2584
An internal error has occurred. Execution is unrecoverable.
virtual const char * getName() const =0
Return the name of the execution context.
A LRN layer in a network definition.
Definition: NvInfer.h:1190
Descriptor for three-dimensional data.
Definition: NvInfer.h:258
virtual bool getInt8Mode() const =0
Query whether Int8 mode is used.
virtual int getNbLayers() const =0
Get the number of layers in the network.
virtual ElementWiseOperation getOperation() const =0
Get the binary operation for the layer.
virtual void setOperation(UnaryOperation op)=0
Set the unary operation for the layer.
virtual IPaddingLayer * addPadding(ITensor &input, DimsHW prePadding, DimsHW postPadding)=0
Add a padding layer to the network.
TensorLocation
The location for tensor data storage, device or host.
Definition: NvInfer.h:563
Definition: NvInfer.h:1618
Builds an engine from a network definition.
Definition: NvInfer.h:4037
virtual void markOutput(ITensor &tensor)=0
Mark a tensor as a network output.
virtual DimsHW getPadding() const =0
Get the padding of the deconvolution.
virtual void setOperation(RNNOperation op)=0
Set the operation of the RNN layer.
virtual void writeCalibrationCache(const void *ptr, std::size_t length)=0
Save a calibration cache.
virtual void setBiasWeights(Weights weights)=0
Set the bias weights for the convolution.
TENSORRTAPI nvinfer1::ILogger * getLogger()
Return the logger object.
virtual TopKOperation getOperation() const =0
Get the operation for the layer.
Layer that represents a TopK reduction.
Definition: NvInfer.h:2817
virtual RNNInputMode getInputMode() const =0
Get the input mode of the RNN layer.
int EnumMax< ActivationType >()
Maximum number of elements in ActivationType enum.
Definition: NvInfer.h:1018
virtual void setInt8Mode(bool mode)=0
Set whether Int8 mode is used.
virtual void setReshapeDimensions(Dims dimensions)=0
Set the reshaped dimensions.
DataType type
The type of the weights.
Definition: NvInfer.h:496
virtual float getBeta() const =0
Get the LRN beta value.
virtual void setPadding(DimsHW padding)=0
Set the padding of the deconvolution.
virtual int enqueue(int batchSize, const void *const *inputs, void **outputs, void *workspace, cudaStream_t stream)=0
Execute the layer.
virtual TensorLocation getLocation() const =0
Get the storage location of a tensor.
An RNN layer in a network definition, version 2.
Definition: NvInfer.h:2144
virtual void setType(DataType type)=0
Set the data type of a tensor.
virtual ITensor * getOutput(int index) const =0
Get the output tensor specified by the given index.
virtual Permutation getSecondTranspose() const =0
Get the permutation applied by the second transpose operation.
int & h()
Get the height.
Definition: NvInfer.h:336
Elements correspond to different spatial data.
virtual DimsHW getPadding() const =0
Get the padding of the convolution.
virtual nvinfer1::INetworkDefinition * createNetwork()=0
Create a network definition object.
virtual int getNbOutputChannels() const =0
Get the number of output channels K from the fully connected layer.
virtual bool getTranspose(int index) const =0
Get the transpose flag for an input tensor.
virtual Weights getWeights() const =0
Get the weights for the layer.
virtual void setFp16Mode(bool mode)=0
Set whether or not 16-bit kernels are permitted.
virtual void reportLayerTime(const char *layerName, float ms)=0
Layer time reporting callback.
virtual void setK(int k)=0
Set the k value for the layer.
virtual IDeconvolutionLayer * addDeconvolution(ITensor &input, int nbOutputMaps, DimsHW kernelSize, Weights kernelWeights, Weights biasWeights)=0
Add a deconvolution layer to the network.
virtual IActivationLayer * addActivation(ITensor &input, ActivationType type)=0
Add an activation layer to the network.
virtual Weights getKernelWeights() const =0
Get the kernel weights.
virtual void setName(const char *name)=0
Set the name of the execution context.
virtual int getNbOutputs() const =0
Get the number of outputs in the network.
int & n()
Get the index count.
Definition: NvInfer.h:433
int EnumMax< TopKOperation >()
Maximum number of elements in TopKOperation enum.
Definition: NvInfer.h:2807
Elements correspond to different sequence values.
virtual std::size_t size() const =0
The size in bytes of the data that was allocated.
int EnumMax()
Maximum number of elements in an enumeration type.
int c() const
Get the channel count.
Definition: NvInfer.h:329
int & w()
Get the width.
Definition: NvInfer.h:350
virtual IElementWiseLayer * addElementWise(ITensor &input1, ITensor &input2, ElementWiseOperation op)=0
Add an elementwise layer to the network.
virtual bool getAverageCountExcludesPadding() const =0
Get whether exclusive pooling uses as a denominator the overlap area between the window and the unpadd...
Layer that represents a reduction operator.
Definition: NvInfer.h:2604
Definition: NvInfer.h:3952
PoolingType
The type of pooling to perform in a pooling layer.
Definition: NvInfer.h:1058
int h() const
Get the height.
Definition: NvInfer.h:343
virtual float getBlendFactor() const =0
Get the blending factor for the max_average_blend mode: max_average_blendPool = (1-blendFactor)*maxPo...
int order[Dims::MAX_DIMS]
Definition: NvInfer.h:2715
int EnumMax< RNNGateType >()
Maximum number of elements in RNNGateType enum.
Definition: NvInfer.h:2132
virtual void setMode(ScaleMode mode)=0
Set the scale mode.
virtual ITensor * getInput(int index) const =0
Get the layer input corresponding to the given index.
virtual bool supportsFormat(DataType type, PluginFormat format) const =0
Check format support.
virtual void setConvolutionOutputDimensionsFormula(IOutputDimensionsFormula *formula)=0
Set the convolution output dimensions formula.
int & w()
Get the width.
Definition: NvInfer.h:244
virtual void setSequenceLengths(ITensor &seqLengths)=0
Specify individual sequence lengths in the batch with the ITensor pointed to by seqLengths.
Plugin factory for deserialization.
Definition: NvInfer.h:4233
virtual int getNbInputs() const =0
Get the number of inputs in the network.
virtual ITensor * getHiddenState() const =0
Get the initial hidden state of the RNN.
virtual void setOperation(TopKOperation op)=0
Set the operation for the layer.
Descriptor for data with one channel dimension and two spatial dimensions.
Definition: NvInfer.h:290
virtual DimsHW getWindowSize() const =0
Get the window size for pooling.
The first element to the power of the second element.
A network definition for input to the builder.
Definition: NvInfer.h:2975
virtual int getNbLayers() const =0
Get the number of layers in the network.
virtual IHostMemory * serialize() const =0
Serialize the network to a stream.
virtual CalibrationAlgoType getAlgorithm()
Definition: NvInfer.h:3958
virtual IUnaryLayer * addUnary(ITensor &input, UnaryOperation operation)=0
Add a unary layer to the network.
virtual void setOperation(ReduceOperation op)=0
Set the reduce operation for the layer.
virtual void destroy()=0
Destroy this object.
int EnumMax< CalibrationAlgoType >()
Maximum number of elements in CalibrationAlgoType enum.
Definition: NvInfer.h:3859
virtual RNNDirection getDirection() const =0
Get the direction of the RNN layer.
virtual DimsHW getPadding() const =0
Get the padding for pooling.
virtual size_t getSerializationSize()=0
Find the size of the serialization buffer required.
virtual ITensor * getHiddenState() const =0
Get the initial hidden state of the RNN.
virtual IConstantLayer * addConstant(Dims dimensions, Weights weights)=0
Add a constant layer to the network.
virtual void setGpuAllocator(IGpuAllocator *allocator)=0
Set the GPU allocator.
Divide the first element by the second.
virtual int getMinFindIterations() const =0
Query the number of minimization iterations.
virtual LayerType getType() const =0
Return the type of a layer.
virtual void setDeviceMemory(void *memory)=0
set the device memory for use by this execution context.
virtual float getK() const =0
Get the LRN K value.
virtual RNNDirection getDirection() const =0
Get the direction of the RNN layer.
Product of the two elements.
Dims2()
Construct an empty Dims2 object.
Definition: NvInfer.h:177
virtual IProfiler * getProfiler() const =0
Get the profiler.
TopKOperation
Enumerates the operations that may be performed by a TopK layer.
Definition: NvInfer.h:2800
virtual Weights getWeightsForGate(int layerIndex, RNNGateType gate, bool isW) const =0
Get the weight parameters for an individual gate in the RNN.
struct CUstream_st * cudaStream_t
Forward declaration of cudaStream_t.
Definition: NvInfer.h:98
Dims3(int d0, int d1, int d2)
Construct a Dims3 from 3 elements.
Definition: NvInfer.h:277
virtual void setStride(DimsHW stride)=0
Set the stride for pooling.
virtual int getSeqLength() const =0
Get the sequence length.
Descriptor for four-dimensional data.
Definition: NvInfer.h:364
virtual DimsHW getStride() const =0
Get the stride for pooling.
Elements correspond to different channels.
int EnumMax< ReduceOperation >()
Maximum number of elements in ReduceOperation enum.
Definition: NvInfer.h:2594
int EnumMax< DimensionType >()
Maximum number of elements in DimensionType enum.
Definition: NvInfer.h:145
ActivationType
Enumerates the types of activation to perform in an activation layer.
Definition: NvInfer.h:1010
virtual void setDeconvolutionOutputDimensionsFormula(IOutputDimensionsFormula *formula)=0
Set the deconvolution output dimensions formula.
Descriptor for data with one index dimension, one channel dimension and two spatial dimensions...
Definition: NvInfer.h:398
virtual void setKernelWeights(Weights weights)=0
Set the kernel weights, given as a KxC matrix in row-major order.
virtual void setMaxBatchSize(int batchSize)=0
Set the maximum batch size.
virtual int getBindingIndex(const char *name) const =0
Retrieve the binding index for a named tensor.
virtual IFullyConnectedLayer * addFullyConnected(ITensor &input, int nbOutputs, Weights kernelWeights, Weights biasWeights)=0
Add a fully connected layer to the network.
virtual void setGatherAxis(int axis)=0
Set the non-batch dimension axis to gather on. The axis must be less than the number of non-batch dim...
virtual int getK() const =0
Get the k value for the layer.
virtual const void * readCalibrationCache(std::size_t &length)=0
Load a calibration cache.
virtual void setBiasForGate(int layerIndex, RNNGateType gate, bool isW, Weights bias)=0
Set the bias parameters for an individual gate in the RNN.
virtual IReduceLayer * addReduce(ITensor &input, ReduceOperation operation, uint32_t reduceAxes, bool keepDimensions)=0
Add a reduce layer to the network.
virtual void setPadding(DimsHW padding)=0
Set the padding for pooling.
DimsHW()
Construct an empty DimsHW object.
Definition: NvInfer.h:207
virtual void setWindowSize(DimsHW windowSize)=0
Set the window size for pooling.
Base class for all layer classes in a network definition.
Definition: NvInfer.h:722
virtual void setTranspose(int index, bool val)=0
Set the transpose flag for an input tensor.
Dims4(int d0, int d1, int d2, int d3)
Construct a Dims4 from 4 elements.
Definition: NvInfer.h:384
int EnumMax< PoolingType >()
Maximum number of elements in PoolingType enum.
Definition: NvInfer.h:1066
int & h()
Get the height.
Definition: NvInfer.h:461
virtual int getWindowSize() const =0
Get the LRN window size.
int & h()
Get the height.
Definition: NvInfer.h:230
virtual void free(void *memory)=0
virtual void setActivationType(ActivationType type)=0
Set the type of activation to be performed.
TENSORRTAPI int getInferLibVersion()
Return the library version number.
virtual std::size_t getHiddenSize() const =0
Get the size of the hidden layers.
int & c()
Get the channel count.
Definition: NvInfer.h:447
virtual void setStride(DimsHW stride)=0
Set the stride of the deconvolution.
Structure to define the dimensions of a tensor.
Definition: NvInfer.h:158
Network iterates from first to last and vice versa and outputs concatenated.
#define NV_TENSORRT_PATCH
TensorRT patch version.
Definition: NvInfer.h:58
virtual IRNNLayer * addRNN(ITensor &inputs, int layerCount, std::size_t hiddenSize, int maxSeqLen, RNNOperation op, RNNInputMode mode, RNNDirection dir, Weights weights, Weights bias)=0
Add a layerCount deep RNN layer to the network with a sequence length of maxSeqLen and hiddenSize in...
virtual void setReduceAxes(uint32_t reduceAxes)=0
Set which axes to reduce for the layer.
virtual void setGpuAllocator(IGpuAllocator *allocator)=0
Set the GPU allocator.
Layer type for shuffling data.
Definition: NvInfer.h:2718
virtual bool platformHasFastFp16() const =0
Determine whether the platform has fast native fp16.
DimsCHW(int channels, int height, int width)
Construct a DimsCHW given channel count, height and width.
Definition: NvInfer.h:310
A elementwise layer in a network definition.
Definition: NvInfer.h:1593
A Pooling layer in a network definition.
Definition: NvInfer.h:1079
#define _TENSORRT_FINAL
Defines which symbols are exported.
Definition: NvInfer.h:68
virtual RNNOperation getOperation() const =0
Get the operation of the RNN layer.
virtual void serialize(void *buffer)=0
Serialize the layer.
virtual void setDilation(DimsHW dims)=0
Set the dilation for a convolution.
virtual void setOperation(RNNOperation op)=0
Set the operation of the RNN layer.
virtual void configureWithFormat(const Dims *inputDims, int nbInputs, const Dims *outputDims, int nbOutputs, DataType type, PluginFormat format, int maxBatchSize)=0
Configure the layer.
virtual DimsHW getPrePadding() const =0
Set the padding that is applied at the start of the tensor.
virtual void setDirection(RNNDirection op)=0
Set the direction of the RNN layer.
int d[MAX_DIMS]
The extent of each dimension.
Definition: NvInfer.h:163
Minimum of the two elements.
virtual Weights getBiasWeights() const =0
Get the bias weights.
#define NV_TENSORRT_MINOR
TensorRT minor version.
Definition: NvInfer.h:57
virtual int getNbBindings() const =0
Get the number of binding indices.
virtual void setPadding(DimsHW padding)=0
Set the padding of the convolution.
virtual Dims getDimensions() const =0
Get the dimensions of a tensor.
ElementWiseOperation
Enumerates the binary operations that may be performed by an ElementWise layer.
Definition: NvInfer.h:1567
int64_t count
The number of weights in the array.
Definition: NvInfer.h:498
Three-gate network consisting of Gated Recurrent Units.
virtual const char * getName() const =0
Get the tensor name.
RNNGateType
Identifies an individual gate within an RNN cell.
Definition: NvInfer.h:2120
Network iterates from first input to last input.
virtual void setBiasWeights(Weights weights)=0
Set the bias weights for the deconvolution.
int & c()
Get the channel count.
Definition: NvInfer.h:322
virtual void setSecondTranspose(Permutation permutation)=0
Set the permutation applied by the second transpose operation.
virtual bool getKeepDimensions() const =0
Get the boolean that specifies whether or not to keep the reduced dimensions for the layer...
virtual bool isNetworkOutput() const =0
Whether the tensor is a network output.
Dims3()
Construct an empty Dims3 object.
Definition: NvInfer.h:264
virtual bool getBroadcastAcrossBatch() const =0
Check if tensor is broadcast across the batch.
virtual ITensor * getCellState() const =0
Get the initial cell state of the RNN.
virtual void setCellState(ITensor &cell)=0
Set the initial cell state of the LSTM with the provided cell ITensor.
A tensor in a network definition.
Definition: NvInfer.h:580
virtual void destroy()=0
Destroy this object;.
virtual void setLocation(TensorLocation location)=0
Set the storage location of a tensor.
int EnumMax< UnaryOperation >()
Maximum number of elements in UnaryOperation enum.
Definition: NvInfer.h:2548
virtual void setBlendFactor(float blendFactor)=0
Set the blending factor for the max_average_blend mode: max_average_blendPool = (1-blendFactor)*maxPo...
An array of weights used as a layer parameter.
Definition: NvInfer.h:493
int & w()
Get the width.
Definition: NvInfer.h:475
int h() const
Get the height.
Definition: NvInfer.h:237
virtual int getDataLength() const =0
Get the length of the data being processed by the RNN for use in computing other values.
virtual nvinfer1::ICudaEngine * deserializeCudaEngine(const void *blob, std::size_t size, IPluginFactory *pluginFactory)=0
Deserialize an engine from a stream.
virtual nvinfer1::ICudaEngine * buildCudaEngine(nvinfer1::INetworkDefinition &network)=0
Build a CUDA engine from a network definition.
Dims2(int d0, int d1)
Construct a Dims2 from 2 elements.
Definition: NvInfer.h:189
virtual Weights getShift() const =0
Get the shift value.
virtual void setAxis(int axis)=0
Set the axis along which concatenation occurs.
virtual int getNbOutputMaps() const =0
Get the number of output feature maps for the deconvolution.
Dims4()
Construct an empty Dims4 object.
Definition: NvInfer.h:370
virtual void setScale(Weights scale)=0
Set the scale value.
virtual IPoolingLayer * addPooling(ITensor &input, PoolingType type, DimsHW windowSize)=0
Add a pooling layer to the network.
int nbDims
The number of dimensions.
Definition: NvInfer.h:162
virtual bool platformHasFastInt8() const =0
Determine whether the platform has fast native int8.
virtual DimsHW getStride() const =0
Get the stride of the convolution.
Application-implemented interface for calibration.
Definition: NvInfer.h:3875
Application-implemented logging interface for the builder, engine and runtime.
Definition: NvInfer.h:4300
virtual IPluginLayer * addPluginExt(ITensor *const *inputs, int nbInputs, IPluginExt &plugin)=0
Add a plugin layer to the network using an IPluginExt interface.
virtual void setDirection(RNNDirection op)=0
Set the direction of the RNN layer.
virtual void setInt8Calibrator(IInt8Calibrator *calibrator)=0
Set Int8 Calibration interface.
DimsHW(int height, int width)
Construct a DimsHW given height and width.
Definition: NvInfer.h:219
virtual void * data() const =0
A pointer to the raw data that is owned by the library.
Identical coefficients across all elements of the tensor.
void configure(const Dims *inputDims, int nbInputs, const Dims *outputDims, int nbOutputs, int maxBatchSize) _TENSORRT_FINAL
Derived classes should not implement this. In a C++11 API it would be override final.
Definition: NvInfer.h:2505
int EnumMax< ElementWiseOperation >()
Maximum number of elements in ElementWiseOperation enum.
Definition: NvInfer.h:1579
Plugin class for user-implemented layers.
Definition: NvInfer.h:2458
Layer that represents a padding operation.
Definition: NvInfer.h:2661
virtual void setBiasWeights(Weights weights)=0
Set the bias weights.
virtual void setNbOutputMaps(int nbOutputMaps)=0
Set the number of output feature maps for the deconvolution.
virtual void setNbOutputChannels(int nbOutputs)=0
Set the number of output channels K from the fully connected layer.
virtual void setKernelWeights(Weights weights)=0
Set the kernel weights for the convolution.
virtual Weights getBiasWeights() const =0
Get the bias weights for the convolution.
virtual size_t getWorkspaceSize(int maxBatchSize) const =0
Find the workspace size required by the layer.
virtual uint32_t getReduceAxes() const =0
Get the axes over which to reduce for the layer.
virtual ITensor * getOutput(int index) const =0
Get the layer output corresponding to the given index.
virtual IPlugin & getPlugin()=0
Get the plugin for the layer.
virtual void setInputMode(RNNInputMode op)=0
Set the input mode of the RNN layer.
virtual void setNbGroups(int nbGroups)=0
Set the number of groups for a convolution.
virtual void setCellState(ITensor &cell)=0
Set the initial cell state of the RNN with the provided cell ITensor.
virtual void setKernelSize(DimsHW kernelSize)=0
Set the HW kernel size of the convolution.
CalibrationAlgoType
Version of calibration algorithm to use.
Definition: NvInfer.h:3852
virtual void setStride(DimsHW stride)=0
Set the stride of the convolution.
virtual float getAlpha() const =0
Get the LRN alpha value.
virtual void setReduceAxes(uint32_t reduceAxes)=0
Set the axes over which to reduce.
virtual IConvolutionLayer * addConvolution(ITensor &input, int nbOutputMaps, DimsHW kernelSize, Weights kernelWeights, Weights biasWeights)=0
Add a convolution layer to the network.
virtual void setFirstTranspose(Permutation permutation)=0
Set the permutation applied by the first transpose operation.
DimsNCHW()
Construct an empty DimsNCHW object.
Definition: NvInfer.h:404
Class to handle library allocated memory that is accessible to the user.
Definition: NvInfer.h:509
LayerType
The type values of layer classes.
Definition: NvInfer.h:527
int EnumMax< ScaleMode >()
Maximum number of elements in ScaleMode enum.
Definition: NvInfer.h:1270
virtual void setDimensions(Dims dimensions)=0
Set the dimensions for the layer.
virtual Weights getBiasWeights() const =0
Get the bias weights for the deconvolution.
virtual void setProfiler(IProfiler *)=0
Set the profiler.
virtual void setWeights(Weights weights)=0
Set the weights for the layer.
RNNInputMode
Enumerates the RNN input modes that may occur with an RNN layer.
Definition: NvInfer.h:1767
A RNN layer in a network definition.
Definition: NvInfer.h:1788
virtual void setName(const char *name)=0
Set the name of a layer.
virtual ActivationType getActivationType() const =0
Get the type of activation to be performed.
virtual Dims getDimensions() const =0
Get the dimensions for the layer.
virtual void setDebugSync(bool sync)=0
Set the debug sync flag.
virtual void destroy()=0
Destroy this object.
virtual void setPostPadding(DimsHW padding)=0
Set the padding that is applied at the end of the tensor.
Layer type for plugins.
Definition: NvInfer.h:2516
virtual UnaryOperation getOperation() const =0
Get the unary operation for the layer.
DimensionType
The type of data encoded across this dimension.
Definition: NvInfer.h:136
virtual IConcatenationLayer * addConcatenation(ITensor *const *inputs, int nbInputs)=0
Add a concatenation layer to the network.
Four-gate LSTM network w/o peephole connections.
virtual DimsHW getPostPadding() const =0
Get the padding that is applied at the end of the tensor.
virtual const ICudaEngine & getEngine() const =0
Get the associated engine.
virtual void * allocate(uint64_t size, uint64_t alignment, uint32_t flags)=0
virtual void setHiddenState(ITensor &hidden)=0
Set the initial hidden state of the RNN with the provided hidden ITensor.
virtual TensorLocation getLocation(int bindingIndex) const =0
Get location of binding.
virtual ScaleMode getMode() const =0
Get the scale mode.
virtual ILRNLayer * addLRN(ITensor &input, int window, float alpha, float beta, float k)=0
Add a LRN layer to the network.
virtual int getTensorRTVersion() const
Return the API version with which this plugin was built.
Definition: NvInfer.h:2466
virtual void setMaxWorkspaceSize(std::size_t workspaceSize)=0
Set the maximum workspace size.
virtual void setShift(Weights shift)=0
Set the shift value.
virtual size_t getDeviceMemorySize() const =0
Return the amount of device memory required by an execution context.
virtual Weights getKernelWeights() const =0
Get the kernel weights for the deconvolution.
Application-implemented class for controlling allocation on the GPU.
Definition: NvInfer.h:4006
Context for executing inference using an engine.
Definition: NvInfer.h:3585
virtual ITensor * getInput(int index) const =0
Get the input tensor specified by the given index.
virtual void setPower(Weights power)=0
Set the power value.
virtual double getRegressionCutoff() const =0
The fraction (between 0 and 1) of the maximum used to define the regression cutoff when using regress...
virtual void setHalf2Mode(bool mode)=0
Set whether half2 mode is used.
virtual std::size_t getMaxWorkspaceSize() const =0
Get the maximum workspace size.
virtual int getAxis() const =0
Get the axis along which concatenation occurs.
Descriptor for two-dimensional spatial data.
Definition: NvInfer.h:201
UnaryOperation
Enumerates the unary operations that may be performed by a Unary layer.
Definition: NvInfer.h:2537
virtual void setAverageFindIterations(int avgFind)=0
Set the number of minimization iterations used when timing layers.
virtual int getNbOutputMaps() const =0
Get the number of output maps for the convolution.
virtual RNNOperation getOperation() const =0
Get the operation of the RNN layer.
virtual bool getHalf2Mode() const =0
Query whether half2 mode is used.
virtual int getNbInputs() const =0
Get the number of inputs of a layer.
virtual ISoftMaxLayer * addSoftMax(ITensor &input)=0
Add a SoftMax layer to the network.
Elements correspond to different batch index.
virtual Weights getBiasForGate(int layerIndex, RNNGateType gate, bool isW) const =0
Get the bias parameters for an individual gate in the RNN.
Descriptor for two-dimensional data.
Definition: NvInfer.h:171
virtual int getMaxBatchSize() const =0
Get the maximum batch size which can be used for inference.
virtual void setName(const char *name)=0
Set the tensor name.
virtual IOutputDimensionsFormula & getPoolingOutputDimensionsFormula() const =0
Get the pooling output dimensions formula.
virtual int getNbOutputs() const =0
Get the number of outputs of a layer.
virtual IGatherLayer * addGather(ITensor &data, ITensor &indices, int axis)=0
Add a gather layer to the network.
virtual IOutputDimensionsFormula & getDeconvolutionOutputDimensionsFormula() const =0
Get the deconvolution output dimensions formula.
virtual void setDimensions(Dims dimensions)=0
Set the dimensions of a tensor.
virtual void setWindowSize(int windowSize)=0
Set the LRN window size.
virtual Weights getKernelWeights() const =0
Get the kernel weights for the convolution.
virtual CalibrationAlgoType getAlgorithm()=0
Get the algorithm used by this calibrator.
Application-implemented interface for profiling.
Definition: NvInfer.h:3560
virtual RNNInputMode getInputMode() const =0
Get the input mode of the RNN layer.
A concatenation layer in a network definition.
Definition: NvInfer.h:1406
virtual void setK(float k)=0
Set the LRN K value.
virtual void setNbGroups(int nbGroups)=0
Set the number of groups for a deconvolution.
virtual IPluginLayer * addPlugin(ITensor *const *inputs, int nbInputs, IPlugin &plugin)=0
Add a plugin layer to the network.
int EnumMax< RNNDirection >()
Maximum number of elements in RNNDirection enum.
Definition: NvInfer.h:1747
virtual const void * readHistogramCache(std::size_t &length)=0
Load a histogram.
virtual Permutation getFirstTranspose() const =0
Get the permutation applied by the first transpose operation.
virtual IExecutionContext * createExecutionContextWithoutDeviceMemory()=0
Create an execution context without any device memory allocated.
virtual void setKernelSize(DimsHW kernelSize)=0
Set the HW kernel size of the convolution.
virtual void setWeights(Weights weights)=0
Set the weight parameters for the RNN.
virtual Weights getWeights() const =0
Get the W weights for the RNN.
virtual void terminate()=0
Shutdown the layer. This is called when the engine is destroyed.
virtual PoolingType getPoolingType() const =0
Get the type of pooling to be performed.
A deconvolution layer in a network definition.
Definition: NvInfer.h:1437
virtual IShuffleLayer * addShuffle(ITensor &input)=0
Add a shuffle layer to the network.
virtual void setPoolingType(PoolingType type)=0
Set the type of pooling to be performed.
virtual IOutputDimensionsFormula & getConvolutionOutputDimensionsFormula() const =0
Get the convolution output dimensions formula.
virtual void setDebugSync(bool sync)=0
Set whether the builder should use debug synchronization.
virtual uint32_t getReduceAxes() const =0
Get the axes to reduce for the layer.
virtual int initialize()=0
Initialize the layer for execution. This is called when the engine is created.
Definition: NvInfer.h:3938
int EnumMax< RNNInputMode >()
Maximum number of elements in RNNInputMode enum.
Definition: NvInfer.h:1774