TensorRT
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Macros Pages
NvInfer.h
Go to the documentation of this file.
1 /*
2  * Copyright 1993-2018 NVIDIA Corporation. All rights reserved.
3  *
4  * NOTICE TO LICENSEE:
5  *
6  * This source code and/or documentation ("Licensed Deliverables") are
7  * subject to NVIDIA intellectual property rights under U.S. and
8  * international Copyright laws.
9  *
10  * These Licensed Deliverables contained herein is PROPRIETARY and
11  * CONFIDENTIAL to NVIDIA and is being provided under the terms and
12  * conditions of a form of NVIDIA software license agreement by and
13  * between NVIDIA and Licensee ("License Agreement") or electronically
14  * accepted by Licensee. Notwithstanding any terms or conditions to
15  * the contrary in the License Agreement, reproduction or disclosure
16  * of the Licensed Deliverables to any third party without the express
17  * written consent of NVIDIA is prohibited.
18  *
19  * NOTWITHSTANDING ANY TERMS OR CONDITIONS TO THE CONTRARY IN THE
20  * LICENSE AGREEMENT, NVIDIA MAKES NO REPRESENTATION ABOUT THE
21  * SUITABILITY OF THESE LICENSED DELIVERABLES FOR ANY PURPOSE. IT IS
22  * PROVIDED "AS IS" WITHOUT EXPRESS OR IMPLIED WARRANTY OF ANY KIND.
23  * NVIDIA DISCLAIMS ALL WARRANTIES WITH REGARD TO THESE LICENSED
24  * DELIVERABLES, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY,
25  * NONINFRINGEMENT, AND FITNESS FOR A PARTICULAR PURPOSE.
26  * NOTWITHSTANDING ANY TERMS OR CONDITIONS TO THE CONTRARY IN THE
27  * LICENSE AGREEMENT, IN NO EVENT SHALL NVIDIA BE LIABLE FOR ANY
28  * SPECIAL, INDIRECT, INCIDENTAL, OR CONSEQUENTIAL DAMAGES, OR ANY
29  * DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS,
30  * WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS
31  * ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE
32  * OF THESE LICENSED DELIVERABLES.
33  *
34  * U.S. Government End Users. These Licensed Deliverables are a
35  * "commercial item" as that term is defined at 48 C.F.R. 2.101 (OCT
36  * 1995), consisting of "commercial computer software" and "commercial
37  * computer software documentation" as such terms are used in 48
38  * C.F.R. 12.212 (SEPT 1995) and is provided to the U.S. Government
39  * only as a commercial end item. Consistent with 48 C.F.R.12.212 and
40  * 48 C.F.R. 227.7202-1 through 227.7202-4 (JUNE 1995), all
41  * U.S. Government End Users acquire the Licensed Deliverables with
42  * only those rights set forth herein.
43  *
44  * Any use of the Licensed Deliverables in individual and commercial
45  * software must include, in the user documentation and internal
46  * comments to the code, the above Disclaimer and U.S. Government End
47  * Users Notice.
48  */
49 
50 #ifndef NV_INFER_H
51 #define NV_INFER_H
52 
53 #include <cstddef>
54 #include <cstdint>
55 
56 #define NV_TENSORRT_MAJOR 5
57 #define NV_TENSORRT_MINOR 0
58 #define NV_TENSORRT_PATCH 0
59 #define NV_TENSORRT_BUILD 10
60 
61 #define NV_TENSORRT_SONAME_MAJOR 5
62 #define NV_TENSORRT_SONAME_MINOR 0
63 #define NV_TENSORRT_SONAME_PATCH 0
64 
65 #if __cplusplus > 201103L
66 #define _TENSORRT_FINAL final
67 #else
68 #define _TENSORRT_FINAL
69 #endif
70 
72 #ifdef TENSORRT_BUILD_LIB
73 #define TENSORRTAPI __attribute__((visibility("default")))
74 #else
75 #define TENSORRTAPI
76 #endif
77 
86 
92 
93 // forward declare some CUDA types to avoid an include dependency
94 
95 struct cublasContext;
96 struct cudnnContext;
97 
98 typedef struct CUstream_st* cudaStream_t;
99 typedef struct CUevent_st* cudaEvent_t;
100 
101 static const int NV_TENSORRT_VERSION = (NV_TENSORRT_MAJOR * 1000) + (NV_TENSORRT_MINOR * 100) + NV_TENSORRT_PATCH; // major, minor, patch
102 
//!
//! \namespace nvinfer1
//! \brief The TensorRT API namespace.
//!
108 namespace nvinfer1
109 {
110 
//! \brief Maximum number of elements in an enumeration type.
//! Specialized for each enum class later in this header.
111 template <typename T>
112 inline int EnumMax();
113 
//!
//! \enum DataType
//! \brief The type of weights and tensors.
//!
enum class DataType : int
{
    kFLOAT = 0, //!< 32-bit floating point
    kHALF = 1,  //!< 16-bit floating point
    kINT8 = 2,  //!< 8-bit integer (quantized)
    kINT32 = 3  //!< 32-bit signed integer
};

// Primary template is declared earlier in this header; restated here so the
// specialization below is self-contained.
template <typename T>
inline int EnumMax();

//! \brief Maximum number of elements in the DataType enum. \see DataType
template <>
inline int EnumMax<DataType>()
{
    return 4;
}
131 
//!
//! \enum DeviceType
//! \brief Device types that TensorRT can target.
//!
enum class DeviceType : int
{
    kGPU,         //!< GPU device
    kDLA,         //!< DLA core
    kDLA0 = kDLA, //!< DLA core 0 (alias of kDLA)
    kDLA1         //!< DLA core 1
};

// Primary template is declared earlier in this header; restated here so the
// specialization below is self-contained.
template <typename T>
inline int EnumMax();

//! \brief Maximum number of distinct elements in the DeviceType enum.
// NOTE(review): the specialization's signature line (source 145) was dropped
// by this extraction; reconstructed from the `template <>` / body that remain.
template <>
inline int EnumMax<DeviceType>()
{
    return 3;
}
149 
//!
//! \enum DimensionType
//! \brief The type of data stored along a tensor dimension.
//!
enum class DimensionType : int
{
    kSPATIAL = 0,  //!< Spatial dimension
    kCHANNEL = 1,  //!< Channel dimension
    kINDEX = 2,    //!< Index dimension (e.g. batch)
    kSEQUENCE = 3  //!< Sequence dimension
};

// Primary template is declared earlier in this header; restated here so the
// specialization below is self-contained.
template <typename T>
inline int EnumMax();

//! \brief Maximum number of elements in the DimensionType enum.
// NOTE(review): the specialization's signature line (source 163) was dropped
// by this extraction; reconstructed from the `template <>` / body that remain.
template <>
inline int EnumMax<DimensionType>()
{
    return 4;
}
167 
//!
//! \class Dims
//! \brief Base class for tensor dimension descriptors; holds up to
//! MAX_DIMS extents, of which nbDims are in use.
//!
176 class Dims
177 {
178 public:
//! Maximum number of dimensions supported.
179  static const int MAX_DIMS = 8;
//! Number of dimensions currently in use.
180  int nbDims;
//! Extent of each dimension.
181  int d[MAX_DIMS];
// NOTE(review): source line 182 was dropped by this extraction; the shipped
// NvInfer.h also declares a per-dimension type array here
// (DimensionType type[MAX_DIMS]) — confirm against the original header.
183 };
184 
189 class Dims2 : public Dims
190 {
191 public:
196  {
197  nbDims = 2;
198  d[0] = d[1] = 0;
199  }
200 
207  Dims2(int d0, int d1)
208  {
209  nbDims = 2;
210  d[0] = d0;
211  d[1] = d1;
212  }
213 };
214 
219 class DimsHW : public Dims2
220 {
221 public:
226  : Dims2()
227  {
229  }
230 
237  DimsHW(int height, int width)
238  : Dims2(height, width)
239  {
241  }
242 
248  int& h() { return d[0]; }
249 
255  int h() const { return d[0]; }
256 
262  int& w() { return d[1]; }
263 
269  int w() const { return d[1]; }
270 };
271 
276 class Dims3 : public Dims
277 {
278 public:
283  {
284  nbDims = 3;
285  d[0] = d[1] = d[2] = 0;
286  }
287 
295  Dims3(int d0, int d1, int d2)
296  {
297  nbDims = 3;
298  d[0] = d0;
299  d[1] = d1;
300  d[2] = d2;
301  }
302 };
303 
308 class DimsCHW : public Dims3
309 {
310 public:
315  : Dims3()
316  {
319  }
320 
328  DimsCHW(int channels, int height, int width)
329  : Dims3(channels, height, width)
330  {
333  }
334 
340  int& c() { return d[0]; }
341 
347  int c() const { return d[0]; }
348 
354  int& h() { return d[1]; }
355 
361  int h() const { return d[1]; }
362 
368  int& w() { return d[2]; }
369 
375  int w() const { return d[2]; }
376 };
377 
382 class Dims4 : public Dims
383 {
384 public:
389  {
390  nbDims = 4;
391  d[0] = d[1] = d[2] = d[3] = 0;
392  }
393 
402  Dims4(int d0, int d1, int d2, int d3)
403  {
404  nbDims = 4;
405  d[0] = d0;
406  d[1] = d1;
407  d[2] = d2;
408  d[3] = d3;
409  }
410 };
411 
416 class DimsNCHW : public Dims4
417 {
418 public:
423  : Dims4()
424  {
428  }
429 
438  DimsNCHW(int batchSize, int channels, int height, int width)
439  : Dims4(batchSize, channels, height, width)
440  {
444  }
445 
451  int& n() { return d[0]; }
452 
458  int n() const { return d[0]; }
459 
465  int& c() { return d[1]; }
466 
472  int c() const { return d[1]; }
473 
479  int& h() { return d[2]; }
480 
486  int h() const { return d[2]; }
487 
493  int& w() { return d[3]; }
494 
500  int w() const { return d[3]; }
501 };
502 
//!
//! \class Weights
//! \brief An array of constant values used by a layer; the memory is not
//! owned by this class.
//!
511 class Weights
512 {
513 public:
// NOTE(review): source line 514 was dropped by this extraction; the shipped
// NvInfer.h declares `DataType type;` here — confirm against the original
// header before relying on this struct's layout.
//! Pointer to the values (not owned).
515  const void* values;
//! Number of values in the array.
516  int64_t count;
517 };
518 
528 {
529 public:
530  virtual void* data() const = 0;
531  virtual std::size_t size() const = 0;
532  virtual DataType type() const = 0;
533  virtual void destroy() = 0;
534 protected:
535  virtual ~IHostMemory() {}
536 };
537 
//!
//! \enum LayerType
//! \brief The kinds of layer that a network definition may contain.
//!
enum class LayerType : int
{
    kCONVOLUTION = 0,      //!< Convolution layer
    kFULLY_CONNECTED = 1,  //!< Fully connected layer
    kACTIVATION = 2,       //!< Activation layer
    kPOOLING = 3,          //!< Pooling layer
    kLRN = 4,              //!< LRN layer
    kSCALE = 5,            //!< Scale layer
    kSOFTMAX = 6,          //!< SoftMax layer
    kDECONVOLUTION = 7,    //!< Deconvolution layer
    kCONCATENATION = 8,    //!< Concatenation layer
    kELEMENTWISE = 9,      //!< Elementwise layer
    kPLUGIN = 10,          //!< Plugin layer
    kRNN = 11,             //!< RNN layer
    kUNARY = 12,           //!< Unary layer
    kPADDING = 13,         //!< Padding layer
    kSHUFFLE = 14,         //!< Shuffle layer
    kREDUCE = 15,          //!< Reduce layer
    kTOPK = 16,            //!< TopK layer
    kGATHER = 17,          //!< Gather layer
    kMATRIX_MULTIPLY = 18, //!< Matrix-multiply layer
    kRAGGED_SOFTMAX = 19,  //!< Ragged SoftMax layer
    kCONSTANT = 20,        //!< Constant layer
    kRNN_V2 = 21,          //!< RNNv2 layer
    kIDENTITY = 22         //!< Identity layer
};

// Primary template is declared earlier in this header; restated here so the
// specialization below is self-contained.
template <typename T>
inline int EnumMax();

//! \brief Maximum number of elements in the LayerType enum.
//! BUGFIX: the enum has 23 enumerators (0 through 22 inclusive) but this
//! previously returned 22, unlike every other EnumMax specialization in this
//! header, which returns the enumerator count. Arrays sized with
//! EnumMax<LayerType>() would be one element short.
template <>
inline int EnumMax<LayerType>()
{
    return 23;
}
577 
//!
//! \enum TensorLocation
//! \brief Where a tensor's data is stored.
//!
enum class TensorLocation : int
{
    kDEVICE = 0, //!< Data is on the device
    kHOST = 1    //!< Data is on the host
};

// Primary template is declared earlier in this header; restated here so the
// specialization below is self-contained.
template <typename T>
inline int EnumMax();

//! \brief Maximum number of elements in the TensorLocation enum.
// NOTE(review): the specialization's signature line (source 589) was dropped
// by this extraction; reconstructed from the `template <>` / body that remain.
template <>
inline int EnumMax<TensorLocation>()
{
    return 2;
}
593 
601 class ITensor
602 {
603 public:
616  virtual void setName(const char* name) = 0;
617 
625  virtual const char* getName() const = 0;
626 
641  virtual void setDimensions(Dims dimensions) = 0; // only valid for input tensors
642 
650  virtual Dims getDimensions() const = 0;
651 
665  virtual void setType(DataType type) = 0;
666 
674  virtual DataType getType() const = 0;
675 
684  virtual bool setDynamicRange(float min, float max) = 0;
685 
689  virtual bool isNetworkInput() const = 0;
690 
694  virtual bool isNetworkOutput() const = 0;
695 
696 protected:
697  virtual ~ITensor() {}
698 
699 public:
714  virtual void setBroadcastAcrossBatch(bool broadcastAcrossBatch) = 0;
715 
726  virtual bool getBroadcastAcrossBatch() const = 0;
727 
733  virtual TensorLocation getLocation() const = 0;
734 
745  virtual void setLocation(TensorLocation location) = 0;
746 };
747 
753 class ILayer
754 {
755 public:
761  virtual LayerType getType() const = 0;
762 
770  virtual void setName(const char* name) = 0;
771 
775 
778  virtual const char* getName() const = 0;
779 
783  virtual int getNbInputs() const = 0;
784 
792  virtual ITensor* getInput(int index) const = 0;
793 
797  virtual int getNbOutputs() const = 0;
798 
804  virtual ITensor* getOutput(int index) const = 0;
805 
815  virtual void setInput(int index, ITensor& tensor) = 0;
816 
817 
827 
828  virtual void setPrecision(DataType dataType) = 0;
829 
836 
837  virtual DataType getPrecision() const = 0;
838 
845 
846  virtual bool precisionIsSet() const = 0;
847 
852 
853  virtual void resetPrecision() = 0;
854 
865 
866  virtual void setOutputType(int index, DataType dataType) = 0;
867 
875 
876  virtual DataType getOutputType(int index) const = 0;
877 
885 
886  virtual bool outputTypeIsSet(int index) const = 0;
887 
894 
895  virtual void resetOutputType(int index) = 0;
896 
897 protected:
898  virtual ~ILayer() {}
899 };
900 
912 class IConvolutionLayer : public ILayer
913 {
914 public:
922  virtual void setKernelSize(DimsHW kernelSize) = 0;
923 
929  virtual DimsHW getKernelSize() const = 0;
930 
938  virtual void setNbOutputMaps(int nbOutputMaps) = 0;
939 
945  virtual int getNbOutputMaps() const = 0;
946 
956  virtual void setStride(DimsHW stride) = 0;
957 
961  virtual DimsHW getStride() const = 0;
962 
974  virtual void setPadding(DimsHW padding) = 0;
975 
981  virtual DimsHW getPadding() const = 0; // padding defaults to 0
982 
995  virtual void setNbGroups(int nbGroups) = 0;
996 
1002  virtual int getNbGroups() const = 0;
1003 
1012  virtual void setKernelWeights(Weights weights) = 0;
1013 
1019  virtual Weights getKernelWeights() const = 0;
1020 
1030  virtual void setBiasWeights(Weights weights) = 0;
1031 
1037  virtual Weights getBiasWeights() const = 0;
1038 
1046  virtual void setDilation(DimsHW dims) = 0;
1047 
1053  virtual DimsHW getDilation() const = 0;
1054 
1055 protected:
1056  virtual ~IConvolutionLayer() {}
1057 };
1058 
1086 {
1087 public:
1095  virtual void setNbOutputChannels(int nbOutputs) = 0;
1096 
1102  virtual int getNbOutputChannels() const = 0;
1103 
1109  virtual void setKernelWeights(Weights weights) = 0;
1110 
1116  virtual Weights getKernelWeights() const = 0;
1117 
1125  virtual void setBiasWeights(Weights weights) = 0;
1126 
1132  virtual Weights getBiasWeights() const = 0;
1133 
1134 protected:
1135  virtual ~IFullyConnectedLayer() {}
1136 };
1137 
//!
//! \enum ActivationType
//! \brief Activation functions an IActivationLayer can apply.
//!
enum class ActivationType : int
{
    kRELU = 0,    //!< Rectified linear activation
    kSIGMOID = 1, //!< Sigmoid activation
    kTANH = 2     //!< Hyperbolic tangent activation
};

// Primary template is declared earlier in this header; restated here so the
// specialization below is self-contained.
template <typename T>
inline int EnumMax();

//! \brief Maximum number of elements in the ActivationType enum.
// NOTE(review): the specialization's signature line (source 1151) was dropped
// by this extraction; reconstructed from the `template <>` / body that remain.
template <>
inline int EnumMax<ActivationType>()
{
    return 3;
}
1155 
1165 class IActivationLayer : public ILayer
1166 {
1167 public:
1173  virtual void setActivationType(ActivationType type) = 0;
1174 
1180  virtual ActivationType getActivationType() const = 0;
1181 
1182 protected:
1183  virtual ~IActivationLayer() {}
1184 };
1185 
//!
//! \enum PoolingType
//! \brief Pooling functions an IPoolingLayer can apply.
//!
enum class PoolingType : int
{
    kMAX = 0,              //!< Maximum over elements
    kAVERAGE = 1,          //!< Average over elements. If the tensor is padded, the count includes the padding
    kMAX_AVERAGE_BLEND = 2 //!< Blending between max and average pooling: (1-blendFactor)*maxPool + blendFactor*avgPool
};

// Primary template is declared earlier in this header; restated here so the
// specialization below is self-contained.
template <typename T>
inline int EnumMax();

//! \brief Maximum number of elements in the PoolingType enum.
// NOTE(review): the specialization's signature line (source 1199) was dropped
// by this extraction; reconstructed from the `template <>` / body that remain.
template <>
inline int EnumMax<PoolingType>()
{
    return 3;
}
1203 
1212 class IPoolingLayer : public ILayer
1213 {
1214 public:
1222  virtual void setPoolingType(PoolingType type) = 0;
1223 
1229  virtual PoolingType getPoolingType() const = 0;
1230 
1238  virtual void setWindowSize(DimsHW windowSize) = 0;
1239 
1245  virtual DimsHW getWindowSize() const = 0;
1246 
1256  virtual void setStride(DimsHW stride) = 0;
1257 
1263  virtual DimsHW getStride() const = 0;
1264 
1274  virtual void setPadding(DimsHW padding) = 0;
1275 
1283  virtual DimsHW getPadding() const = 0;
1284 
1292  virtual void setBlendFactor(float blendFactor) = 0;
1293 
1301  virtual float getBlendFactor() const = 0;
1302 
1311  virtual void setAverageCountExcludesPadding(bool exclusive) = 0;
1312 
1318  virtual bool getAverageCountExcludesPadding() const = 0;
1319 
1320 protected:
1321  virtual ~IPoolingLayer() {}
1322 };
1323 
1331 class ILRNLayer : public ILayer
1332 {
1333 public:
1340  virtual void setWindowSize(int windowSize) = 0;
1341 
1347  virtual int getWindowSize() const = 0;
1348 
1355  virtual void setAlpha(float alpha) = 0;
1356 
1362  virtual float getAlpha() const = 0;
1363 
1370  virtual void setBeta(float beta) = 0;
1371 
1377  virtual float getBeta() const = 0;
1378 
1385  virtual void setK(float k) = 0;
1386 
1392  virtual float getK() const = 0;
1393 
1394 protected:
1395  virtual ~ILRNLayer() {}
1396 };
1397 
//!
//! \enum ScaleMode
//! \brief Granularity at which an IScaleLayer applies its parameters.
//!
enum class ScaleMode : int
{
    kUNIFORM = 0,    //!< One set of parameters for the whole tensor
    kCHANNEL = 1,    //!< One set of parameters per channel
    kELEMENTWISE = 2 //!< One set of parameters per element
};

// Primary template is declared earlier in this header; restated here so the
// specialization below is self-contained.
template <typename T>
inline int EnumMax();

//! \brief Maximum number of elements in the ScaleMode enum.
// NOTE(review): the specialization's signature line (source 1411) was dropped
// by this extraction; reconstructed from the `template <>` / body that remain.
template <>
inline int EnumMax<ScaleMode>()
{
    return 3;
}
1415 
1436 class IScaleLayer : public ILayer
1437 {
1438 public:
1444  virtual void setMode(ScaleMode mode) = 0;
1445 
1451  virtual ScaleMode getMode() const = 0;
1452 
1458  virtual void setShift(Weights shift) = 0;
1459 
1465  virtual Weights getShift() const = 0;
1466 
1472  virtual void setScale(Weights scale) = 0;
1473 
1479  virtual Weights getScale() const = 0;
1480 
1486  virtual void setPower(Weights power) = 0;
1487 
1493  virtual Weights getPower() const = 0;
1494 
1495 protected:
1496  virtual ~IScaleLayer() {}
1497 };
1498 
1508 class ISoftMaxLayer : public ILayer
1509 {
1510 protected:
1511  virtual ~ISoftMaxLayer() {}
1512 public:
1528  virtual void setAxes(uint32_t axes) = 0;
1529 
1535  virtual uint32_t getAxes() const = 0;
1536 };
1537 
1548 {
1549 protected:
1550  virtual ~IConcatenationLayer() {}
1551 
1552 public:
1561  virtual void setAxis(int axis) = 0;
1562 
1568  virtual int getAxis() const = 0;
1569 };
1570 
1579 {
1580 public:
1588  virtual void setKernelSize(DimsHW kernelSize) = 0;
1589 
1595  virtual DimsHW getKernelSize() const = 0;
1596 
1604  virtual void setNbOutputMaps(int nbOutputMaps) = 0;
1605 
1611  virtual int getNbOutputMaps() const = 0;
1612 
1620  virtual void setStride(DimsHW stride) = 0;
1621 
1627  virtual DimsHW getStride() const = 0;
1628 
1641  virtual void setPadding(DimsHW padding) = 0;
1642 
1648  virtual DimsHW getPadding() const = 0; // padding defaults to 0
1649 
1662  virtual void setNbGroups(int nbGroups) = 0;
1663 
1669  virtual int getNbGroups() const = 0;
1670 
1679  virtual void setKernelWeights(Weights weights) = 0;
1680 
1686  virtual Weights getKernelWeights() const = 0;
1687 
1697  virtual void setBiasWeights(Weights weights) = 0;
1698 
1704  virtual Weights getBiasWeights() const = 0;
1705 
1706 protected:
1707  virtual ~IDeconvolutionLayer() {}
1708 };
1709 
//!
//! \enum ElementWiseOperation
//! \brief Binary operations an IElementWiseLayer can apply.
//!
enum class ElementWiseOperation : int
{
    kSUM = 0,  //!< Sum of the two elements
    kPROD = 1, //!< Product of the two elements
    kMAX = 2,  //!< Maximum of the two elements
    kMIN = 3,  //!< Minimum of the two elements
    kSUB = 4,  //!< Subtract the second element from the first
    kDIV = 5,  //!< Divide the first element by the second
    kPOW = 6   //!< First element raised to the power of the second
};

// Primary template is declared earlier in this header; restated here so the
// specialization below is self-contained.
template <typename T>
inline int EnumMax();

//! \brief Maximum number of elements in the ElementWiseOperation enum.
// NOTE(review): the specialization's signature line (source 1729) was dropped
// by this extraction; reconstructed from the `template <>` / body that remain.
template <>
inline int EnumMax<ElementWiseOperation>()
{
    return 7;
}
1733 
1744 {
1745 public:
1755  virtual void setOperation(ElementWiseOperation type) = 0;
1756 
1764  virtual ElementWiseOperation getOperation() const = 0;
1765 
1766 protected:
1767  virtual ~IElementWiseLayer() {}
1768 };
1769 
1770 class IGatherLayer : public ILayer
1771 {
1772 public:
1779  virtual void setGatherAxis(int axis) = 0;
1780 
1786  virtual int getGatherAxis() const = 0;
1787 
1788 protected:
1789  virtual ~IGatherLayer() {}
1790 };
1791 
//!
//! \enum RNNOperation
//! \brief RNN cell types supported by the RNN layers.
//!
enum class RNNOperation : int
{
    kRELU = 0, //!< Single-gate RNN with ReLU activation
    kTANH = 1, //!< Single-gate RNN with TANH activation
    kLSTM = 2, //!< Four-gate LSTM
    kGRU = 3   //!< Three-gate GRU
};

// Primary template is declared earlier in this header; restated here so the
// specialization below is self-contained.
template <typename T>
inline int EnumMax();

//! \brief Maximum number of elements in the RNNOperation enum.
// NOTE(review): the specialization's signature line (source 1880) was dropped
// by this extraction; reconstructed from the `template <>` / body that remain.
template <>
inline int EnumMax<RNNOperation>()
{
    return 4;
}
1884 
//!
//! \enum RNNDirection
//! \brief Direction modes for the RNN layers.
//!
enum class RNNDirection : int
{
    kUNIDIRECTION = 0, //!< Unidirectional network
    kBIDIRECTION = 1   //!< Bidirectional network
};

// Primary template is declared earlier in this header; restated here so the
// specialization below is self-contained.
template <typename T>
inline int EnumMax();

//! \brief Maximum number of elements in the RNNDirection enum.
// NOTE(review): the specialization's signature line (source 1899) was dropped
// by this extraction; reconstructed from the `template <>` / body that remain.
template <>
inline int EnumMax<RNNDirection>()
{
    return 2;
}
1903 
//!
//! \enum RNNInputMode
//! \brief How the first-layer input is treated by the RNN layers.
//!
enum class RNNInputMode : int
{
    kLINEAR = 0, //!< Apply the input-weight matrix to the input
    kSKIP = 1    //!< Pass the input through unmodified
};

// Primary template is declared earlier in this header; restated here so the
// specialization below is self-contained.
template <typename T>
inline int EnumMax();

//! \brief Maximum number of elements in the RNNInputMode enum.
// NOTE(review): the specialization's signature line (source 1926) was dropped
// by this extraction; reconstructed from the `template <>` / body that remain.
template <>
inline int EnumMax<RNNInputMode>()
{
    return 2;
}
1930 
1940 class IRNNLayer : public ILayer
1941 {
1942 public:
1948  virtual unsigned getLayerCount() const = 0;
1949 
1958  virtual std::size_t getHiddenSize() const = 0;
1959 
1968  virtual int getSeqLength() const = 0;
1969 
1975  virtual void setOperation(RNNOperation op) = 0;
1976 
1982  virtual RNNOperation getOperation() const = 0;
1983 
1989  virtual void setInputMode(RNNInputMode op) = 0;
1990 
1996  virtual RNNInputMode getInputMode() const = 0;
1997 
2009  virtual void setDirection(RNNDirection op) = 0;
2010 
2016  virtual RNNDirection getDirection() const = 0;
2017 
2132  virtual void setWeights(Weights weights) = 0;
2133 
2139  virtual Weights getWeights() const = 0;
2140 
2192  virtual void setBias(Weights bias) = 0;
2193 
2199  virtual Weights getBias() const = 0;
2200 
2207  virtual int getDataLength() const = 0;
2208 
2225  virtual void setHiddenState(ITensor& hidden) = 0;
2226 
2232  virtual ITensor* getHiddenState() const = 0;
2233 
2252  virtual void setCellState(ITensor& cell) = 0;
2253 
2259  virtual ITensor* getCellState() const = 0;
2260 
2261 protected:
2262  virtual ~IRNNLayer() {}
2263 };
2264 
//!
//! \enum RNNGateType
//! \brief Gates of the RNN cells addressed by IRNNv2Layer.
//!
enum class RNNGateType : int
{
    kINPUT = 0,  //!< Input gate (i)
    kOUTPUT = 1, //!< Output gate (o)
    kFORGET = 2, //!< Forget gate (f)
    kUPDATE = 3, //!< Update gate (z)
    kRESET = 4,  //!< Reset gate (r)
    kCELL = 5,   //!< Cell gate (c)
    kHIDDEN = 6  //!< Hidden gate (h)
};

// Primary template is declared earlier in this header; restated here so the
// specialization below is self-contained.
template <typename T>
inline int EnumMax();

//! \brief Maximum number of elements in the RNNGateType enum.
// NOTE(review): the specialization's signature line (source 2284) was dropped
// by this extraction; reconstructed from the `template <>` / body that remain.
template <>
inline int EnumMax<RNNGateType>()
{
    return 7;
}
2288 
2296 class IRNNv2Layer : public ILayer
2297 {
2298 public:
2299  virtual int32_t getLayerCount() const = 0; //< Get the layer count of the RNN
2300  virtual int32_t getHiddenSize() const = 0; //< Get the hidden size of the RNN
2301  virtual int32_t getMaxSeqLength() const = 0; //< Get the maximum sequence length of the RNN
2302  virtual int32_t getDataLength() const = 0; //< Get the maximum data length of the RNN
2303 
2318  virtual void setSequenceLengths(ITensor& seqLengths) = 0;
2319 
2327  virtual ITensor* getSequenceLengths() const = 0;
2328 
2333  virtual void setOperation(RNNOperation op) = 0;
2334 
2339  virtual RNNOperation getOperation() const = 0;
2340 
2345  virtual void setInputMode(RNNInputMode op) = 0;
2346 
2351  virtual RNNInputMode getInputMode() const = 0;
2352 
2357  virtual void setDirection(RNNDirection op) = 0;
2358 
2363  virtual RNNDirection getDirection() const = 0;
2364 
2382  virtual void setWeightsForGate(int layerIndex, RNNGateType gate, bool isW, Weights weights) = 0;
2383 
2388  virtual Weights getWeightsForGate(int layerIndex, RNNGateType gate, bool isW) const = 0;
2389 
2405  virtual void setBiasForGate(int layerIndex, RNNGateType gate, bool isW, Weights bias) = 0;
2406 
2411  virtual Weights getBiasForGate(int layerIndex, RNNGateType gate, bool isW) const = 0;
2412 
2422  virtual void setHiddenState(ITensor& hidden) = 0;
2423 
2428  virtual ITensor* getHiddenState() const = 0;
2429 
2441  virtual void setCellState(ITensor& cell) = 0;
2442 
2447  virtual ITensor* getCellState() const = 0;
2448 
2449 protected:
2450  virtual ~IRNNv2Layer() {}
2451 };
2452 
2459 {
2460 public:
2475  virtual DimsHW compute(DimsHW inputDims, DimsHW kernelSize, DimsHW stride, DimsHW padding, DimsHW dilation, const char* layerName) const = 0;
2476 
2477 protected:
2478  virtual ~IOutputDimensionsFormula() {}
2479 };
2480 
//!
//! \enum PluginFormat
//! \brief Data layouts a plugin may support.
//!
enum class PluginFormat : uint8_t
{
    kNCHW = 0,   //!< NCHW layout
    kNC2HW2 = 1, //!< NC/2HW2 layout
    kNHWC8 = 2   //!< NHWC8 layout
};

// Primary template is declared earlier in this header; restated here so the
// specialization below is self-contained.
template <typename T>
inline int EnumMax();

//! \brief Maximum number of elements in the PluginFormat enum.
// NOTE(review): the specialization's signature line (source 2496) was dropped
// by this extraction; reconstructed from the `template <>` / body that remain.
template <>
inline int EnumMax<PluginFormat>()
{
    return 3;
}
2500 
2508 class IPlugin
2509 {
2510 public:
2518  virtual int getNbOutputs() const = 0;
2519 
2529  virtual Dims getOutputDimensions(int index, const Dims* inputs, int nbInputDims) = 0;
2530 
2547  virtual void configure(const Dims* inputDims, int nbInputs, const Dims* outputDims, int nbOutputs, int maxBatchSize) = 0;
2548 
2554  virtual int initialize() = 0;
2555 
2560  virtual void terminate() = 0;
2561 
2570  virtual size_t getWorkspaceSize(int maxBatchSize) const = 0;
2571 
2583  virtual int enqueue(int batchSize, const void* const* inputs, void** outputs, void* workspace, cudaStream_t stream) = 0;
2584 
2590  virtual size_t getSerializationSize() = 0;
2591 
2599  virtual void serialize(void* buffer) = 0;
2600 
2601 protected:
2602  virtual ~IPlugin() {}
2603 };
2604 
2613 class IPluginExt : public IPlugin
2614 {
2615 public:
2621  virtual int getTensorRTVersion() const
2622  {
2623  return NV_TENSORRT_VERSION;
2624  }
2625 
2636  virtual bool supportsFormat(DataType type, PluginFormat format) const = 0;
2637 
2654  virtual void configureWithFormat(const Dims* inputDims, int nbInputs, const Dims* outputDims, int nbOutputs, DataType type, PluginFormat format, int maxBatchSize) = 0;
2655 
2658  // \see IPluginCreator::getPluginName()
2660  virtual const char* getPluginType() const = 0;
2661 
2664  // \see IPluginCreator::getPluginVersion()
2666  virtual const char* getPluginVersion() const = 0;
2667 
2671  virtual void destroy() = 0;
2672 
2676  virtual IPluginExt* clone() const = 0;
2677 
2678 
2679 protected:
2683  void configure(const Dims* /*inputDims*/, int /*nbInputs*/, const Dims* /*outputDims*/, int /*nbOutputs*/, int /*maxBatchSize*/) _TENSORRT_FINAL {}
2684 
2685  virtual ~IPluginExt() {}
2686 };
2687 
2695 class IPluginLayer : public ILayer
2696 {
2697 public:
2703  virtual IPlugin& getPlugin() = 0;
2704 
2705 protected:
2706  virtual ~IPluginLayer() {}
2707 };
2708 
2713 
//!
//! \enum PluginFieldType
//! \brief The type of data carried by a PluginField.
//!
enum class PluginFieldType : int
{
    kFLOAT16 = 0, //!< 16-bit floating point
    kFLOAT32 = 1, //!< 32-bit floating point
    kFLOAT64 = 2, //!< 64-bit floating point
    kINT8 = 3,    //!< 8-bit integer
    kINT16 = 4,   //!< 16-bit integer
    kINT32 = 5,   //!< 32-bit integer
    kCHAR = 6,    //!< Character
    kDIMS = 7,    //!< Dims structure
    kUNKNOWN = 8  //!< Unknown type
};
2726 
2735 {
2739  const char* name;
2743  const void* data;
2752  int length;
2753 
2754  PluginField(const char* name_ = nullptr, const void* data_ = nullptr, const PluginFieldType type_ = PluginFieldType::kUNKNOWN, int length_ = 0)
2755  : name(name_)
2756  , data(data_)
2757  , type(type_)
2758  , length(length_)
2759  {
2760  }
2761 };
2762 
2764 {
2765  int nbFields;
2767 };
2768 
2776 
2778 {
2779 public:
2783  virtual int getTensorRTVersion() const { return NV_TENSORRT_VERSION; }
2784 
2788  virtual const char* getPluginName() const = 0;
2789 
2793  virtual const char* getPluginVersion() const = 0;
2794 
2799  virtual const PluginFieldCollection* getFieldNames() = 0;
2800 
2804  virtual IPluginExt* createPlugin(const char* name, const PluginFieldCollection* fc) = 0;
2805 
2809  virtual IPluginExt* deserializePlugin(const char* name, const void* serialData, size_t serialLength) = 0;
2810 
2811  virtual ~IPluginCreator() {}
2812 };
2813 
2815 {
2816 public:
2821  virtual bool registerCreator(IPluginCreator&) = 0;
2822 
2827  virtual IPluginCreator* const* getPluginCreatorList(int* numCreators) const = 0;
2828 
2832  virtual IPluginCreator* getPluginCreator(const char* pluginType, const char* pluginVersion) = 0;
2833 
2834 protected:
2835  virtual ~IPluginRegistry() {}
2836 };
2837 
//!
//! \enum UnaryOperation
//! \brief Unary operations an IUnaryLayer can apply.
//!
enum class UnaryOperation : int
{
    kEXP = 0,   //!< Exponentiation
    kLOG = 1,   //!< Logarithm
    kSQRT = 2,  //!< Square root
    kRECIP = 3, //!< Reciprocal
    kABS = 4,   //!< Absolute value
    kNEG = 5,   //!< Negation
};

// Primary template is declared earlier in this header; restated here so the
// specialization below is self-contained.
template <typename T>
inline int EnumMax();

//! \brief Maximum number of elements in the UnaryOperation enum.
// NOTE(review): the specialization's signature line (source 2856) was dropped
// by this extraction; reconstructed from the `template <>` / body that remain.
template <>
inline int EnumMax<UnaryOperation>()
{
    return 6;
}
2860 
2866 class IUnaryLayer : public ILayer
2867 {
2868 public:
2874  virtual void setOperation(UnaryOperation op) = 0;
2875 
2881  virtual UnaryOperation getOperation() const = 0;
2882 
2883 protected:
2884  virtual ~IUnaryLayer() {}
2885 };
2886 
//!
//! \enum ReduceOperation
//! \brief Reduction operations an IReduceLayer can apply.
//!
enum class ReduceOperation : int
{
    kSUM = 0,  //!< Sum of elements
    kPROD = 1, //!< Product of elements
    kMAX = 2,  //!< Maximum element
    kMIN = 3,  //!< Minimum element
    kAVG = 4   //!< Average of elements
};

// Primary template is declared earlier in this header; restated here so the
// specialization below is self-contained.
template <typename T>
inline int EnumMax();

//! \brief Maximum number of elements in the ReduceOperation enum.
// NOTE(review): the specialization's signature line (source 2902) was dropped
// by this extraction; reconstructed from the `template <>` / body that remain.
template <>
inline int EnumMax<ReduceOperation>()
{
    return 5;
}
2906 
2912 class IReduceLayer : public ILayer
2913 {
2914 public:
2920  virtual void setOperation(ReduceOperation op) = 0;
2921 
2927  virtual ReduceOperation getOperation() const = 0;
2928 
2934  virtual void setReduceAxes(uint32_t reduceAxes) = 0;
2935 
2941  virtual uint32_t getReduceAxes() const = 0;
2942 
2948  virtual void setKeepDimensions(bool keepDimensions) = 0;
2949 
2955  virtual bool getKeepDimensions() const = 0;
2956 
2957 protected:
2958  virtual ~IReduceLayer() {}
2959 };
2960 
2969 class IPaddingLayer : public ILayer
2970 {
2971 public:
2979  virtual void setPrePadding(DimsHW padding) = 0;
2980 
2986  virtual DimsHW getPrePadding() const = 0;
2987 
2995  virtual void setPostPadding(DimsHW padding) = 0;
2996 
3002  virtual DimsHW getPostPadding() const = 0;
3003 
3004 protected:
3005  virtual ~IPaddingLayer() {}
3006 };
3007 
3016 {
3024 };
3025 
3026 class IShuffleLayer : public ILayer
3027 {
3028 public:
3038  virtual void setFirstTranspose(Permutation permutation) = 0;
3039 
3047  virtual Permutation getFirstTranspose() const = 0;
3048 
3067  virtual void setReshapeDimensions(Dims dimensions) = 0;
3068 
3074  virtual Dims getReshapeDimensions() const = 0;
3075 
3088  virtual void setSecondTranspose(Permutation permutation) = 0;
3089 
3097  virtual Permutation getSecondTranspose() const = 0;
3098 
3099 protected:
3100  virtual ~IShuffleLayer() {}
3101 };
3102 
//!
//! \enum TopKOperation
//! \brief Operations an ITopKLayer can apply.
//!
enum class TopKOperation : int
{
    kMAX = 0, //!< Select the maximum k elements
    kMIN = 1, //!< Select the minimum k elements
};

// Primary template is declared earlier in this header; restated here so the
// specialization below is self-contained.
template <typename T>
inline int EnumMax();

//! \brief Maximum number of elements in the TopKOperation enum.
// NOTE(review): the specialization's signature line (source 3115) was dropped
// by this extraction; reconstructed from the `template <>` / body that remain.
template <>
inline int EnumMax<TopKOperation>()
{
    return 2;
}
3119 
3125 class ITopKLayer : public ILayer
3126 {
3127 public:
3133  virtual void setOperation(TopKOperation op) = 0;
3134 
3140  virtual TopKOperation getOperation() const = 0;
3141 
3149  virtual void setK(int k) = 0;
3150 
3156  virtual int getK() const = 0;
3157 
3163  virtual void setReduceAxes(uint32_t reduceAxes) = 0;
3164 
3170  virtual uint32_t getReduceAxes() const = 0;
3171 
3172 protected:
3173  virtual ~ITopKLayer() {}
3174 };
3175 
3196 {
3197 public:
3204  virtual void setTranspose(int index, bool val) = 0;
3205 
3211  virtual bool getTranspose(int index) const = 0;
3212 
3213 protected:
3214  virtual ~IMatrixMultiplyLayer() {}
3215 };
3216 
3230 {
3231 protected:
3232  virtual ~IRaggedSoftMaxLayer() {}
3233 };
3234 
3241 class IIdentityLayer : public ILayer
3242 {
3243 protected:
3244  virtual ~IIdentityLayer() {}
3245 };
3246 
3251 class IConstantLayer : public ILayer
3252 {
3253 public:
3259  virtual void setWeights(Weights weights) = 0;
3260 
3266  virtual Weights getWeights() const = 0;
3267 
3275  virtual void setDimensions(Dims dimensions) = 0;
3276 
3284  virtual Dims getDimensions() const = 0;
3285 
3286 protected:
3287  virtual ~IConstantLayer() {}
3288 };
3289 
//!
//! \class INetworkDefinition
//! \brief A network definition for input to the builder.
//!
// NOTE(review): the class declaration line (source ~3295, `class
// INetworkDefinition`) was dropped by this extraction; the destructor below
// confirms the name. Several member declarations between source lines 3573
// and 3647 are also missing from this view (the blank numbered lines below)
// — confirm against the shipped NvInfer.h before relying on the vtable order.
3296 {
3297 public:
//! Add an input tensor with the given name, type and dimensions.
3314  virtual ITensor* addInput(const char* name, DataType type, Dims dimensions) = 0;
3315 
//! Mark a tensor as a network output.
3321  virtual void markOutput(ITensor& tensor) = 0;
3322 
//! Add a convolution layer.
3336  virtual IConvolutionLayer* addConvolution(ITensor& input, int nbOutputMaps, DimsHW kernelSize, Weights kernelWeights, Weights biasWeights) = 0;
3337 
//! Add a fully connected layer.
3350  virtual IFullyConnectedLayer* addFullyConnected(ITensor& input, int nbOutputs, Weights kernelWeights, Weights biasWeights) = 0;
3351 
//! Add an activation layer.
3362  virtual IActivationLayer* addActivation(ITensor& input, ActivationType type) = 0;
3363 
//! Add a pooling layer.
3375  virtual IPoolingLayer* addPooling(ITensor& input, PoolingType type, DimsHW windowSize) = 0;
3376 
//! Add an LRN layer.
3390  virtual ILRNLayer* addLRN(ITensor& input, int window, float alpha, float beta, float k) = 0;
3391 
//! Add a scale layer.
3410  virtual IScaleLayer* addScale(ITensor& input, ScaleMode mode, Weights shift, Weights scale, Weights power) = 0;
3411 
//! Add a SoftMax layer.
3419  virtual ISoftMaxLayer* addSoftMax(ITensor& input) = 0;
3420 
//! Add a concatenation layer over the given inputs.
3433  virtual IConcatenationLayer* addConcatenation(ITensor* const* inputs, int nbInputs) = 0;
3434 
//! Add a deconvolution layer.
3448  virtual IDeconvolutionLayer* addDeconvolution(ITensor& input, int nbOutputMaps, DimsHW kernelSize, Weights kernelWeights, Weights biasWeights) = 0;
3449 
//! Add an elementwise layer.
3469  virtual IElementWiseLayer* addElementWise(ITensor& input1, ITensor& input2, ElementWiseOperation op) = 0;
3470 
//! Add a first-generation RNN layer.
3525  virtual IRNNLayer* addRNN(ITensor& inputs, int layerCount, std::size_t hiddenSize, int maxSeqLen, RNNOperation op, RNNInputMode mode, RNNDirection dir, Weights weights, Weights bias) = 0;
3526 
//! Add a plugin layer.
3538  virtual IPluginLayer* addPlugin(ITensor* const* inputs, int nbInputs, IPlugin& plugin) = 0;
3539 
//! Add a unary layer.
3550  virtual IUnaryLayer* addUnary(ITensor& input, UnaryOperation operation) = 0;
3551 
//! Add a padding layer.
3562  virtual IPaddingLayer* addPadding(ITensor& input, DimsHW prePadding, DimsHW postPadding) = 0;
3563 
//! Add a shuffle layer.
3571  virtual IShuffleLayer* addShuffle(ITensor& input) = 0;
3572 
// NOTE(review): the declarations that belong on the following source lines
// were dropped by this extraction.
3583 
3592 
3605 
3616 
3629 
3640 
//! Number of layers in the network.
3648  virtual int getNbLayers() const = 0;
3649 
//! Get the layer at the given index.
3659  virtual ILayer* getLayer(int index) const = 0;
3660 
//! Number of network inputs.
3668  virtual int getNbInputs() const = 0;
3669 
3679  virtual ITensor* getInput(int index) const = 0; // adding inputs invalidates indexing here
3680 
//! Number of network outputs.
3688  virtual int getNbOutputs() const = 0;
3689 
3699  virtual ITensor* getOutput(int index) const = 0; // adding outputs invalidates indexing here
3700 
//! Destroy this network definition object.
3704  virtual void destroy() = 0;
3705 
3706 protected:
3707  virtual ~INetworkDefinition() {}
3708 
3709 public:
//! Add a reduce layer.
3729  virtual IReduceLayer* addReduce(ITensor& input, ReduceOperation operation, uint32_t reduceAxes, bool keepDimensions) = 0;
3730 
//! Add a TopK layer.
3755  virtual ITopKLayer* addTopK(ITensor& input, TopKOperation op, int k, uint32_t reduceAxes) = 0;
3756 
//! Add a gather layer.
3768  virtual IGatherLayer* addGather(ITensor& data, ITensor& indices, int axis) = 0;
3769 
//! Add a ragged SoftMax layer.
3780  virtual IRaggedSoftMaxLayer* addRaggedSoftMax(ITensor& input, ITensor& bounds) = 0;
3781 
//! Add a matrix-multiply layer.
3794  virtual IMatrixMultiplyLayer* addMatrixMultiply(ITensor& input0, bool transpose0, ITensor& input1, bool transpose1) = 0;
3795 
//! Add a constant layer.
3806  virtual IConstantLayer* addConstant(Dims dimensions, Weights weights) = 0;
3807 
//! Add a second-generation RNN layer.
3862  virtual IRNNv2Layer* addRNNv2(ITensor& input, int32_t layerCount, int32_t hiddenSize, int32_t maxSeqLen, RNNOperation op) = 0;
3863 
//! Add an extended-plugin layer.
3875  virtual IPluginLayer* addPluginExt(ITensor* const* inputs, int nbInputs, IPluginExt& plugin) = 0;
3876 
//! Add an identity layer.
3884  virtual IIdentityLayer* addIdentity(ITensor& input) = 0;
3885 
//! Remove a tensor from the network.
3895  virtual void removeTensor(ITensor& tensor) = 0;
3896 
//! Unmark a tensor as a network output.
3905  virtual void unmarkOutput(ITensor& tensor) = 0;
3906 };
3907 
3919 {
3920 public:
3927  virtual void reportLayerTime(const char* layerName, float ms) = 0;
3928 
3929 protected:
3930  virtual ~IProfiler() {}
3931 };
3932 
3933 class ICudaEngine;
3934 
3944 {
3945 public:
3957  virtual bool execute(int batchSize, void** bindings) = 0;
3958 
3972  virtual bool enqueue(int batchSize, void** bindings, cudaStream_t stream, cudaEvent_t* inputConsumed) = 0;
3973 
3981  virtual void setDebugSync(bool sync) = 0;
3982 
3988  virtual bool getDebugSync() const = 0;
3989 
3995  virtual void setProfiler(IProfiler*) = 0;
3996 
4002  virtual IProfiler* getProfiler() const = 0;
4003 
4009  virtual const ICudaEngine& getEngine() const = 0;
4010 
4014  virtual void destroy() = 0;
4015 
4016 protected:
4017  virtual ~IExecutionContext() {}
4018 
4019 public:
4027  virtual void setName(const char* name) = 0;
4028 
4034  virtual const char* getName() const = 0;
4035 
4047  virtual void setDeviceMemory(void* memory) = 0;
4048 };
4049 
//!
//! \class ICudaEngine
//! \brief An engine for executing inference on a built network.
//!
// NOTE(review): the class declaration line (source ~4055, `class
// ICudaEngine`) was dropped by this extraction; the destructor below confirms
// the name. Declarations around source lines 4158-4163 and 4186-4193 are also
// missing (the blank numbered lines below) — the shipped NvInfer.h declares
// createExecutionContext-style factory methods there; confirm against the
// header before relying on the vtable order.
4056 {
4057 public:
//! Number of binding slots (inputs + outputs).
4063  virtual int getNbBindings() const = 0;
4064 
//! Look up a binding index by tensor name.
4078  virtual int getBindingIndex(const char* name) const = 0;
4079 
//! Name of the tensor at the given binding index.
4090  virtual const char* getBindingName(int bindingIndex) const = 0;
4091 
//! Whether the binding at the given index is an input.
4100  virtual bool bindingIsInput(int bindingIndex) const = 0;
4101 
//! Dimensions of the tensor at the given binding index.
4110  virtual Dims getBindingDimensions(int bindingIndex) const = 0;
4111 
//! Data type of the tensor at the given binding index.
4120  virtual DataType getBindingDataType(int bindingIndex) const = 0;
4121 
//! Maximum batch size the engine was built for.
4127  virtual int getMaxBatchSize() const = 0;
4128 
//! Number of layers in the engine.
4137  virtual int getNbLayers() const = 0;
4138 
//! Workspace size the engine requires.
4145  virtual std::size_t getWorkspaceSize() const = 0;
4146 
//! Serialize the engine to host memory.
4156  virtual IHostMemory* serialize() const = 0;
4157 
4164 
//! Destroy this engine object.
4168  virtual void destroy() = 0;
4169 
//! Storage location (host/device) of the given binding.
4180  virtual TensorLocation getLocation(int bindingIndex) const = 0;
4181 
4182 protected:
4183  virtual ~ICudaEngine() {}
4184 
4185 public:
4194 
//! Device memory size required by an execution context of this engine.
4200  virtual size_t getDeviceMemorySize() const = 0;
4201 };
4202 
//!
//! \enum CalibrationAlgoType
//! \brief INT8 calibration algorithms.
//!
enum class CalibrationAlgoType : int
{
    kLEGACY_CALIBRATION = 0, //!< Legacy (percentile-based) calibration
    kENTROPY_CALIBRATION = 1 //!< Entropy-based calibration
};

// Primary template is declared earlier in this header; restated here so the
// specialization below is self-contained.
template <typename T>
inline int EnumMax();

//! \brief Maximum number of elements in the CalibrationAlgoType enum.
// NOTE(review): the specialization's signature line (source 4215) was dropped
// by this extraction; reconstructed from the `template <>` / body that remain.
template <>
inline int EnumMax<CalibrationAlgoType>()
{
    return 2;
}
4219 
4232 {
4233 public:
4239  virtual int getBatchSize() const = 0;
4240 
4254  virtual bool getBatch(void* bindings[], const char* names[], int nbBindings) = 0; // get a pointer to the input batch
4255 
4268  virtual const void* readCalibrationCache(std::size_t& length) = 0;
4269 
4278  virtual void writeCalibrationCache(const void* ptr, std::size_t length) = 0;
4279 
4285  virtual CalibrationAlgoType getAlgorithm() = 0;
4286 
4287 protected:
4288  virtual ~IInt8Calibrator() {}
4289 };
4290 
4295 {
4296 public:
4300  virtual CalibrationAlgoType getAlgorithm() { return CalibrationAlgoType::kENTROPY_CALIBRATION; }
4301 protected:
4302  virtual ~IInt8EntropyCalibrator() {}
4303 };
4304 
4310 {
4311 public:
4315  virtual CalibrationAlgoType getAlgorithm() { return CalibrationAlgoType::kLEGACY_CALIBRATION; }
4316 
4322  virtual double getQuantile() const = 0;
4323 
4329  virtual double getRegressionCutoff() const = 0;
4330 
4342  virtual const void* readHistogramCache(std::size_t& length) = 0;
4343 
4352  virtual void writeHistogramCache(const void* ptr, std::size_t length) = 0;
4353 
4354 protected:
4355  virtual ~IInt8LegacyCalibrator() {}
4356 };
4357 
4364 {
4365 public:
4380  virtual void* allocate(uint64_t size, uint64_t alignment, uint32_t flags) = 0;
4381 
4389  virtual void free(void* memory) = 0;
4390 };
4391 
4398 {
4399 public:
4406 
4414  virtual void setMaxBatchSize(int batchSize) = 0;
4415 
4424  virtual int getMaxBatchSize() const = 0;
4425 
4433  virtual void setMaxWorkspaceSize(std::size_t workspaceSize) = 0;
4434 
4442  virtual std::size_t getMaxWorkspaceSize() const = 0;
4443 
4455  virtual void setHalf2Mode(bool mode) = 0;
4456 
4464  virtual bool getHalf2Mode() const = 0;
4465 
4471  virtual void setDebugSync(bool sync) = 0;
4472 
4478  virtual bool getDebugSync() const = 0;
4479 
4488  virtual void setMinFindIterations(int minFind) = 0;
4489 
4495  virtual int getMinFindIterations() const = 0;
4496 
4505  virtual void setAverageFindIterations(int avgFind) = 0;
4506 
4512  virtual int getAverageFindIterations() const = 0;
4513 
4520 
4524  virtual bool platformHasFastFp16() const = 0;
4525 
4529  virtual bool platformHasFastInt8() const = 0;
4530 
4534  virtual void destroy() = 0;
4535 
4541  virtual void setInt8Mode(bool mode) = 0;
4542 
4548  virtual bool getInt8Mode() const = 0;
4549 
4553  virtual void setInt8Calibrator(IInt8Calibrator* calibrator) = 0;
4554 
4561  virtual void setDeviceType(ILayer* layer, DeviceType deviceType) = 0;
4562 
4567  virtual DeviceType getDeviceType(const ILayer* layer) const = 0;
4568 
4574  virtual bool isDeviceTypeSet(const ILayer* layer) const = 0;
4575 
4581  virtual void resetDeviceType(ILayer* layer) = 0;
4582 
4587  virtual bool canRunOnDLA(const ILayer* layer) const = 0;
4588 
4593  virtual void setDefaultDeviceType(DeviceType deviceType) = 0;
4594 
4598  virtual DeviceType getDefaultDeviceType() const = 0;
4599 
4604  virtual int getMaxDLABatchSize(DeviceType deviceType) const = 0;
4605 
4610  virtual void allowGPUFallback(bool setFallBackMode) = 0;
4611 
4615  virtual void reset(nvinfer1::INetworkDefinition& network) = 0;
4616 
4617 
4618 protected:
4619  virtual ~IBuilder() {}
4620 
4621 public:
4631  virtual void setGpuAllocator(IGpuAllocator* allocator) = 0;
4632 
4642  virtual void setFp16Mode(bool mode) = 0;
4643 
4649  virtual bool getFp16Mode() const = 0;
4650 
4665  virtual void setStrictTypeConstraints(bool mode) = 0;
4666 
4672  virtual bool getStrictTypeConstraints() const = 0;
4673 
4674 };
4675 
4682 {
4683 public:
4698  virtual IPlugin* createPlugin(const char* layerName, const void* serialData, size_t serialLength) = 0;
4699 };
4700 
4707 {
4708 public:
4718  virtual nvinfer1::ICudaEngine* deserializeCudaEngine(const void* blob, std::size_t size, IPluginFactory* pluginFactory) = 0;
4719 
4723  virtual void destroy() = 0;
4724 
4725 protected:
4726  virtual ~IRuntime() {}
4727 
4728 public:
4737  virtual void setGpuAllocator(IGpuAllocator* allocator) = 0;
4738 };
4739 
4748 class ILogger
4749 {
4750 public:
4756  enum class Severity
4757  {
4758  kINTERNAL_ERROR = 0,
4759  kERROR = 1,
4760  kWARNING = 2,
4761  kINFO = 3
4762  };
4763 
4770  virtual void log(Severity severity, const char* msg) = 0;
4771 
4772 protected:
4773  virtual ~ILogger() {}
4774 };
4775 
4776 template <>
4777 inline int EnumMax<ILogger::Severity>()
4778 {
4779  return 4;
4780 }
4781 
4782 } // namespace nvinfer1
4783 
4784 extern "C" TENSORRTAPI void* createInferBuilder_INTERNAL(void* logger, int version);
4785 extern "C" TENSORRTAPI void* createInferRuntime_INTERNAL(void* logger, int version);
4786 
4790 extern "C" TENSORRTAPI nvinfer1::ILogger* getLogger();
4791 
4797 extern "C" TENSORRTAPI int getInferLibVersion();
4798 
4802 extern "C" TENSORRTAPI nvinfer1::IPluginRegistry* getPluginRegistry();
4803 
4804 namespace nvinfer1
4805 {
4811 namespace // unnamed namespace in case the compiler doesn't inline these
4812 {
4813 inline IBuilder* createInferBuilder(ILogger& logger)
4814 {
4815  return static_cast<IBuilder*>(createInferBuilder_INTERNAL(&logger, NV_TENSORRT_VERSION));
4816 }
4817 
4823 inline IRuntime* createInferRuntime(ILogger& logger)
4824 {
4825  return static_cast<IRuntime*>(createInferRuntime_INTERNAL(&logger, NV_TENSORRT_VERSION));
4826 }
4827 }
4828 
4835 template <typename T>
4837 {
4838 public:
4840 private:
4841  T instance{};
4842 };
4843 
4844 #define REGISTER_TENSORRT_PLUGIN(name) static PluginRegistrar<name> pluginRegistrar##name{}
4845 
4846 }
4847 
4848 #endif
DimensionType type[MAX_DIMS]
The type of each dimension.
Definition: NvInfer.h:182
int EnumMax< PluginFormat >()
Maximum number of elements in PluginFormat enum.
Definition: NvInfer.h:2496
virtual bool enqueue(int batchSize, void **bindings, cudaStream_t stream, cudaEvent_t *inputConsumed)=0
Asynchronously execute inference on a batch.
int w() const
Get the width.
Definition: NvInfer.h:500
int n() const
Get the index count.
Definition: NvInfer.h:458
An engine for executing inference on a built network.
Definition: NvInfer.h:4055
Subtract the second element from the first.
Perform the normal matrix multiplication in the first recurrent layer.
DataType
The type of weights and tensors.
Definition: NvInfer.h:118
virtual void setAverageCountExcludesPadding(bool exclusive)=0
Set whether average pooling uses as a denominator the overlap area between the window and the unpadde...
virtual bool isNetworkInput() const =0
Whether the tensor is a network input.
virtual DimsHW getDilation() const =0
Get the dilation for a convolution.
Severity
Definition: NvInfer.h:4756
virtual void setMinFindIterations(int minFind)=0
Set the number of minimization iterations used when timing layers.
virtual int getMaxBatchSize() const =0
Get the maximum batch size.
int EnumMax< TensorLocation >()
Maximum number of elements in TensorLocation enum.
Definition: NvInfer.h:589
virtual ITensor * getCellState() const =0
Get the initial cell state of the RNN.
virtual IPlugin * createPlugin(const char *layerName, const void *serialData, size_t serialLength)=0
Create a plugin from serialized data.
virtual ITensor * addInput(const char *name, DataType type, Dims dimensions)=0
Add an input tensor to the network.
virtual uint32_t getAxes() const =0
Get the axis along which softmax occurs.
virtual void resetPrecision()=0
Reset the computational precision for this layer.
virtual void setBias(Weights bias)=0
Set the bias parameters for the RNN.
DimsNCHW(int batchSize, int channels, int height, int width)
Construct a DimsNCHW given batch size, channel count, height and width.
Definition: NvInfer.h:438
virtual void setKernelWeights(Weights weights)=0
Set the kernel weights for the deconvolution.
virtual ReduceOperation getOperation() const =0
Get the reduce operation for the layer.
#define NV_TENSORRT_MAJOR
TensorRT major version.
Definition: NvInfer.h:56
virtual IExecutionContext * createExecutionContext()=0
Create an execution context.
virtual int getBatchSize() const =0
Get the batch size used for calibration batches.
RNNOperation
Enumerates the RNN operations that may be performed by an RNN layer.
Definition: NvInfer.h:1871
virtual void setOperation(ElementWiseOperation type)=0
Set the binary operation for the layer.
virtual void setBroadcastAcrossBatch(bool broadcastAcrossBatch)=0
Set whether to enable broadcast of tensor across the batch.
A Softmax layer in a network definition.
Definition: NvInfer.h:1508
virtual int getNbGroups() const =0
Get the number of groups for a convolution.
virtual Weights getPower() const =0
Get the power value.
virtual void setWeightsForGate(int layerIndex, RNNGateType gate, bool isW, Weights weights)=0
Set the weight parameters for an individual gate in the RNN.
Definition: NvInfer.h:3015
virtual double getQuantile() const =0
The quantile (between 0 and 1) that will be used to select the region maximum when the quantile metho...
virtual std::size_t getWorkspaceSize() const =0
Get the amount of workspace the engine uses.
An application error has occurred.
An application error has been discovered, but TensorRT has recovered or fallen back to a default...
virtual Weights getBias() const =0
Get the bias parameter vector for the RNN.
virtual void destroy()=0
Destroy the allocated memory.
virtual IMatrixMultiplyLayer * addMatrixMultiply(ITensor &input0, bool transpose0, ITensor &input1, bool transpose1)=0
Add a MatrixMultiply layer to the network.
virtual bool bindingIsInput(int bindingIndex) const =0
Determine whether a binding is an input binding.
virtual int getGatherAxis() const =0
Get the non-batch dimension axis to gather on.
virtual void setInputMode(RNNInputMode op)=0
Set the input mode of the RNN layer.
virtual const char * getName() const =0
Return the name of a layer.
virtual Weights getScale() const =0
Get the scale value.
virtual DimsHW getKernelSize() const =0
Get the HW kernel size of the deconvolution.
Layer that represents an unary operation.
Definition: NvInfer.h:2866
virtual void destroy()=0
Destroy this object.
virtual DimsHW getStride() const =0
Get the stride of the deconvolution.
virtual IPluginExt * createPlugin(const char *name, const PluginFieldCollection *fc)=0
Return a plugin object. Return nullptr in case of error.
PluginFieldType
Definition: NvInfer.h:2714
Rectified linear activation.
virtual bool getDebugSync() const =0
Get the debug sync flag.
virtual bool execute(int batchSize, void **bindings)=0
Synchronously execute inference on a batch.
virtual void destroy()=0
Destroy this INetworkDefinition object.
virtual const PluginFieldCollection * getFieldNames()=0
Return a list of fields that needs to be passed to createPlugin.
An Activation layer in a network definition.
Definition: NvInfer.h:1165
TENSORRTAPI void * createInferRuntime_INTERNAL(void *logger, int version)
Internal C entry point for creating IRuntime.
int w() const
Get the width.
Definition: NvInfer.h:375
virtual Dims getOutputDimensions(int index, const Dims *inputs, int nbInputDims)=0
Get the dimension of an output tensor.
RNNDirection
Enumerates the RNN direction that may be performed by an RNN layer.
Definition: NvInfer.h:1892
int EnumMax< DataType >()
Maximum number of elements in DataType enum.
Definition: NvInfer.h:127
Layer that represents a Matrix Multiplication.
Definition: NvInfer.h:3195
virtual DimsHW getKernelSize() const =0
Get the HW kernel size of the convolution.
virtual DataType type() const =0
The type of the memory that was allocated.
virtual int getAverageFindIterations() const =0
Query the number of averaging iterations.
DimsCHW()
Construct an empty DimsCHW object.
Definition: NvInfer.h:314
No operation is performed on the first recurrent layer.
const void * values
The weight values, in a contiguous array.
Definition: NvInfer.h:515
int c() const
Get the channel count.
Definition: NvInfer.h:472
void configure(const Dims *, int, const Dims *, int, int) _TENSORRT_FINAL
Derived classes should not implement this. In a C++11 API it would be override final.
Definition: NvInfer.h:2683
virtual bool getDebugSync() const =0
Query whether the builder will use debug synchronization.
virtual void setAlpha(float alpha)=0
Set the LRN alpha value.
struct CUevent_st * cudaEvent_t
Forward declaration of cudaEvent_t.
Definition: NvInfer.h:99
virtual Dims getBindingDimensions(int bindingIndex) const =0
Get the dimensions of a binding.
int h() const
Get the height.
Definition: NvInfer.h:486
virtual void setPrecision(DataType dataType)=0
Set the computational precision of this layer.
A convolution layer in a network definition.
Definition: NvInfer.h:912
virtual ITopKLayer * addTopK(ITensor &input, TopKOperation op, int k, uint32_t reduceAxes)=0
Add a TopK layer to the network.
virtual const char * getPluginVersion() const =0
Return the plugin version. Should match the plugin version returned by the corresponding plugin creat...
Allows a serialized engine to be deserialized.
Definition: NvInfer.h:4706
virtual Dims getReshapeDimensions() const =0
Get the reshaped dimensions.
virtual void writeHistogramCache(const void *ptr, std::size_t length)=0
Save a histogram cache.
NCHW with 2-element packed channels.
virtual void setPoolingOutputDimensionsFormula(IOutputDimensionsFormula *formula)=0
Set the pooling output dimensions formula.
A RaggedSoftmax layer in a network definition.
Definition: NvInfer.h:3229
virtual unsigned getLayerCount() const =0
Get the number of layers in the RNN.
virtual int getNbOutputs() const =0
Get the number of outputs from the layer.
virtual void configure(const Dims *inputDims, int nbInputs, const Dims *outputDims, int nbOutputs, int maxBatchSize)=0
Configure the layer.
ScaleMode
Controls how scale is applied in a Scale layer.
Definition: NvInfer.h:1403
Plugin class for user-implemented layers.
Definition: NvInfer.h:2508
Layer that represents a constant value.
Definition: NvInfer.h:3251
virtual IPluginExt * deserializePlugin(const char *name, const void *serialData, size_t serialLength)=0
Called during deserialization of a plugin layer. Return a plugin object.
virtual DataType getPrecision() const =0
Get the computational precision of this layer.
virtual ILayer * getLayer(int index) const =0
Get the layer specified by the given index.
A Scale layer in a network definition.
Definition: NvInfer.h:1436
virtual bool getBatch(void *bindings[], const char *names[], int nbBindings)=0
Get a batch of input for calibration.
virtual IRaggedSoftMaxLayer * addRaggedSoftMax(ITensor &input, ITensor &bounds)=0
Add a RaggedSoftMax layer to the network.
virtual int getNbGroups() const =0
Get the number of groups for a deconvolution.
NHWC with 8-element packed channels (C must be a multiple of 8).
virtual void setNbOutputMaps(int nbOutputMaps)=0
Set the number of output maps for the convolution.
virtual void log(Severity severity, const char *msg)=0
virtual const char * getBindingName(int bindingIndex) const =0
Retrieve the name corresponding to a binding index.
virtual IRNNv2Layer * addRNNv2(ITensor &input, int32_t layerCount, int32_t hiddenSize, int32_t maxSeqLen, RNNOperation op)=0
Add a layerCount-deep RNN layer to the network with hiddenSize internal states that can take a batch...
const PluginField * fields
Pointer to PluginField entries.
Definition: NvInfer.h:2766
virtual bool getFp16Mode() const =0
Query whether 16-bit kernels are permitted.
virtual IScaleLayer * addScale(ITensor &input, ScaleMode mode, Weights shift, Weights scale, Weights power)=0
Add a Scale layer to the network.
virtual void setHiddenState(ITensor &hidden)=0
Set the initial hidden state of the RNN with the provided hidden ITensor.
int EnumMax< RNNOperation >()
Maximum number of elements in RNNOperation enum.
Definition: NvInfer.h:1880
int EnumMax< LayerType >()
Maximum number of elements in LayerType enum.
Definition: NvInfer.h:573
virtual void setAxes(uint32_t axes)=0
Set the axis along which softmax is computed. Currently, only one axis can be set.
static const int MAX_DIMS
The maximum number of dimensions supported for a tensor.
Definition: NvInfer.h:179
virtual void setPrePadding(DimsHW padding)=0
Set the padding that is applied at the start of the tensor.
virtual DataType getBindingDataType(int bindingIndex) const =0
Determine the required data type for a buffer from its binding index.
A fully connected layer in a network definition. This layer expects an input tensor of three or more ...
Definition: NvInfer.h:1085
virtual ITensor * getSequenceLengths() const =0
Get the sequence lengths specified for the RNN.
virtual void resetDeviceType(ILayer *layer)=0
Reset the DeviceType for this layer.
virtual DataType getType() const =0
Get the data type of a tensor.
PluginFormat
Definition: NvInfer.h:2488
TENSORRTAPI void * createInferBuilder_INTERNAL(void *logger, int version)
Internal C entry point for creating IBuilder.
virtual void setKeepDimensions(bool keepDimensions)=0
Set the boolean that specifies whether or not to keep the reduced dimensions for the layer...
virtual void setBeta(float beta)=0
Set the LRN beta value.
int w() const
Get the width.
Definition: NvInfer.h:269
ReduceOperation
Enumerates the reduce operations that may be performed by a Reduce layer.
Definition: NvInfer.h:2892
Definition: NvInfer.h:2763
An internal error has occurred. Execution is unrecoverable.
virtual const char * getName() const =0
Return the name of the execution context.
A LRN layer in a network definition.
Definition: NvInfer.h:1331
int EnumMax< DeviceType >()
Maximum number of elements in DeviceType enum.
Definition: NvInfer.h:145
Descriptor for three-dimensional data.
Definition: NvInfer.h:276
virtual bool getInt8Mode() const =0
Query whether Int8 mode is used.
virtual int getNbLayers() const =0
Get the number of layers in the network.
virtual ElementWiseOperation getOperation() const =0
Get the binary operation for the layer.
virtual void setOperation(UnaryOperation op)=0
Set the unary operation for the layer.
virtual IPaddingLayer * addPadding(ITensor &input, DimsHW prePadding, DimsHW postPadding)=0
Add a padding layer to the network.
TensorLocation
The location for tensor data storage, device or host.
Definition: NvInfer.h:582
UnaryOp Operation Layer.
Definition: NvInfer.h:1770
Builds an engine from a network definition.
Definition: NvInfer.h:4397
virtual void markOutput(ITensor &tensor)=0
Mark a tensor as a network output.
virtual DimsHW getPadding() const =0
Get the padding of the deconvolution.
virtual void setOperation(RNNOperation op)=0
Set the operation of the RNN layer.
virtual void writeCalibrationCache(const void *ptr, std::size_t length)=0
Save a calibration cache.
virtual void setBiasWeights(Weights weights)=0
Set the bias weights for the convolution.
TENSORRTAPI nvinfer1::ILogger * getLogger()
Return the logger object.
virtual TopKOperation getOperation() const =0
Get the operation for the layer.
Layer that represents a TopK reduction.
Definition: NvInfer.h:3125
virtual RNNInputMode getInputMode() const =0
Get the input mode of the RNN layer.
int EnumMax< ActivationType >()
Maximum number of elements in ActivationType enum.
Definition: NvInfer.h:1151
virtual void setInt8Mode(bool mode)=0
Set whether or not Int8 kernels are permitted.
virtual void setReshapeDimensions(Dims dimensions)=0
Set the reshaped dimensions.
DataType type
The type of the weights.
Definition: NvInfer.h:514
virtual float getBeta() const =0
Get the LRN beta value.
virtual void setPadding(DimsHW padding)=0
Set the padding of the deconvolution.
virtual int enqueue(int batchSize, const void *const *inputs, void **outputs, void *workspace, cudaStream_t stream)=0
Execute the layer.
virtual TensorLocation getLocation() const =0
Get the storage location of a tensor.
An RNN layer in a network definition, version 2.
Definition: NvInfer.h:2296
Register the plugin creator to the registry. The static registry object will be instantiated when the ...
Definition: NvInfer.h:4836
virtual void setType(DataType type)=0
Set the data type of a tensor.
virtual ITensor * getOutput(int index) const =0
Get the output tensor specified by the given index.
virtual Permutation getSecondTranspose() const =0
Get the permutation applied by the second transpose operation.
int & h()
Get the height.
Definition: NvInfer.h:354
Elements correspond to different spatial data.
virtual DimsHW getPadding() const =0
Get the padding of the convolution.
virtual nvinfer1::INetworkDefinition * createNetwork()=0
Create a network definition object.
virtual int getNbOutputChannels() const =0
Get the number of output channels K from the fully connected layer.
virtual bool getTranspose(int index) const =0
Get the transpose flag for an input tensor.
virtual Weights getWeights() const =0
Get the weights for the layer.
virtual void setFp16Mode(bool mode)=0
Set whether or not 16-bit kernels are permitted.
virtual void reportLayerTime(const char *layerName, float ms)=0
Layer time reporting callback.
virtual void setK(int k)=0
Set the k value for the layer.
virtual IDeconvolutionLayer * addDeconvolution(ITensor &input, int nbOutputMaps, DimsHW kernelSize, Weights kernelWeights, Weights biasWeights)=0
Add a deconvolution layer to the network.
virtual IActivationLayer * addActivation(ITensor &input, ActivationType type)=0
Add an activation layer to the network.
virtual Weights getKernelWeights() const =0
Get the kernel weights.
virtual void setName(const char *name)=0
Set the name of the execution context.
virtual int getNbOutputs() const =0
Get the number of outputs in the network.
int & n()
Get the index count.
Definition: NvInfer.h:451
Data stored on device.
int EnumMax< TopKOperation >()
Maximum number of elements in TopKOperation enum.
Definition: NvInfer.h:3115
Elements correspond to different sequence values.
virtual std::size_t size() const =0
The size in bytes of the data that was allocated.
int EnumMax()
Maximum number of elements in an enumeration type.
virtual bool isDeviceTypeSet(const ILayer *layer) const =0
Whether the DeviceType has been explicitly set for this layer.
int c() const
Get the channel count.
Definition: NvInfer.h:347
int & w()
Get the width.
Definition: NvInfer.h:368
virtual IElementWiseLayer * addElementWise(ITensor &input1, ITensor &input2, ElementWiseOperation op)=0
Add an elementwise layer to the network.
virtual bool getAverageCountExcludesPadding() const =0
Get whether exclusive pooling uses as a denominator the overlap area between the window and the unpadd...
Layer that represents a reduction operator.
Definition: NvInfer.h:2912
Definition: NvInfer.h:4309
PoolingType
The type of pooling to perform in a pooling layer.
Definition: NvInfer.h:1191
int h() const
Get the height.
Definition: NvInfer.h:361
Definition: NvInfer.h:2814
Sum of the two elements.
virtual float getBlendFactor() const =0
Get the blending factor for the max_average_blend mode: max_average_blendPool = (1-blendFactor)*maxPo...
int order[Dims::MAX_DIMS]
Definition: NvInfer.h:3023
int EnumMax< RNNGateType >()
Maximum number of elements in RNNGateType enum.
Definition: NvInfer.h:2284
virtual void setMode(ScaleMode mode)=0
Set the scale mode.
virtual ITensor * getInput(int index) const =0
Get the layer input corresponding to the given index.
virtual bool supportsFormat(DataType type, PluginFormat format) const =0
Check format support.
virtual void setConvolutionOutputDimensionsFormula(IOutputDimensionsFormula *formula)=0
Set the convolution output dimensions formula.
Plugin creator class for user implemented layers.
Definition: NvInfer.h:2777
int & w()
Get the width.
Definition: NvInfer.h:262
virtual void setSequenceLengths(ITensor &seqLengths)=0
Specify individual sequence lengths in the batch with the ITensor pointed to by seqLengths.
Plugin factory for deserialization.
Definition: NvInfer.h:4681
virtual int getNbInputs() const =0
Get the number of inputs in the network.
virtual ITensor * getHiddenState() const =0
Get the initial hidden state of the RNN.
int length
Number of data entries in the Plugin attribute.
Definition: NvInfer.h:2752
virtual void setOperation(TopKOperation op)=0
Set the operation for the layer.
Descriptor for data with one channel dimension and two spatial dimensions.
Definition: NvInfer.h:308
virtual DimsHW getWindowSize() const =0
Get the window size for pooling.
The first element to the power of the second element.
A network definition for input to the builder.
Definition: NvInfer.h:3295
virtual int getNbLayers() const =0
Get the number of layers in the network.
virtual IHostMemory * serialize() const =0
Serialize the network to a stream.
virtual CalibrationAlgoType getAlgorithm()
Definition: NvInfer.h:4315
virtual IUnaryLayer * addUnary(ITensor &input, UnaryOperation operation)=0
Add a unary layer to the network.
Structure containing plugin attribute field names and associated data This information can be parsed ...
Definition: NvInfer.h:2734
virtual void setOperation(ReduceOperation op)=0
Set the reduce operation for the layer.
virtual int getTensorRTVersion() const
Return the version of the API the plugin creator was compiled with.
Definition: NvInfer.h:2783
virtual void destroy()=0
Destroy this object.
int EnumMax< CalibrationAlgoType >()
Maximum number of elements in CalibrationAlgoType enum.
Definition: NvInfer.h:4215
virtual RNNDirection getDirection() const =0
Get the direction of the RNN layer.
virtual DimsHW getPadding() const =0
Get the padding for pooling.
virtual size_t getSerializationSize()=0
Find the size of the serialization buffer required.
virtual ITensor * getHiddenState() const =0
Get the initial hidden state of the RNN.
virtual IConstantLayer * addConstant(Dims dimensions, Weights weights)=0
Add a constant layer to the network.
virtual void setGpuAllocator(IGpuAllocator *allocator)=0
Set the GPU allocator.
virtual bool getStrictTypeConstraints() const =0
Query whether or not type constraints are strict.
Divide the first element by the second.
virtual int getMinFindIterations() const =0
Query the number of minimization iterations.
virtual LayerType getType() const =0
Return the type of a layer.
virtual void setDeviceMemory(void *memory)=0
Set the device memory for use by this execution context.
virtual float getK() const =0
Get the LRN K value.
virtual void resetOutputType(int index)=0
Reset the output type for this layer.
virtual RNNDirection getDirection() const =0
Get the direction of the RNN layer.
Product of the two elements.
Dims2()
Construct an empty Dims2 object.
Definition: NvInfer.h:195
virtual IProfiler * getProfiler() const =0
Get the profiler.
TopKOperation
Enumerates the operations that may be performed by a TopK layer.
Definition: NvInfer.h:3108
virtual Weights getWeightsForGate(int layerIndex, RNNGateType gate, bool isW) const =0
Get the weight parameters for an individual gate in the RNN.
struct CUstream_st * cudaStream_t
Forward declaration of cudaStream_t.
Definition: NvInfer.h:98
Dims3(int d0, int d1, int d2)
Construct a Dims3 from 3 elements.
Definition: NvInfer.h:295
virtual void setStride(DimsHW stride)=0
Set the stride for pooling.
virtual int getSeqLength() const =0
Get the sequence length.
Descriptor for four-dimensional data.
Definition: NvInfer.h:382
virtual DimsHW getStride() const =0
Get the stride for pooling.
Elements correspond to different channels.
virtual void setStrictTypeConstraints(bool mode)=0
Set whether or not type constraints are strict.
int EnumMax< ReduceOperation >()
Maximum number of elements in ReduceOperation enum.
Definition: NvInfer.h:2902
int EnumMax< DimensionType >()
Maximum number of elements in DimensionType enum.
Definition: NvInfer.h:163
nvinfer1::Dims field type.
ActivationType
Enumerates the types of activation to perform in an activation layer.
Definition: NvInfer.h:1143
virtual void setDeconvolutionOutputDimensionsFormula(IOutputDimensionsFormula *formula)=0
Set the deconvolution output dimensions formula.
Descriptor for data with one index dimension, one channel dimension and two spatial dimensions...
Definition: NvInfer.h:416
virtual void setKernelWeights(Weights weights)=0
Set the kernel weights, given as a KxC matrix in row-major order.
virtual bool setDynamicRange(float min, float max)=0
Set user calibration scales.
virtual void setMaxBatchSize(int batchSize)=0
Set the maximum batch size.
virtual int getBindingIndex(const char *name) const =0
Retrieve the binding index for a named tensor.
virtual IFullyConnectedLayer * addFullyConnected(ITensor &input, int nbOutputs, Weights kernelWeights, Weights biasWeights)=0
Add a fully connected layer to the network.
TENSORRTAPI nvinfer1::IPluginRegistry * getPluginRegistry()
Return the plugin registry.
virtual void setGatherAxis(int axis)=0
Set the non-batch dimension axis to gather on. The axis must be less than the number of non-batch dim...
virtual int getK() const =0
Get the k value for the layer.
virtual const void * readCalibrationCache(std::size_t &length)=0
Load a calibration cache.
virtual void setBiasForGate(int layerIndex, RNNGateType gate, bool isW, Weights bias)=0
Set the bias parameters for an individual gate in the RNN.
virtual void setDeviceType(ILayer *layer, DeviceType deviceType)=0
Set the device that this layer must execute on.
virtual IReduceLayer * addReduce(ITensor &input, ReduceOperation operation, uint32_t reduceAxes, bool keepDimensions)=0
Add a reduce layer to the network.
Data stored on host.
virtual void setPadding(DimsHW padding)=0
Set the padding for pooling.
DimsHW()
Construct an empty DimsHW object.
Definition: NvInfer.h:225
virtual void setWindowSize(DimsHW windowSize)=0
Set the window size for pooling.
Base class for all layer classes in a network definition.
Definition: NvInfer.h:753
quantized INT8 format.
virtual void setTranspose(int index, bool val)=0
Set the transpose flag for an input tensor.
Dims4(int d0, int d1, int d2, int d3)
Construct a Dims4 from 4 elements.
Definition: NvInfer.h:402
int EnumMax< PoolingType >()
Maximum number of elements in PoolingType enum.
Definition: NvInfer.h:1199
int & h()
Get the height.
Definition: NvInfer.h:479
virtual int getWindowSize() const =0
Get the LRN window size.
int & h()
Get the height.
Definition: NvInfer.h:248
virtual void free(void *memory)=0
virtual void reset(nvinfer1::INetworkDefinition &network)=0
Resets the builder state.
virtual void setActivationType(ActivationType type)=0
Set the type of activation to be performed.
TENSORRTAPI int getInferLibVersion()
Return the library version number.
virtual std::size_t getHiddenSize() const =0
Get the size of the hidden layers.
int & c()
Get the channel count.
Definition: NvInfer.h:465
virtual void setStride(DimsHW stride)=0
Set the stride of the deconvolution.
Structure to define the dimensions of a tensor.
Definition: NvInfer.h:176
Network iterates from first to last and vice versa and outputs concatenated.
#define NV_TENSORRT_PATCH
TensorRT patch version.
Definition: NvInfer.h:58
virtual IRNNLayer * addRNN(ITensor &inputs, int layerCount, std::size_t hiddenSize, int maxSeqLen, RNNOperation op, RNNInputMode mode, RNNDirection dir, Weights weights, Weights bias)=0
Add a layerCount-deep RNN layer to the network with a sequence length of maxSeqLen and hiddenSize in...
virtual bool precisionIsSet() const =0
whether the computational precision has been set for this layer
virtual void setReduceAxes(uint32_t reduceAxes)=0
Set which axes to reduce for the layer.
virtual void setGpuAllocator(IGpuAllocator *allocator)=0
Set the GPU allocator.
Layer type for shuffling data.
Definition: NvInfer.h:3026
virtual bool platformHasFastFp16() const =0
Determine whether the platform has fast native fp16.
DimsCHW(int channels, int height, int width)
Construct a DimsCHW given channel count, height and width.
Definition: NvInfer.h:328
A elementwise layer in a network definition.
Definition: NvInfer.h:1743
A Pooling layer in a network definition.
Definition: NvInfer.h:1212
virtual void setDefaultDeviceType(DeviceType deviceType)=0
Sets the default DeviceType to be used by the builder. It ensures that all the layers that can run on...
#define _TENSORRT_FINAL
Defines which symbols are exported.
Definition: NvInfer.h:68
virtual RNNOperation getOperation() const =0
Get the operation of the RNN layer.
virtual void serialize(void *buffer)=0
Serialize the layer.
virtual void setDilation(DimsHW dims)=0
Set the dilation for a convolution.
virtual void setOperation(RNNOperation op)=0
Set the operation of the RNN layer.
virtual void configureWithFormat(const Dims *inputDims, int nbInputs, const Dims *outputDims, int nbOutputs, DataType type, PluginFormat format, int maxBatchSize)=0
Configure the layer.
virtual DimsHW getPrePadding() const =0
Get the padding that is applied at the start of the tensor.
virtual void setDirection(RNNDirection op)=0
Set the direction of the RNN layer.
int d[MAX_DIMS]
The extent of each dimension.
Definition: NvInfer.h:181
Application-implemented interface to compute layer output sizes.
Definition: NvInfer.h:2458
Minimum of the two elements.
virtual Weights getBiasWeights() const =0
Get the bias weights.
#define NV_TENSORRT_MINOR
TensorRT minor version.
Definition: NvInfer.h:57
virtual int getNbBindings() const =0
Get the number of binding indices.
virtual void setPadding(DimsHW padding)=0
Set the padding of the convolution.
virtual Dims getDimensions() const =0
Get the dimensions of a tensor.
virtual void removeTensor(ITensor &tensor)=0
remove a tensor from the network definition.
ElementWiseOperation
Enumerates the binary operations that may be performed by an ElementWise layer.
Definition: NvInfer.h:1717
int64_t count
The number of weights in the array.
Definition: NvInfer.h:516
Three-gate network consisting of Gated Recurrent Units.
virtual const char * getName() const =0
Get the tensor name.
virtual const char * getPluginType() const =0
Return the plugin type. Should match the plugin name returned by the corresponding plugin creator...
RNNGateType
Identifies an individual gate within an RNN cell.
Definition: NvInfer.h:2272
Network iterates from first input to last input.
virtual void setBiasWeights(Weights weights)=0
Set the bias weights for the deconvolution.
int & c()
Get the channel count.
Definition: NvInfer.h:340
virtual void setSecondTranspose(Permutation permutation)=0
Set the permutation applied by the second transpose operation.
virtual bool getKeepDimensions() const =0
Get the boolean that specifies whether or not to keep the reduced dimensions for the layer...
virtual bool isNetworkOutput() const =0
Whether the tensor is a network output.
Dims3()
Construct an empty Dims3 object.
Definition: NvInfer.h:282
virtual bool getBroadcastAcrossBatch() const =0
Check if tensor is broadcast across the batch.
virtual ITensor * getCellState() const =0
Get the initial cell state of the RNN.
virtual void setCellState(ITensor &cell)=0
Set the initial cell state of the LSTM with the provided cell ITensor.
A tensor in a network definition.
Definition: NvInfer.h:601
virtual void destroy()=0
Destroy this object.
virtual void setLocation(TensorLocation location)=0
Set the storage location of a tensor.
virtual IPluginCreator *const * getPluginCreatorList(int *numCreators) const =0
Return all the registered plugin creators and the number of registered plugin creators. Returns nullptr if none found.
int EnumMax< UnaryOperation >()
Maximum number of elements in UnaryOperation enum.
Definition: NvInfer.h:2856
virtual void setBlendFactor(float blendFactor)=0
Set the blending factor for the max_average_blend mode: max_average_blendPool = (1-blendFactor)*maxPo...
An array of weights used as a layer parameter.
Definition: NvInfer.h:511
int & w()
Get the width.
Definition: NvInfer.h:493
const char * name
Plugin field attribute name.
Definition: NvInfer.h:2739
int h() const
Get the height.
Definition: NvInfer.h:255
virtual int getDataLength() const =0
Get the length of the data being processed by the RNN for use in computing other values.
virtual DeviceType getDeviceType(const ILayer *layer) const =0
Get the device that this layer executes on.
virtual nvinfer1::ICudaEngine * deserializeCudaEngine(const void *blob, std::size_t size, IPluginFactory *pluginFactory)=0
Deserialize an engine from a stream.
virtual bool registerCreator(IPluginCreator &)=0
Register a plugin creator. Returns false if one with same type is already registered.
virtual nvinfer1::ICudaEngine * buildCudaEngine(nvinfer1::INetworkDefinition &network)=0
Build a CUDA engine from a network definition.
Dims2(int d0, int d1)
Construct a Dims2 from 2 elements.
Definition: NvInfer.h:207
virtual Weights getShift() const =0
Get the shift value.
virtual void setAxis(int axis)=0
Set the axis along which concatenation occurs.
virtual int getNbOutputMaps() const =0
Get the number of output feature maps for the deconvolution.
Dims4()
Construct an empty Dims4 object.
Definition: NvInfer.h:388
virtual void setScale(Weights scale)=0
Set the scale value.
virtual void setInput(int index, ITensor &tensor)=0
replace an input of this layer with a specific tensor
virtual IPoolingLayer * addPooling(ITensor &input, PoolingType type, DimsHW windowSize)=0
Add a pooling layer to the network.
int nbDims
The number of dimensions.
Definition: NvInfer.h:180
virtual bool platformHasFastInt8() const =0
Determine whether the platform has fast native int8.
virtual DimsHW getStride() const =0
Get the stride of the convolution.
Application-implemented interface for calibration.
Definition: NvInfer.h:4231
Application-implemented logging interface for the builder, engine and runtime.
Definition: NvInfer.h:4748
virtual DataType getOutputType(int index) const =0
get the output type of this layer
virtual IPluginLayer * addPluginExt(ITensor *const *inputs, int nbInputs, IPluginExt &plugin)=0
Add a plugin layer to the network using an IPluginExt interface.
virtual void setDirection(RNNDirection op)=0
Set the direction of the RNN layer.
virtual void setInt8Calibrator(IInt8Calibrator *calibrator)=0
Set Int8 Calibration interface.
virtual bool outputTypeIsSet(int index) const =0
whether the output type has been set for this layer
DimsHW(int height, int width)
Construct a DimsHW given height and width.
Definition: NvInfer.h:237
virtual void * data() const =0
A pointer to the raw data that is owned by the library.
Identical coefficients across all elements of the tensor.
int EnumMax< ElementWiseOperation >()
Maximum number of elements in ElementWiseOperation enum.
Definition: NvInfer.h:1729
Plugin class for user-implemented layers.
Definition: NvInfer.h:2613
Layer that represents a padding operation.
Definition: NvInfer.h:2969
virtual void setBiasWeights(Weights weights)=0
Set the bias weights.
virtual IIdentityLayer * addIdentity(ITensor &input)=0
Add an identity layer.
virtual void setNbOutputMaps(int nbOutputMaps)=0
Set the number of output feature maps for the deconvolution.
virtual void setNbOutputChannels(int nbOutputs)=0
Set the number of output channels K from the fully connected layer.
virtual void setKernelWeights(Weights weights)=0
Set the kernel weights for the convolution.
virtual Weights getBiasWeights() const =0
Get the bias weights for the convolution.
virtual size_t getWorkspaceSize(int maxBatchSize) const =0
Find the workspace size required by the layer.
virtual uint32_t getReduceAxes() const =0
Get the axes over which to reduce for the layer.
virtual ITensor * getOutput(int index) const =0
Get the layer output corresponding to the given index.
virtual IPlugin & getPlugin()=0
Get the plugin for the layer.
virtual void setInputMode(RNNInputMode op)=0
Set the input mode of the RNN layer.
virtual void setNbGroups(int nbGroups)=0
Set the number of groups for a convolution.
virtual void setCellState(ITensor &cell)=0
Set the initial cell state of the RNN with the provided cell ITensor.
virtual void setKernelSize(DimsHW kernelSize)=0
Set the HW kernel size of the convolution.
CalibrationAlgoType
Version of calibration algorithm to use.
Definition: NvInfer.h:4208
virtual void setStride(DimsHW stride)=0
Set the stride of the convolution.
virtual float getAlpha() const =0
Get the LRN alpha value.
virtual void setReduceAxes(uint32_t reduceAxes)=0
Set the axes over which to reduce.
Informational messages.
virtual CalibrationAlgoType getAlgorithm()
Definition: NvInfer.h:4300
virtual IConvolutionLayer * addConvolution(ITensor &input, int nbOutputMaps, DimsHW kernelSize, Weights kernelWeights, Weights biasWeights)=0
Add a convolution layer to the network.
virtual void setFirstTranspose(Permutation permutation)=0
Set the permutation applied by the first transpose operation.
DimsNCHW()
Construct an empty DimsNCHW object.
Definition: NvInfer.h:422
Class to handle library allocated memory that is accessible to the user.
Definition: NvInfer.h:527
LayerType
The type values of layer classes.
Definition: NvInfer.h:545
int EnumMax< ScaleMode >()
Maximum number of elements in ScaleMode enum.
Definition: NvInfer.h:1411
A layer that represents the identity function.
Definition: NvInfer.h:3241
virtual bool canRunOnDLA(const ILayer *layer) const =0
Checks if a layer can run on DLA.
virtual DimsHW compute(DimsHW inputDims, DimsHW kernelSize, DimsHW stride, DimsHW padding, DimsHW dilation, const char *layerName) const =0
Application-implemented interface to compute the HW output dimensions of a layer from the layer input...
virtual void setDimensions(Dims dimensions)=0
Set the dimensions for the layer.
virtual Weights getBiasWeights() const =0
Get the bias weights for the deconvolution.
virtual void setProfiler(IProfiler *)=0
Set the profiler.
virtual void setWeights(Weights weights)=0
Set the weights for the layer.
RNNInputMode
Enumerates the RNN input modes that may occur with an RNN layer.
Definition: NvInfer.h:1919
A RNN layer in a network definition.
Definition: NvInfer.h:1940
virtual IPluginCreator * getPluginCreator(const char *pluginType, const char *pluginVersion)=0
Return plugin creator based on type and version.
virtual void setName(const char *name)=0
Set the name of a layer.
virtual ActivationType getActivationType() const =0
Get the type of activation to be performed.
virtual Dims getDimensions() const =0
Get the dimensions for the layer.
virtual void setDebugSync(bool sync)=0
Set the debug sync flag.
virtual void destroy()=0
Destroy this object.
virtual void unmarkOutput(ITensor &tensor)=0
unmark a tensor as a network output.
virtual void setPostPadding(DimsHW padding)=0
Set the padding that is applied at the end of the tensor.
Layer type for plugins.
Definition: NvInfer.h:2695
virtual UnaryOperation getOperation() const =0
Get the unary operation for the layer.
DimensionType
The type of data encoded across this dimension.
Definition: NvInfer.h:154
virtual IConcatenationLayer * addConcatenation(ITensor *const *inputs, int nbInputs)=0
Add a concatenation layer to the network.
Four-gate LSTM network w/o peephole connections.
virtual DimsHW getPostPadding() const =0
Get the padding that is applied at the end of the tensor.
virtual const ICudaEngine & getEngine() const =0
Get the associated engine.
virtual void * allocate(uint64_t size, uint64_t alignment, uint32_t flags)=0
virtual void setHiddenState(ITensor &hidden)=0
Set the initial hidden state of the RNN with the provided hidden ITensor.
virtual TensorLocation getLocation(int bindingIndex) const =0
Get location of binding.
virtual ScaleMode getMode() const =0
Get the scale mode.
virtual ILRNLayer * addLRN(ITensor &input, int window, float alpha, float beta, float k)=0
Add a LRN layer to the network.
virtual int getTensorRTVersion() const
Return the API version with which this plugin was built.
Definition: NvInfer.h:2621
virtual void setMaxWorkspaceSize(std::size_t workspaceSize)=0
Set the maximum workspace size.
virtual void setShift(Weights shift)=0
Set the shift value.
virtual size_t getDeviceMemorySize() const =0
Return the amount of device memory required by an execution context.
virtual Weights getKernelWeights() const =0
Get the kernel weights for the deconvolution.
Application-implemented class for controlling allocation on the GPU.
Definition: NvInfer.h:4363
Context for executing inference using an engine.
Definition: NvInfer.h:3943
virtual ITensor * getInput(int index) const =0
Get the input tensor specified by the given index.
virtual int getMaxDLABatchSize(DeviceType deviceType) const =0
Get the maximum batch size DLA can support. For any tensor the total volume of index dimensions combi...
virtual void setPower(Weights power)=0
Set the power value.
virtual double getRegressionCutoff() const =0
The fraction (between 0 and 1) of the maximum used to define the regression cutoff when using regress...
virtual void setHalf2Mode(bool mode)=0
Set whether half2 mode is used.
virtual void setOutputType(int index, DataType dataType)=0
Set the output type of this layer.
virtual std::size_t getMaxWorkspaceSize() const =0
Get the maximum workspace size.
virtual int getAxis() const =0
Get the axis along which concatenation occurs.
Descriptor for two-dimensional spatial data.
Definition: NvInfer.h:219
UnaryOperation
Enumerates the unary operations that may be performed by a Unary layer.
Definition: NvInfer.h:2845
virtual void setAverageFindIterations(int avgFind)=0
Set the number of averaging iterations used when timing layers.
virtual int getNbOutputMaps() const =0
Get the number of output maps for the convolution.
virtual DeviceType getDefaultDeviceType() const =0
Get the default DeviceType which was set by setDefaultDeviceType.
virtual RNNOperation getOperation() const =0
Get the operation of the RNN layer.
DeviceType
The device that this layer/network will execute on.
Definition: NvInfer.h:137
virtual bool getHalf2Mode() const =0
Query whether half2 mode is used.
virtual int getNbInputs() const =0
Get the number of inputs of a layer.
virtual ISoftMaxLayer * addSoftMax(ITensor &input)=0
Add a SoftMax layer to the network.
Elements correspond to different batch index.
virtual Weights getBiasForGate(int layerIndex, RNNGateType gate, bool isW) const =0
Get the bias parameters for an individual gate in the RNN.
Descriptor for two-dimensional data.
Definition: NvInfer.h:189
virtual int getMaxBatchSize() const =0
Get the maximum batch size which can be used for inference.
virtual void setName(const char *name)=0
Set the tensor name.
virtual IOutputDimensionsFormula & getPoolingOutputDimensionsFormula() const =0
Get the pooling output dimensions formula.
virtual int getNbOutputs() const =0
Get the number of outputs of a layer.
virtual IGatherLayer * addGather(ITensor &data, ITensor &indices, int axis)=0
Add a gather layer to the network.
virtual IOutputDimensionsFormula & getDeconvolutionOutputDimensionsFormula() const =0
Get the deconvolution output dimensions formula.
virtual void setDimensions(Dims dimensions)=0
Set the dimensions of a tensor.
virtual void setWindowSize(int windowSize)=0
Set the LRN window size.
virtual Weights getKernelWeights() const =0
Get the kernel weights for the convolution.
virtual CalibrationAlgoType getAlgorithm()=0
Get the algorithm used by this calibrator.
Application-implemented interface for profiling.
Definition: NvInfer.h:3918
virtual RNNInputMode getInputMode() const =0
Get the input mode of the RNN layer.
A concatenation layer in a network definition.
Definition: NvInfer.h:1547
virtual void setK(float k)=0
Set the LRN K value.
virtual void setNbGroups(int nbGroups)=0
Set the number of groups for a deconvolution.
const void * data
Plugin field attribute data.
Definition: NvInfer.h:2743
virtual IPluginLayer * addPlugin(ITensor *const *inputs, int nbInputs, IPlugin &plugin)=0
Add a plugin layer to the network.
int EnumMax< RNNDirection >()
Maximum number of elements in RNNDirection enum.
Definition: NvInfer.h:1899
virtual const void * readHistogramCache(std::size_t &length)=0
Load a histogram.
virtual void allowGPUFallback(bool setFallBackMode)=0
Sets the builder to use GPU if a layer that was supposed to run on DLA can not run on DLA...
int nbFields
Number of PluginField entries.
Definition: NvInfer.h:2765
virtual Permutation getFirstTranspose() const =0
Get the permutation applied by the first transpose operation.
virtual const char * getPluginVersion() const =0
Return the plugin version.
virtual IExecutionContext * createExecutionContextWithoutDeviceMemory()=0
create an execution context without any device memory allocated
virtual void setKernelSize(DimsHW kernelSize)=0
Set the HW kernel size of the convolution.
virtual void setWeights(Weights weights)=0
Set the weight parameters for the RNN.
virtual Weights getWeights() const =0
Get the W weights for the RNN.
virtual const char * getPluginName() const =0
Return the plugin name.
virtual void terminate()=0
Release resources acquired during plugin layer initialization. This is called when the engine is dest...
virtual PoolingType getPoolingType() const =0
Get the type of pooling to be performed.
PluginFieldType type
Plugin field attribute type.
Definition: NvInfer.h:2748
A deconvolution layer in a network definition.
Definition: NvInfer.h:1578
virtual IShuffleLayer * addShuffle(ITensor &input)=0
Add a shuffle layer to the network.
virtual void setPoolingType(PoolingType type)=0
Set the type of pooling to be performed.
virtual IOutputDimensionsFormula & getConvolutionOutputDimensionsFormula() const =0
Get the convolution output dimensions formula.
virtual void setDebugSync(bool sync)=0
Set whether the builder should use debug synchronization.
virtual uint32_t getReduceAxes() const =0
Get the axes to reduce for the layer.
virtual int initialize()=0
Initialize the layer for execution. This is called when the engine is created.
Definition: NvInfer.h:4294
int EnumMax< RNNInputMode >()
Maximum number of elements in RNNInputMode enum.
Definition: NvInfer.h:1926