TensorRT  5.1.5.0
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Macros Pages
NvInfer.h
Go to the documentation of this file.
1 /*
2  * Copyright 1993-2019 NVIDIA Corporation. All rights reserved.
3  *
4  * NOTICE TO LICENSEE:
5  *
6  * This source code and/or documentation ("Licensed Deliverables") are
7  * subject to NVIDIA intellectual property rights under U.S. and
8  * international Copyright laws.
9  *
10  * These Licensed Deliverables contained herein is PROPRIETARY and
11  * CONFIDENTIAL to NVIDIA and is being provided under the terms and
12  * conditions of a form of NVIDIA software license agreement by and
13  * between NVIDIA and Licensee ("License Agreement") or electronically
14  * accepted by Licensee. Notwithstanding any terms or conditions to
15  * the contrary in the License Agreement, reproduction or disclosure
16  * of the Licensed Deliverables to any third party without the express
17  * written consent of NVIDIA is prohibited.
18  *
19  * NOTWITHSTANDING ANY TERMS OR CONDITIONS TO THE CONTRARY IN THE
20  * LICENSE AGREEMENT, NVIDIA MAKES NO REPRESENTATION ABOUT THE
21  * SUITABILITY OF THESE LICENSED DELIVERABLES FOR ANY PURPOSE. IT IS
22  * PROVIDED "AS IS" WITHOUT EXPRESS OR IMPLIED WARRANTY OF ANY KIND.
23  * NVIDIA DISCLAIMS ALL WARRANTIES WITH REGARD TO THESE LICENSED
24  * DELIVERABLES, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY,
25  * NONINFRINGEMENT, AND FITNESS FOR A PARTICULAR PURPOSE.
26  * NOTWITHSTANDING ANY TERMS OR CONDITIONS TO THE CONTRARY IN THE
27  * LICENSE AGREEMENT, IN NO EVENT SHALL NVIDIA BE LIABLE FOR ANY
28  * SPECIAL, INDIRECT, INCIDENTAL, OR CONSEQUENTIAL DAMAGES, OR ANY
29  * DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS,
30  * WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS
31  * ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE
32  * OF THESE LICENSED DELIVERABLES.
33  *
34  * U.S. Government End Users. These Licensed Deliverables are a
35  * "commercial item" as that term is defined at 48 C.F.R. 2.101 (OCT
36  * 1995), consisting of "commercial computer software" and "commercial
37  * computer software documentation" as such terms are used in 48
38  * C.F.R. 12.212 (SEPT 1995) and is provided to the U.S. Government
39  * only as a commercial end item. Consistent with 48 C.F.R.12.212 and
40  * 48 C.F.R. 227.7202-1 through 227.7202-4 (JUNE 1995), all
41  * U.S. Government End Users acquire the Licensed Deliverables with
42  * only those rights set forth herein.
43  *
44  * Any use of the Licensed Deliverables in individual and commercial
45  * software must include, in the user documentation and internal
46  * comments to the code, the above Disclaimer and U.S. Government End
47  * Users Notice.
48  */
49 
50 #ifndef NV_INFER_H
51 #define NV_INFER_H
52 
53 #include <cstddef>
54 #include <cstdint>
55 
56 #define NV_TENSORRT_MAJOR 5
57 #define NV_TENSORRT_MINOR 1
58 #define NV_TENSORRT_PATCH 5
59 #define NV_TENSORRT_BUILD 0
60 
61 #define NV_TENSORRT_SONAME_MAJOR 5
62 #define NV_TENSORRT_SONAME_MINOR 1
63 #define NV_TENSORRT_SONAME_PATCH 5
64 
65 #if __cplusplus > 201103L
66 #define _TENSORRT_FINAL final
67 #define _TENSORRT_OVERRIDE override
68 #else
69 #define _TENSORRT_FINAL
70 #define _TENSORRT_OVERRIDE
71 #endif
72 
74 #ifdef TENSORRT_BUILD_LIB
75 #ifdef _MSC_VER
76 #define TENSORRTAPI __declspec(dllexport)
77 #else
78 #define TENSORRTAPI __attribute__((visibility("default")))
79 #endif
80 #else
81 #define TENSORRTAPI
82 #endif
83 
93 
99 
100 // forward declare some CUDA types to avoid an include dependency
101 
102 struct cublasContext;
103 struct cudnnContext;
104 
105 typedef struct CUstream_st* cudaStream_t;
106 typedef struct CUevent_st* cudaEvent_t;
107 
108 static const int NV_TENSORRT_VERSION = (NV_TENSORRT_MAJOR * 1000) + (NV_TENSORRT_MINOR * 100) + NV_TENSORRT_PATCH; // major, minor, patch
109 
115 namespace nvinfer1
116 {
117 
//!
//! \brief Maximum number of elements in an enumeration type.
//!
//! Each enumeration in this header provides an explicit specialization;
//! the unspecialized primary template is declared but never defined.
//!
template <typename T>
inline int EnumMax();
125 enum class DataType : int
126 {
127  kFLOAT = 0,
128  kHALF = 1,
129  kINT8 = 2,
130  kINT32 = 3
131 };
132 
133 template <>
134 inline int EnumMax<DataType>()
135 {
136  return 4;
137 }
138 
144 enum class DeviceType : int
145 {
146  kGPU,
147  kDLA,
148 };
149 template <>
151 {
152  return 2;
153 }
154 
159 enum class DimensionType : int
160 {
161  kSPATIAL = 0,
162  kCHANNEL = 1,
163  kINDEX = 2,
164  kSEQUENCE = 3
165 };
166 
167 template <>
169 {
170  return 4;
171 }
172 
181 class Dims
182 {
183 public:
184  static const int MAX_DIMS = 8;
185  int nbDims;
186  int d[MAX_DIMS];
188 };
189 
194 class Dims2 : public Dims
195 {
196 public:
201  {
202  nbDims = 2;
203  d[0] = d[1] = 0;
204  }
205 
212  Dims2(int d0, int d1)
213  {
214  nbDims = 2;
215  d[0] = d0;
216  d[1] = d1;
217  }
218 };
219 
224 class DimsHW : public Dims2
225 {
226 public:
231  : Dims2()
232  {
234  }
235 
242  DimsHW(int height, int width)
243  : Dims2(height, width)
244  {
246  }
247 
253  int& h() { return d[0]; }
254 
260  int h() const { return d[0]; }
261 
267  int& w() { return d[1]; }
268 
274  int w() const { return d[1]; }
275 };
276 
281 class Dims3 : public Dims
282 {
283 public:
288  {
289  nbDims = 3;
290  d[0] = d[1] = d[2] = 0;
291  }
292 
300  Dims3(int d0, int d1, int d2)
301  {
302  nbDims = 3;
303  d[0] = d0;
304  d[1] = d1;
305  d[2] = d2;
306  }
307 };
308 
313 class DimsCHW : public Dims3
314 {
315 public:
320  : Dims3()
321  {
324  }
325 
333  DimsCHW(int channels, int height, int width)
334  : Dims3(channels, height, width)
335  {
338  }
339 
345  int& c() { return d[0]; }
346 
352  int c() const { return d[0]; }
353 
359  int& h() { return d[1]; }
360 
366  int h() const { return d[1]; }
367 
373  int& w() { return d[2]; }
374 
380  int w() const { return d[2]; }
381 };
382 
387 class Dims4 : public Dims
388 {
389 public:
394  {
395  nbDims = 4;
396  d[0] = d[1] = d[2] = d[3] = 0;
397  }
398 
407  Dims4(int d0, int d1, int d2, int d3)
408  {
409  nbDims = 4;
410  d[0] = d0;
411  d[1] = d1;
412  d[2] = d2;
413  d[3] = d3;
414  }
415 };
416 
421 class DimsNCHW : public Dims4
422 {
423 public:
428  : Dims4()
429  {
433  }
434 
443  DimsNCHW(int batchSize, int channels, int height, int width)
444  : Dims4(batchSize, channels, height, width)
445  {
449  }
450 
456  int& n() { return d[0]; }
457 
463  int n() const { return d[0]; }
464 
470  int& c() { return d[1]; }
471 
477  int c() const { return d[1]; }
478 
484  int& h() { return d[2]; }
485 
491  int h() const { return d[2]; }
492 
498  int& w() { return d[3]; }
499 
505  int w() const { return d[3]; }
506 };
507 
516 class Weights
517 {
518 public:
520  const void* values;
521  int64_t count;
522 };
523 
535 {
536 public:
537  virtual void* data() const = 0;
538  virtual std::size_t size() const = 0;
539  virtual DataType type() const = 0;
540  virtual void destroy() = 0;
541 protected:
542  virtual ~IHostMemory() {}
543 };
544 
552 enum class LayerType : int
553 {
554  kCONVOLUTION = 0,
555  kFULLY_CONNECTED = 1,
556  kACTIVATION = 2,
557  kPOOLING = 3,
558  kLRN = 4,
559  kSCALE = 5,
560  kSOFTMAX = 6,
561  kDECONVOLUTION = 7,
562  kCONCATENATION = 8,
563  kELEMENTWISE = 9,
564  kPLUGIN = 10,
565  kRNN = 11,
566  kUNARY = 12,
567  kPADDING = 13,
568  kSHUFFLE = 14,
569  kREDUCE = 15,
570  kTOPK = 16,
571  kGATHER = 17,
572  kMATRIX_MULTIPLY = 18,
573  kRAGGED_SOFTMAX = 19,
574  kCONSTANT = 20,
575  kRNN_V2 = 21,
576  kIDENTITY = 22,
577  kPLUGIN_V2 = 23,
578  kSLICE = 24
579 };
580 
581 template <>
582 inline int EnumMax<LayerType>()
583 {
584  return 25;
585 }
586 
591 enum class TensorLocation : int
592 {
593  kDEVICE = 0,
594  kHOST = 1
595 };
596 
597 template <>
599 {
600  return 2;
601 }
602 
612 class ITensor
613 {
614 public:
627  virtual void setName(const char* name) = 0;
628 
636  virtual const char* getName() const = 0;
637 
652  virtual void setDimensions(Dims dimensions) = 0; // only valid for input tensors
653 
661  virtual Dims getDimensions() const = 0;
662 
676  virtual void setType(DataType type) = 0;
677 
685  virtual DataType getType() const = 0;
686 
697  virtual bool setDynamicRange(float min, float max) = 0;
698 
706  virtual float getDynamicRange() const = 0;
707 
711  virtual bool isNetworkInput() const = 0;
712 
716  virtual bool isNetworkOutput() const = 0;
717 
718 protected:
719  virtual ~ITensor() {}
720 
721 public:
736  virtual void setBroadcastAcrossBatch(bool broadcastAcrossBatch) = 0;
737 
748  virtual bool getBroadcastAcrossBatch() const = 0;
749 
755  virtual TensorLocation getLocation() const = 0;
756 
767  virtual void setLocation(TensorLocation location) = 0;
768 
774  virtual bool dynamicRangeIsSet() const = 0;
775 
779  virtual void resetDynamicRange() = 0;
780 
786  virtual float getDynamicRangeMin() const = 0;
787 
793  virtual float getDynamicRangeMax() const = 0;
794 };
795 
803 class ILayer
804 {
805 public:
811  virtual LayerType getType() const = 0;
812 
820  virtual void setName(const char* name) = 0;
821 
825 
828  virtual const char* getName() const = 0;
829 
833  virtual int getNbInputs() const = 0;
834 
842  virtual ITensor* getInput(int index) const = 0;
843 
847  virtual int getNbOutputs() const = 0;
848 
854  virtual ITensor* getOutput(int index) const = 0;
855 
865  virtual void setInput(int index, ITensor& tensor) = 0;
866 
876 
877  virtual void setPrecision(DataType dataType) = 0;
878 
885 
886  virtual DataType getPrecision() const = 0;
887 
894 
895  virtual bool precisionIsSet() const = 0;
896 
901 
902  virtual void resetPrecision() = 0;
903 
914 
915  virtual void setOutputType(int index, DataType dataType) = 0;
916 
925 
926  virtual DataType getOutputType(int index) const = 0;
927 
935 
936  virtual bool outputTypeIsSet(int index) const = 0;
937 
944 
945  virtual void resetOutputType(int index) = 0;
946 
947 protected:
948  virtual ~ILayer() {}
949 };
950 
962 enum class PaddingMode : int
963 {
965  kEXPLICIT_ROUND_UP = 1,
966  kSAME_UPPER = 2,
967  kSAME_LOWER = 3,
968  kCAFFE_ROUND_DOWN = 4,
969  kCAFFE_ROUND_UP = 5
970 };
971 
972 template <>
974 {
975  return 6;
976 }
977 
991 class IConvolutionLayer : public ILayer
992 {
993 public:
1001  virtual void setKernelSize(DimsHW kernelSize) = 0;
1002 
1008  virtual DimsHW getKernelSize() const = 0;
1009 
1017  virtual void setNbOutputMaps(int nbOutputMaps) = 0;
1018 
1024  virtual int getNbOutputMaps() const = 0;
1025 
1035  virtual void setStride(DimsHW stride) = 0;
1036 
1040  virtual DimsHW getStride() const = 0;
1041 
1053  virtual void setPadding(DimsHW padding) = 0;
1054 
1060  virtual DimsHW getPadding() const = 0;
1061 
1074  virtual void setNbGroups(int nbGroups) = 0;
1075 
1081  virtual int getNbGroups() const = 0;
1082 
1091  virtual void setKernelWeights(Weights weights) = 0;
1092 
1098  virtual Weights getKernelWeights() const = 0;
1099 
1109  virtual void setBiasWeights(Weights weights) = 0;
1110 
1116  virtual Weights getBiasWeights() const = 0;
1117 
1125  virtual void setDilation(DimsHW dims) = 0;
1126 
1132  virtual DimsHW getDilation() const = 0;
1133 
1134 protected:
1135  virtual ~IConvolutionLayer() {}
1136 
1137 public:
1149  virtual void setPrePadding(Dims padding) = 0;
1150 
1156  virtual Dims getPrePadding() const = 0;
1157 
1169  virtual void setPostPadding(Dims padding) = 0;
1170 
1176  virtual Dims getPostPadding() const = 0;
1177 
1186  virtual void setPaddingMode(PaddingMode paddingMode) = 0;
1187 
1194  virtual PaddingMode getPaddingMode() const = 0;
1195 };
1196 
1227 {
1228 public:
1236  virtual void setNbOutputChannels(int nbOutputs) = 0;
1237 
1243  virtual int getNbOutputChannels() const = 0;
1244 
1250  virtual void setKernelWeights(Weights weights) = 0;
1251 
1257  virtual Weights getKernelWeights() const = 0;
1258 
1266  virtual void setBiasWeights(Weights weights) = 0;
1267 
1273  virtual Weights getBiasWeights() const = 0;
1274 
1275 protected:
1276  virtual ~IFullyConnectedLayer() {}
1277 };
1278 
1284 enum class ActivationType : int
1285 {
1286  kRELU = 0,
1287  kSIGMOID = 1,
1288  kTANH = 2,
1289  kLEAKY_RELU = 3,
1290  kELU = 4,
1291  kSELU = 5,
1292  kSOFTSIGN = 6,
1293  kSOFTPLUS = 7,
1294  kCLIP = 8,
1295  kHARD_SIGMOID = 9,
1296  kSCALED_TANH = 10,
1297  kTHRESHOLDED_RELU = 11
1298 };
1299 
1300 template <>
1302 {
1303  return 12;
1304 }
1305 
1317 class IActivationLayer : public ILayer
1318 {
1319 public:
1325  virtual void setActivationType(ActivationType type) = 0;
1326 
1332  virtual ActivationType getActivationType() const = 0;
1333 
1334 protected:
1335  virtual ~IActivationLayer() {}
1336 public:
1347  virtual void setAlpha(float alpha) = 0;
1348 
1358  virtual void setBeta(float beta) = 0;
1359 
1364  virtual float getAlpha() const = 0;
1365 
1370  virtual float getBeta() const = 0;
1371 };
1372 
1378 enum class PoolingType : int
1379 {
1380  kMAX = 0, // Maximum over elements
1381  kAVERAGE = 1, // Average over elements. If the tensor is padded, the count includes the padding
1382  kMAX_AVERAGE_BLEND = 2 // Blending between the max pooling and average pooling: (1-blendFactor)*maxPool + blendFactor*avgPool
1383 };
1384 
1385 template <>
1387 {
1388  return 3;
1389 }
1390 
1401 class IPoolingLayer : public ILayer
1402 {
1403 public:
1411  virtual void setPoolingType(PoolingType type) = 0;
1412 
1418  virtual PoolingType getPoolingType() const = 0;
1419 
1427  virtual void setWindowSize(DimsHW windowSize) = 0;
1428 
1434  virtual DimsHW getWindowSize() const = 0;
1435 
1445  virtual void setStride(DimsHW stride) = 0;
1446 
1452  virtual DimsHW getStride() const = 0;
1453 
1463  virtual void setPadding(DimsHW padding) = 0;
1464 
1472  virtual DimsHW getPadding() const = 0;
1473 
1481  virtual void setBlendFactor(float blendFactor) = 0;
1482 
1490  virtual float getBlendFactor() const = 0;
1491 
1500  virtual void setAverageCountExcludesPadding(bool exclusive) = 0;
1501 
1507  virtual bool getAverageCountExcludesPadding() const = 0;
1508 
1509 protected:
1510  virtual ~IPoolingLayer() {}
1511 
1512 public:
1524  virtual void setPrePadding(Dims padding) = 0;
1525 
1531  virtual Dims getPrePadding() const = 0;
1532 
1544  virtual void setPostPadding(Dims padding) = 0;
1545 
1551  virtual Dims getPostPadding() const = 0;
1552 
1561  virtual void setPaddingMode(PaddingMode paddingMode) = 0;
1562 
1569  virtual PaddingMode getPaddingMode() const = 0;
1570 };
1571 
1581 class ILRNLayer : public ILayer
1582 {
1583 public:
1590  virtual void setWindowSize(int windowSize) = 0;
1591 
1597  virtual int getWindowSize() const = 0;
1598 
1605  virtual void setAlpha(float alpha) = 0;
1606 
1612  virtual float getAlpha() const = 0;
1613 
1620  virtual void setBeta(float beta) = 0;
1621 
1627  virtual float getBeta() const = 0;
1628 
1635  virtual void setK(float k) = 0;
1636 
1642  virtual float getK() const = 0;
1643 
1644 protected:
1645  virtual ~ILRNLayer() {}
1646 };
1647 
1653 enum class ScaleMode : int
1654 {
1655  kUNIFORM = 0,
1656  kCHANNEL = 1,
1657  kELEMENTWISE = 2
1658 };
1659 
1660 template <>
1662 {
1663  return 3;
1664 }
1665 
1688 class IScaleLayer : public ILayer
1689 {
1690 public:
1696  virtual void setMode(ScaleMode mode) = 0;
1697 
1703  virtual ScaleMode getMode() const = 0;
1704 
1710  virtual void setShift(Weights shift) = 0;
1711 
1717  virtual Weights getShift() const = 0;
1718 
1724  virtual void setScale(Weights scale) = 0;
1725 
1731  virtual Weights getScale() const = 0;
1732 
1738  virtual void setPower(Weights power) = 0;
1739 
1745  virtual Weights getPower() const = 0;
1746 
1747 protected:
1748  virtual ~IScaleLayer() {}
1749 };
1750 
1762 class ISoftMaxLayer : public ILayer
1763 {
1764 protected:
1765  virtual ~ISoftMaxLayer() {}
1766 public:
1782  virtual void setAxes(uint32_t axes) = 0;
1783 
1789  virtual uint32_t getAxes() const = 0;
1790 };
1791 
1804 {
1805 protected:
1806  virtual ~IConcatenationLayer() {}
1807 
1808 public:
1817  virtual void setAxis(int axis) = 0;
1818 
1824  virtual int getAxis() const = 0;
1825 };
1826 
1837 {
1838 public:
1846  virtual void setKernelSize(DimsHW kernelSize) = 0;
1847 
1853  virtual DimsHW getKernelSize() const = 0;
1854 
1862  virtual void setNbOutputMaps(int nbOutputMaps) = 0;
1863 
1869  virtual int getNbOutputMaps() const = 0;
1870 
1878  virtual void setStride(DimsHW stride) = 0;
1879 
1885  virtual DimsHW getStride() const = 0;
1886 
1899  virtual void setPadding(DimsHW padding) = 0;
1900 
1906  virtual DimsHW getPadding() const = 0; // padding defaults to 0
1907 
1920  virtual void setNbGroups(int nbGroups) = 0;
1921 
1927  virtual int getNbGroups() const = 0;
1928 
1937  virtual void setKernelWeights(Weights weights) = 0;
1938 
1944  virtual Weights getKernelWeights() const = 0;
1945 
1955  virtual void setBiasWeights(Weights weights) = 0;
1956 
1962  virtual Weights getBiasWeights() const = 0;
1963 
1964 protected:
1965  virtual ~IDeconvolutionLayer() {}
1966 
1967 public:
1979  virtual void setPrePadding(Dims padding) = 0;
1980 
1986  virtual Dims getPrePadding() const = 0;
1987 
1999  virtual void setPostPadding(Dims padding) = 0;
2000 
2006  virtual Dims getPostPadding() const = 0;
2007 
2016  virtual void setPaddingMode(PaddingMode paddingMode) = 0;
2017 
2024  virtual PaddingMode getPaddingMode() const = 0;
2025 };
2026 
2034 enum class ElementWiseOperation : int
2035 {
2036  kSUM = 0,
2037  kPROD = 1,
2038  kMAX = 2,
2039  kMIN = 3,
2040  kSUB = 4,
2041  kDIV = 5,
2042  kPOW = 6
2043 };
2044 
2045 template <>
2047 {
2048  return 7;
2049 }
2050 
2063 {
2064 public:
2074  virtual void setOperation(ElementWiseOperation type) = 0;
2075 
2083  virtual ElementWiseOperation getOperation() const = 0;
2084 
2085 protected:
2086  virtual ~IElementWiseLayer() {}
2087 };
2088 
2092 class IGatherLayer : public ILayer
2093 {
2094 public:
2101  virtual void setGatherAxis(int axis) = 0;
2102 
2108  virtual int getGatherAxis() const = 0;
2109 
2110 protected:
2111  virtual ~IGatherLayer() {}
2112 };
2113 
2193 enum class RNNOperation : int
2194 {
2195  kRELU = 0,
2196  kTANH = 1,
2197  kLSTM = 2,
2198  kGRU = 3
2199 };
2200 
2201 template <>
2203 {
2204  return 4;
2205 }
2206 
2214 enum class RNNDirection : int
2215 {
2216  kUNIDIRECTION = 0,
2217  kBIDIRECTION = 1
2218 };
2219 
2220 template <>
2222 {
2223  return 2;
2224 }
2225 
2241 enum class RNNInputMode : int
2242 {
2243  kLINEAR = 0,
2244  kSKIP = 1
2245 };
2246 
2247 template <>
2249 {
2250  return 2;
2251 }
2252 
2264 class IRNNLayer : public ILayer
2265 {
2266 public:
2272  virtual unsigned getLayerCount() const = 0;
2273 
2282  virtual std::size_t getHiddenSize() const = 0;
2283 
2292  virtual int getSeqLength() const = 0;
2293 
2299  virtual void setOperation(RNNOperation op) = 0;
2300 
2306  virtual RNNOperation getOperation() const = 0;
2307 
2313  virtual void setInputMode(RNNInputMode op) = 0;
2314 
2320  virtual RNNInputMode getInputMode() const = 0;
2321 
2333  virtual void setDirection(RNNDirection op) = 0;
2334 
2340  virtual RNNDirection getDirection() const = 0;
2341 
2456  virtual void setWeights(Weights weights) = 0;
2457 
2463  virtual Weights getWeights() const = 0;
2464 
2516  virtual void setBias(Weights bias) = 0;
2517 
2523  virtual Weights getBias() const = 0;
2524 
2531  virtual int getDataLength() const = 0;
2532 
2549  virtual void setHiddenState(ITensor& hidden) = 0;
2550 
2556  virtual ITensor* getHiddenState() const = 0;
2557 
2576  virtual void setCellState(ITensor& cell) = 0;
2577 
2583  virtual ITensor* getCellState() const = 0;
2584 
2585 protected:
2586  virtual ~IRNNLayer() {}
2587 };
2588 
2596 enum class RNNGateType : int
2597 {
2598  kINPUT = 0,
2599  kOUTPUT = 1,
2600  kFORGET = 2,
2601  kUPDATE = 3,
2602  kRESET = 4,
2603  kCELL = 5,
2604  kHIDDEN = 6
2605 };
2606 
2607 template <>
2609 {
2610  return 7;
2611 }
2612 
2622 class IRNNv2Layer : public ILayer
2623 {
2624 public:
2625  virtual int32_t getLayerCount() const = 0; //< Get the layer count of the RNN
2626  virtual int32_t getHiddenSize() const = 0; //< Get the hidden size of the RNN
2627  virtual int32_t getMaxSeqLength() const = 0; //< Get the maximum sequence length of the RNN
2628  virtual int32_t getDataLength() const = 0; //< Get the maximum data length of the RNN
2629 
2644  virtual void setSequenceLengths(ITensor& seqLengths) = 0;
2645 
2653  virtual ITensor* getSequenceLengths() const = 0;
2654 
2659  virtual void setOperation(RNNOperation op) = 0;
2660 
2665  virtual RNNOperation getOperation() const = 0;
2666 
2671  virtual void setInputMode(RNNInputMode op) = 0;
2672 
2677  virtual RNNInputMode getInputMode() const = 0;
2678 
2683  virtual void setDirection(RNNDirection op) = 0;
2684 
2689  virtual RNNDirection getDirection() const = 0;
2690 
2708  virtual void setWeightsForGate(int layerIndex, RNNGateType gate, bool isW, Weights weights) = 0;
2709 
2714  virtual Weights getWeightsForGate(int layerIndex, RNNGateType gate, bool isW) const = 0;
2715 
2731  virtual void setBiasForGate(int layerIndex, RNNGateType gate, bool isW, Weights bias) = 0;
2732 
2737  virtual Weights getBiasForGate(int layerIndex, RNNGateType gate, bool isW) const = 0;
2738 
2751  virtual void setHiddenState(ITensor& hidden) = 0;
2752 
2757  virtual ITensor* getHiddenState() const = 0;
2758 
2773  virtual void setCellState(ITensor& cell) = 0;
2774 
2779  virtual ITensor* getCellState() const = 0;
2780 
2781 protected:
2782  virtual ~IRNNv2Layer() {}
2783 };
2784 
2791 {
2792 public:
2807  virtual DimsHW compute(DimsHW inputDims, DimsHW kernelSize, DimsHW stride, DimsHW padding, DimsHW dilation, const char* layerName) const = 0;
2808 
2809  virtual ~IOutputDimensionsFormula() {}
2810 };
2811 
2822 enum class PluginFormat : uint8_t
2823 {
2825  kNCHW = 0,
2826 
2830  kNC2HW2 = 1,
2831 
2833  kNHWC8 = 2
2834 };
2835 
2836 template <>
2838 {
2839  return 3;
2840 }
2841 
2849 class IPlugin
2850 {
2851 public:
2859  virtual int getNbOutputs() const = 0;
2860 
2870  virtual Dims getOutputDimensions(int index, const Dims* inputs, int nbInputDims) = 0;
2871 
2888  virtual void configure(const Dims* inputDims, int nbInputs, const Dims* outputDims, int nbOutputs, int maxBatchSize) = 0;
2889 
2895  virtual int initialize() = 0;
2896 
2901  virtual void terminate() = 0;
2902 
2911  virtual size_t getWorkspaceSize(int maxBatchSize) const = 0;
2912 
2924  virtual int enqueue(int batchSize, const void* const* inputs, void** outputs, void* workspace, cudaStream_t stream) = 0;
2925 
2931  virtual size_t getSerializationSize() = 0;
2932 
2940  virtual void serialize(void* buffer) = 0;
2941 
2942  virtual ~IPlugin() {}
2943 };
2944 
2953 class IPluginExt : public IPlugin
2954 {
2955 public:
2961  virtual int getTensorRTVersion() const
2962  {
2963  return NV_TENSORRT_VERSION;
2964  }
2965 
2976  virtual bool supportsFormat(DataType type, PluginFormat format) const = 0;
2977 
2994  virtual void configureWithFormat(const Dims* inputDims, int nbInputs, const Dims* outputDims, int nbOutputs, DataType type, PluginFormat format, int maxBatchSize) = 0;
2995 
2996  virtual ~IPluginExt() {}
2997 
2998 protected:
3002  void configure(const Dims* /*inputDims*/, int /*nbInputs*/, const Dims* /*outputDims*/, int /*nbOutputs*/, int /*maxBatchSize*/) _TENSORRT_FINAL {}
3003 };
3004 
3017 {
3018 public:
3024  virtual int getTensorRTVersion() const
3025  {
3026  return NV_TENSORRT_VERSION;
3027  }
3028 
3031  // \see IPluginCreator::getPluginName()
3033  virtual const char* getPluginType() const = 0;
3034 
3037  // \see IPluginCreator::getPluginVersion()
3039  virtual const char* getPluginVersion() const = 0;
3040 
3048  virtual int getNbOutputs() const = 0;
3049 
3059  virtual Dims getOutputDimensions(int index, const Dims* inputs, int nbInputDims) = 0;
3060 
3071  virtual bool supportsFormat(DataType type, PluginFormat format) const = 0;
3072 
3089  virtual void configureWithFormat(const Dims* inputDims, int nbInputs, const Dims* outputDims, int nbOutputs, DataType type, PluginFormat format, int maxBatchSize) = 0;
3090 
3096  virtual int initialize() = 0;
3097 
3102  virtual void terminate() = 0;
3103 
3112  virtual size_t getWorkspaceSize(int maxBatchSize) const = 0;
3113 
3125  virtual int enqueue(int batchSize, const void* const* inputs, void** outputs, void* workspace, cudaStream_t stream) = 0;
3126 
3132  virtual size_t getSerializationSize() const = 0;
3133 
3141  virtual void serialize(void* buffer) const = 0;
3142 
3146  virtual void destroy() = 0;
3147 
3151  virtual IPluginV2* clone() const = 0;
3152 
3157  virtual void setPluginNamespace(const char* pluginNamespace) = 0;
3158 
3162  virtual const char* getPluginNamespace() const = 0;
3163 
3164 protected:
3165  virtual ~IPluginV2() {}
3166 };
3167 
3168 class IGpuAllocator;
3169 
3180 class IPluginV2Ext : public IPluginV2
3181 {
3182 public:
3189  virtual nvinfer1::DataType getOutputDataType(int index, const nvinfer1::DataType* inputTypes, int nbInputs) const = 0;
3190 
3201  virtual bool isOutputBroadcastAcrossBatch(int outputIndex, const bool* inputIsBroadcasted, int nbInputs) const = 0;
3202 
3216  virtual bool canBroadcastInputAcrossBatch(int inputIndex) const = 0;
3217 
3240 
3241  virtual void configurePlugin(const Dims* inputDims, int nbInputs, const Dims* outputDims,
3242  int nbOutputs, const DataType* inputTypes, const DataType* outputTypes,
3243  const bool* inputIsBroadcast, const bool* outputIsBroadcast, PluginFormat floatFormat, int maxBatchSize)
3244  = 0;
3245 
3246  virtual ~IPluginV2Ext() {}
3247 
3259  virtual void attachToContext(cudnnContext* /*cudnn*/, cublasContext* /*cublas*/, IGpuAllocator* /*allocator*/) {}
3260 
3267  virtual void detachFromContext() {}
3268 
3274  virtual IPluginV2Ext* clone() const _TENSORRT_OVERRIDE = 0;
3275 
3276 protected:
3284  {
3285  return (0x01000000 | (NV_TENSORRT_VERSION & 0xFFFFFF));
3286  }
3287 
3291  void configureWithFormat(const Dims* /*inputDims*/, int /*nbInputs*/, const Dims* /*outputDims*/,
3292  int /*nbOutputs*/, DataType /*type*/, PluginFormat /*format*/, int /*maxBatchSize*/) _TENSORRT_OVERRIDE _TENSORRT_FINAL {}
3293 };
3294 
3304 class IPluginLayer : public ILayer
3305 {
3306 public:
3312  virtual IPlugin& getPlugin() = 0;
3313 
3314 protected:
3315  virtual ~IPluginLayer() {}
3316 };
3317 
3327 class IPluginV2Layer : public ILayer
3328 {
3329 public:
3335  virtual IPluginV2& getPlugin() = 0;
3336 
3337 protected:
3338  virtual ~IPluginV2Layer() {}
3339 };
3340 
3345 
//!
//! \enum PluginFieldType
//! \brief The data type of a plugin field. \see PluginField
//!
enum class PluginFieldType : int
{
    kFLOAT16 = 0, //!< FP16 field type.
    kFLOAT32 = 1, //!< FP32 field type.
    kFLOAT64 = 2, //!< FP64 field type.
    kINT8 = 3,    //!< INT8 field type.
    kINT16 = 4,   //!< INT16 field type.
    kINT32 = 5,   //!< INT32 field type.
    kCHAR = 6,    //!< char field type.
    kDIMS = 7,    //!< nvinfer1::Dims field type.
    kUNKNOWN = 8  //!< Unknown field type.
};
3358 
3367 {
3371  const char* name;
3375  const void* data;
3384  int length;
3385 
3386  PluginField(const char* name_ = nullptr, const void* data_ = nullptr, const PluginFieldType type_ = PluginFieldType::kUNKNOWN, int length_ = 0)
3387  : name(name_)
3388  , data(data_)
3389  , type(type_)
3390  , length(length_)
3391  {
3392  }
3393 };
3394 
3396 {
3397  int nbFields;
3399 };
3400 
3408 
3410 {
3411 public:
3415  virtual int getTensorRTVersion() const { return NV_TENSORRT_VERSION; }
3416 
3420  virtual const char* getPluginName() const = 0;
3421 
3425  virtual const char* getPluginVersion() const = 0;
3426 
3431  virtual const PluginFieldCollection* getFieldNames() = 0;
3432 
3436  virtual IPluginV2* createPlugin(const char* name, const PluginFieldCollection* fc) = 0;
3437 
3441  virtual IPluginV2* deserializePlugin(const char* name, const void* serialData, size_t serialLength) = 0;
3442 
3449  virtual void setPluginNamespace(const char* pluginNamespace) = 0;
3450 
3454  virtual const char* getPluginNamespace() const = 0;
3455 
3456  virtual ~IPluginCreator() {}
3457 };
3458 
3473 
3475 {
3476 public:
3481  virtual bool registerCreator(IPluginCreator& creator, const char* pluginNamespace) = 0;
3482 
3487  virtual IPluginCreator* const* getPluginCreatorList(int* numCreators) const = 0;
3488 
3493  virtual IPluginCreator* getPluginCreator(const char* pluginType, const char* pluginVersion, const char* pluginNamespace = "") = 0;
3494 
3495 protected:
3496  virtual ~IPluginRegistry() {}
3497 };
3498 
3506 enum class UnaryOperation : int
3507 {
3508  kEXP = 0,
3509  kLOG = 1,
3510  kSQRT = 2,
3511  kRECIP = 3,
3512  kABS = 4,
3513  kNEG = 5,
3514  kSIN = 6,
3515  kCOS = 7,
3516  kTAN = 8,
3517  kSINH = 9,
3518  kCOSH = 10,
3519  kASIN = 11,
3520  kACOS = 12,
3521  kATAN = 13,
3522  kASINH = 14,
3523  kACOSH = 15,
3524  kATANH = 16,
3525  kCEIL = 17,
3526  kFLOOR = 18
3527 };
3528 
3529 template <>
3531 {
3532  return 19;
3533 }
3534 
3542 class IUnaryLayer : public ILayer
3543 {
3544 public:
3550  virtual void setOperation(UnaryOperation op) = 0;
3551 
3557  virtual UnaryOperation getOperation() const = 0;
3558 
3559 protected:
3560  virtual ~IUnaryLayer() {}
3561 };
3562 
3568 enum class ReduceOperation : int
3569 {
3570  kSUM = 0,
3571  kPROD = 1,
3572  kMAX = 2,
3573  kMIN = 3,
3574  kAVG = 4
3575 };
3576 
3577 template <>
3579 {
3580  return 5;
3581 }
3582 
3590 class IReduceLayer : public ILayer
3591 {
3592 public:
3598  virtual void setOperation(ReduceOperation op) = 0;
3599 
3605  virtual ReduceOperation getOperation() const = 0;
3606 
3612  virtual void setReduceAxes(uint32_t reduceAxes) = 0;
3613 
3619  virtual uint32_t getReduceAxes() const = 0;
3620 
3626  virtual void setKeepDimensions(bool keepDimensions) = 0;
3627 
3633  virtual bool getKeepDimensions() const = 0;
3634 
3635 protected:
3636  virtual ~IReduceLayer() {}
3637 };
3638 
3649 class IPaddingLayer : public ILayer
3650 {
3651 public:
3659  virtual void setPrePadding(DimsHW padding) = 0;
3660 
3666  virtual DimsHW getPrePadding() const = 0;
3667 
3675  virtual void setPostPadding(DimsHW padding) = 0;
3676 
3682  virtual DimsHW getPostPadding() const = 0;
3683 
3684 protected:
3685  virtual ~IPaddingLayer() {}
3686 };
3687 
3689 {
3697 };
3698 
3708 class IShuffleLayer : public ILayer
3709 {
3710 public:
3720  virtual void setFirstTranspose(Permutation permutation) = 0;
3721 
3729  virtual Permutation getFirstTranspose() const = 0;
3730 
3749  virtual void setReshapeDimensions(Dims dimensions) = 0;
3750 
3756  virtual Dims getReshapeDimensions() const = 0;
3757 
3770  virtual void setSecondTranspose(Permutation permutation) = 0;
3771 
3779  virtual Permutation getSecondTranspose() const = 0;
3780 
3781 protected:
3782  virtual ~IShuffleLayer() {}
3783 };
3784 
3788 class ISliceLayer : public ILayer
3789 {
3790 public:
3798  virtual void setStart(Dims start) = 0;
3799 
3806  virtual Dims getStart() const = 0;
3807 
3814  virtual void setSize(Dims size) = 0;
3815 
3822  virtual Dims getSize() const = 0;
3823 
3830  virtual void setStride(Dims stride) = 0;
3831 
3838  virtual Dims getStride() const = 0;
3839 
3840 protected:
3841  virtual ~ISliceLayer() {}
3842 };
3843 
3849 enum class TopKOperation : int
3850 {
3851  kMAX = 0,
3852  kMIN = 1,
3853 };
3854 
3855 template <>
3857 {
3858  return 2;
3859 }
3860 
3868 class ITopKLayer : public ILayer
3869 {
3870 public:
3876  virtual void setOperation(TopKOperation op) = 0;
3877 
3883  virtual TopKOperation getOperation() const = 0;
3884 
3892  virtual void setK(int k) = 0;
3893 
3899  virtual int getK() const = 0;
3900 
3906  virtual void setReduceAxes(uint32_t reduceAxes) = 0;
3907 
3913  virtual uint32_t getReduceAxes() const = 0;
3914 
3915 protected:
3916  virtual ~ITopKLayer() {}
3917 };
3918 
3925 enum class MatrixOperation : int
3926 {
3930  kNONE,
3931 
3933  kTRANSPOSE,
3934 
3937  kVECTOR
3938 };
3939 
3940 template <>
3942 {
3943  return 3;
3944 }
3945 
3972 {
3973 public:
3980  virtual void setOperation(int index, MatrixOperation op) = 0;
3981 
3987  virtual MatrixOperation getOperation(int index) const = 0;
3988 
3997  virtual void setTranspose(int index, bool val) = 0;
3998 
4006  virtual bool getTranspose(int index) const = 0;
4007 
4008 protected:
4009  virtual ~IMatrixMultiplyLayer() {}
4010 };
4011 
4027 {
4028 protected:
4029  virtual ~IRaggedSoftMaxLayer() {}
4030 };
4031 
4040 class IIdentityLayer : public ILayer
4041 {
4042 protected:
4043  virtual ~IIdentityLayer() {}
4044 };
4045 
4052 class IConstantLayer : public ILayer
4053 {
4054 public:
4064  virtual void setWeights(Weights weights) = 0;
4065 
4071  virtual Weights getWeights() const = 0;
4072 
4080  virtual void setDimensions(Dims dimensions) = 0;
4081 
4089  virtual Dims getDimensions() const = 0;
4090 
4091 protected:
4092  virtual ~IConstantLayer() {}
4093 };
4094 
//! NOTE(review): this is INetworkDefinition (the class declaration line and six
//! member declarations below were lost as hyperlinks in HTML extraction).
//! A network definition: tensors are added with addInput/markOutput, layers with
//! the add*() methods below. Restore the stripped lines from the original header.
4103 {
4104 public:
//! Add an input tensor to the network; returns the new tensor.
4121  virtual ITensor* addInput(const char* name, DataType type, Dims dimensions) = 0;
4122 
//! Mark a tensor as a network output.
4128  virtual void markOutput(ITensor& tensor) = 0;
4129 
//! Add a convolution layer with the given kernel/bias weights.
4143  virtual IConvolutionLayer* addConvolution(ITensor& input, int nbOutputMaps, DimsHW kernelSize, Weights kernelWeights, Weights biasWeights) = 0;
4144 
//! Add a fully connected layer with the given kernel/bias weights.
4157  virtual IFullyConnectedLayer* addFullyConnected(ITensor& input, int nbOutputs, Weights kernelWeights, Weights biasWeights) = 0;
4158 
//! Add an activation layer of the given type.
4172  virtual IActivationLayer* addActivation(ITensor& input, ActivationType type) = 0;
4173 
//! Add a pooling layer of the given type and window size.
4185  virtual IPoolingLayer* addPooling(ITensor& input, PoolingType type, DimsHW windowSize) = 0;
4186 
//! Add an LRN layer with the given window and alpha/beta/k parameters.
4200  virtual ILRNLayer* addLRN(ITensor& input, int window, float alpha, float beta, float k) = 0;
4201 
//! Add a Scale layer (shift/scale/power applied per the given ScaleMode).
4220  virtual IScaleLayer* addScale(ITensor& input, ScaleMode mode, Weights shift, Weights scale, Weights power) = 0;
4221 
//! Add a SoftMax layer.
4229  virtual ISoftMaxLayer* addSoftMax(ITensor& input) = 0;
4230 
//! Add a concatenation layer over nbInputs tensors.
4243  virtual IConcatenationLayer* addConcatenation(ITensor* const* inputs, int nbInputs) = 0;
4244 
//! Add a deconvolution layer with the given kernel/bias weights.
4258  virtual IDeconvolutionLayer* addDeconvolution(ITensor& input, int nbOutputMaps, DimsHW kernelSize, Weights kernelWeights, Weights biasWeights) = 0;
4259 
//! Add an elementwise binary-operation layer.
4279  virtual IElementWiseLayer* addElementWise(ITensor& input1, ITensor& input2, ElementWiseOperation op) = 0;
4280 
//! Add a (v1) RNN layer; superseded by addRNNv2 below.
4336  virtual IRNNLayer* addRNN(ITensor& inputs, int layerCount, std::size_t hiddenSize, int maxSeqLen, RNNOperation op, RNNInputMode mode, RNNDirection dir, Weights weights, Weights bias) = 0;
4337 
//! Add a plugin layer (IPlugin interface).
4349  virtual IPluginLayer* addPlugin(ITensor* const* inputs, int nbInputs, IPlugin& plugin) = 0;
4350 
//! Add a unary-operation layer.
4361  virtual IUnaryLayer* addUnary(ITensor& input, UnaryOperation operation) = 0;
4362 
//! Add a padding layer with HW pre-/post-padding.
4373  virtual IPaddingLayer* addPadding(ITensor& input, DimsHW prePadding, DimsHW postPadding) = 0;
4374 
//! Add a shuffle (transpose/reshape) layer.
4384  virtual IShuffleLayer* addShuffle(ITensor& input) = 0;
4385 
//! NOTE(review): the six declarations at header lines 4396-4453 were lost in
//! extraction. The page's tooltip for setPoolingOutputDimensionsFormula
//! suggests these are the output-dimensions-formula accessors — verify
//! against the original NvInfer.h before relying on this.
4396 
4405 
4418 
4429 
4442 
4453 
//! Get the number of layers in the network.
4461  virtual int getNbLayers() const = 0;
4462 
//! Get the layer specified by the given index.
4472  virtual ILayer* getLayer(int index) const = 0;
4473 
//! Get the number of network inputs.
4481  virtual int getNbInputs() const = 0;
4482 
4492  virtual ITensor* getInput(int index) const = 0; // adding inputs invalidates indexing here
4493 
//! Get the number of network outputs.
4501  virtual int getNbOutputs() const = 0;
4502 
4512  virtual ITensor* getOutput(int index) const = 0; // adding outputs invalidates indexing here
4513 
//! Destroy this INetworkDefinition object.
4517  virtual void destroy() = 0;
4518 
4519 protected:
4520  virtual ~INetworkDefinition() {}
4521 
4522 public:
//! Add a reduce layer over the axes given by the reduceAxes bitmask.
4542  virtual IReduceLayer* addReduce(ITensor& input, ReduceOperation operation, uint32_t reduceAxes, bool keepDimensions) = 0;
4543 
//! Add a TopK layer keeping k elements along the given axes.
4572  virtual ITopKLayer* addTopK(ITensor& input, TopKOperation op, int k, uint32_t reduceAxes) = 0;
4573 
//! Add a gather layer along the given axis.
4585  virtual IGatherLayer* addGather(ITensor& data, ITensor& indices, int axis) = 0;
4586 
//! Add a RaggedSoftMax layer with per-row bounds.
4597  virtual IRaggedSoftMaxLayer* addRaggedSoftMax(ITensor& input, ITensor& bounds) = 0;
4598 
//! Add a matrix-multiply layer with per-input MatrixOperation.
4611  virtual IMatrixMultiplyLayer* addMatrixMultiply(ITensor& input0, MatrixOperation op0, ITensor& input1, MatrixOperation op1) = 0;
4612 
//! Add a matrix-multiply layer with per-input transpose flags (older overload).
4627  virtual IMatrixMultiplyLayer* addMatrixMultiply(ITensor& input0, bool transpose0, ITensor& input1, bool transpose1) = 0;
4628 
//! Add a constant layer producing the given weights with the given dimensions.
4643  virtual IConstantLayer* addConstant(Dims dimensions, Weights weights) = 0;
4644 
//! Add a v2 RNN layer (preferred over addRNN).
4703  virtual IRNNv2Layer* addRNNv2(ITensor& input, int32_t layerCount, int32_t hiddenSize, int32_t maxSeqLen, RNNOperation op) = 0;
4704 
//! Add a plugin layer (IPluginExt interface).
4716  virtual IPluginLayer* addPluginExt(ITensor* const* inputs, int nbInputs, IPluginExt& plugin) = 0;
4717 
//! Add an identity layer.
4727  virtual IIdentityLayer* addIdentity(ITensor& input) = 0;
4728 
//! Remove a tensor from the network definition.
4739  virtual void removeTensor(ITensor& tensor) = 0;
4740 
//! Unmark a tensor as a network output.
4748  virtual void unmarkOutput(ITensor& tensor) = 0;
4749 
//! Add a plugin layer (IPluginV2 interface).
4761  virtual IPluginV2Layer* addPluginV2(ITensor* const* inputs, int nbInputs, IPluginV2& plugin) = 0;
4762 
//! Add a slice layer with per-dimension start, size, and stride.
4777  virtual ISliceLayer* addSlice(ITensor& input, Dims start, Dims size, Dims stride) = 0;
4778 };
4779 
//!
//! \class IProfiler
//!
//! \brief Application-implemented interface for layer-time profiling.
//!
//! NOTE(review): the class declaration line was lost in extraction; the name is
//! reconstructed from IExecutionContext::setProfiler(IProfiler*) below.
//!
class IProfiler
{
public:
    //!
    //! \brief Layer time reporting callback.
    //!
    //! \param layerName The name of the layer.
    //! \param ms The time taken by the layer, in milliseconds.
    //!
    virtual void reportLayerTime(const char* layerName, float ms) = 0;

    virtual ~IProfiler() {}
};
4803 
4804 class ICudaEngine;
4805 
4817 {
4818 public:
4830  virtual bool execute(int batchSize, void** bindings) = 0;
4831 
4845  virtual bool enqueue(int batchSize, void** bindings, cudaStream_t stream, cudaEvent_t* inputConsumed) = 0;
4846 
4854  virtual void setDebugSync(bool sync) = 0;
4855 
4861  virtual bool getDebugSync() const = 0;
4862 
4868  virtual void setProfiler(IProfiler*) = 0;
4869 
4875  virtual IProfiler* getProfiler() const = 0;
4876 
4882  virtual const ICudaEngine& getEngine() const = 0;
4883 
4887  virtual void destroy() = 0;
4888 
4889 protected:
4890  virtual ~IExecutionContext() {}
4891 
4892 public:
4900  virtual void setName(const char* name) = 0;
4901 
4907  virtual const char* getName() const = 0;
4908 
4920  virtual void setDeviceMemory(void* memory) = 0;
4921 };
4922 
//! NOTE(review): this is ICudaEngine ("An engine for executing inference on a
//! built network", per this page's cross-reference); the class declaration
//! line and two member declarations were lost as hyperlinks in extraction.
4931 {
4932 public:
//! Get the number of binding indices (inputs + outputs).
4938  virtual int getNbBindings() const = 0;
4939 
//! Map a tensor name to its binding index.
4953  virtual int getBindingIndex(const char* name) const = 0;
4954 
//! Map a binding index back to the tensor name.
4965  virtual const char* getBindingName(int bindingIndex) const = 0;
4966 
//! Whether the binding is an input (true) or output (false).
4975  virtual bool bindingIsInput(int bindingIndex) const = 0;
4976 
//! Get the dimensions of a binding.
4985  virtual Dims getBindingDimensions(int bindingIndex) const = 0;
4986 
//! Get the required buffer data type for a binding.
4995  virtual DataType getBindingDataType(int bindingIndex) const = 0;
4996 
//! Get the maximum batch size the engine was built for.
5002  virtual int getMaxBatchSize() const = 0;
5003 
//! Get the number of layers in the (optimized) network.
5012  virtual int getNbLayers() const = 0;
5013 
//! Get the amount of workspace the engine uses.
5020  virtual std::size_t getWorkspaceSize() const = 0;
5021 
//! Serialize the engine to a host-memory blob.
5031  virtual IHostMemory* serialize() const = 0;
5032 
//! NOTE(review): declaration lost in extraction — per this page's tooltip it is
//! "virtual IExecutionContext* createExecutionContext() = 0;". Restore from the
//! original header.
5039 
//! Destroy this object.
5043  virtual void destroy() = 0;
5044 
//! Get the storage location (device/host) of a binding.
5055  virtual TensorLocation getLocation(int bindingIndex) const = 0;
5056 
5057 protected:
5058  virtual ~ICudaEngine() {}
5059 
5060 public:
//! NOTE(review): declaration at header line ~5064 lost in extraction — likely the
//! execution-context factory variant; verify against the original NvInfer.h.
5069 
//! Get the device memory size required by an execution context.
5075  virtual size_t getDeviceMemorySize() const = 0;
5076 
//! Whether the engine can be refit with new weights (see IRefitter).
5082  virtual bool isRefittable() const = 0;
5083 };
5084 
//!
//! \enum CalibrationAlgoType
//!
//! \brief Version of INT8 calibration algorithm in use.
//!
enum class CalibrationAlgoType : int
{
    kLEGACY_CALIBRATION = 0,    //!< Legacy calibration (see IInt8LegacyCalibrator).
    kENTROPY_CALIBRATION = 1,   //!< Entropy calibration (see IInt8EntropyCalibrator).
    kENTROPY_CALIBRATION_2 = 2  //!< Entropy calibration v2 (see IInt8EntropyCalibrator2).
};
5096 
5097 template <>
5099 {
5100  return 3;
5101 }
5102 
5115 {
5116 public:
5122  virtual int getBatchSize() const = 0;
5123 
5137  virtual bool getBatch(void* bindings[], const char* names[], int nbBindings) = 0; // get a pointer to the input batch
5138 
5151  virtual const void* readCalibrationCache(std::size_t& length) = 0;
5152 
5161  virtual void writeCalibrationCache(const void* ptr, std::size_t length) = 0;
5162 
5168  virtual CalibrationAlgoType getAlgorithm() = 0;
5169 
5170  virtual ~IInt8Calibrator() {}
5171 };
5172 
5177 {
5178 public:
5182  virtual CalibrationAlgoType getAlgorithm() { return CalibrationAlgoType::kENTROPY_CALIBRATION; }
5183 
5184  virtual ~IInt8EntropyCalibrator() {}
5185 };
5186 
5191 {
5192 public:
5196  CalibrationAlgoType getAlgorithm() override { return CalibrationAlgoType::kENTROPY_CALIBRATION_2; }
5197 
5198  virtual ~IInt8EntropyCalibrator2() {}
5199 };
5200 
5206 {
5207 public:
5211  virtual CalibrationAlgoType getAlgorithm() { return CalibrationAlgoType::kLEGACY_CALIBRATION; }
5212 
5218  virtual double getQuantile() const = 0;
5219 
5225  virtual double getRegressionCutoff() const = 0;
5226 
5238  virtual const void* readHistogramCache(std::size_t& length) = 0;
5239 
5248  virtual void writeHistogramCache(const void* ptr, std::size_t length) = 0;
5249 
5250  virtual ~IInt8LegacyCalibrator() {}
5251 };
5252 
5262 {
5263  kDEFAULT = 0,
5264  kSAFE_GPU = 1,
5265  kSAFE_DLA = 2,
5266 };
5267 
5268 template <>
5270 {
5271  return 3;
5272 }
5273 
//!
//! \class IGpuAllocator
//!
//! \brief Application-implemented class for controlling GPU allocation.
//!
//! NOTE(review): the class declaration line was lost in extraction; the name is
//! reconstructed from setGpuAllocator(IGpuAllocator*) on IBuilder/IRuntime below.
//!
class IGpuAllocator
{
public:
    //!
    //! \brief Allocate a memory block.
    //!
    //! \param size The size of the requested allocation, in bytes.
    //! \param alignment The required alignment of the allocation.
    //! \param flags Allocation flags — semantics not visible here; confirm
    //!        against the original header.
    //!
    //! \return The address of the allocated memory.
    //!
    virtual void* allocate(uint64_t size, uint64_t alignment, uint32_t flags) = 0;

    //!
    //! \brief Free a memory block previously returned by allocate().
    //!
    //! \param memory The memory address to free.
    //!
    virtual void free(void* memory) = 0;

    //!
    //! \brief Destructor declared virtual so deleting through the base is safe.
    //!
    virtual ~IGpuAllocator() {}
};
5313 
//! NOTE(review): this is IBuilder ("Builds an engine from a network definition",
//! per this page's cross-reference); the class declaration line and two member
//! declarations were lost as hyperlinks in extraction.
5322 {
5323 public:
//! NOTE(review): declaration lost in extraction — per this page's tooltip it is
//! "virtual nvinfer1::INetworkDefinition* createNetwork() = 0;".
5330 
//! Set the maximum batch size engines built by this builder will support.
5338  virtual void setMaxBatchSize(int batchSize) = 0;
5339 
5348  virtual int getMaxBatchSize() const = 0;
5349 
//! Set the maximum scratch workspace (bytes) the engine may use.
5357  virtual void setMaxWorkspaceSize(std::size_t workspaceSize) = 0;
5358 
5366  virtual std::size_t getMaxWorkspaceSize() const = 0;
5367 
//! Older FP16 toggle; superseded by setFp16Mode below.
5379  virtual void setHalf2Mode(bool mode) = 0;
5380 
5388  virtual bool getHalf2Mode() const = 0;
5389 
//! Enable/disable debug synchronization during builds.
5395  virtual void setDebugSync(bool sync) = 0;
5396 
5402  virtual bool getDebugSync() const = 0;
5403 
//! Number of minimization iterations used when timing layers.
5412  virtual void setMinFindIterations(int minFind) = 0;
5413 
5419  virtual int getMinFindIterations() const = 0;
5420 
//! Number of averaging iterations used when timing layers.
5429  virtual void setAverageFindIterations(int avgFind) = 0;
5430 
5436  virtual int getAverageFindIterations() const = 0;
5437 
//! NOTE(review): declaration at header line ~5440 lost in extraction — likely the
//! engine-building entry point; verify against the original NvInfer.h.
5444 
//! Whether the platform has fast native FP16.
5448  virtual bool platformHasFastFp16() const = 0;
5449 
//! Whether the platform has fast native INT8.
5453  virtual bool platformHasFastInt8() const = 0;
5454 
//! Destroy this builder object.
5458  virtual void destroy() = 0;
5459 
//! Enable/disable INT8 kernels.
5465  virtual void setInt8Mode(bool mode) = 0;
5466 
5472  virtual bool getInt8Mode() const = 0;
5473 
//! Set the calibrator used when INT8 mode is enabled.
5477  virtual void setInt8Calibrator(IInt8Calibrator* calibrator) = 0;
5478 
//! Set the device (GPU/DLA) that a particular layer must execute on.
5489  virtual void setDeviceType(ILayer* layer, DeviceType deviceType) = 0;
5490 
5495  virtual DeviceType getDeviceType(const ILayer* layer) const = 0;
5496 
//! Whether a DeviceType was explicitly set for the layer.
5502  virtual bool isDeviceTypeSet(const ILayer* layer) const = 0;
5503 
//! Reset the layer's DeviceType to the default.
5509  virtual void resetDeviceType(ILayer* layer) = 0;
5510 
//! Whether the layer can run on DLA.
5515  virtual bool canRunOnDLA(const ILayer* layer) const = 0;
5516 
//! Set the default DeviceType used for layers without an explicit one.
5521  virtual void setDefaultDeviceType(DeviceType deviceType) = 0;
5522 
5526  virtual DeviceType getDefaultDeviceType() const = 0;
5527 
//! Maximum batch size DLA can support.
5532  virtual int getMaxDLABatchSize() const = 0;
5533 
//! Allow layers unsupported on DLA to fall back to the GPU.
5541  virtual void allowGPUFallback(bool setFallBackMode) = 0;
5542 
//! Number of DLA cores available.
5546  virtual int getNbDLACores() const = 0;
5547 
//! Select which DLA core the engine must execute on.
5554  virtual void setDLACore(int dlaCore) = 0;
5555 
5560  virtual int getDLACore() const = 0;
5561 
//! Reset builder state for the given network.
5565  virtual void reset(nvinfer1::INetworkDefinition& network) = 0;
5566 
5567 protected:
5568  virtual ~IBuilder() {}
5569 
5570 public:
//! Set the allocator used for GPU memory during build.
5580  virtual void setGpuAllocator(IGpuAllocator* allocator) = 0;
5581 
//! Enable/disable 16-bit kernels.
5591  virtual void setFp16Mode(bool mode) = 0;
5592 
5598  virtual bool getFp16Mode() const = 0;
5599 
//! Require layers to execute in their set precisions rather than fastest.
5614  virtual void setStrictTypeConstraints(bool mode) = 0;
5615 
5621  virtual bool getStrictTypeConstraints() const = 0;
5622 
//! Build engines whose weights can later be updated via IRefitter.
5626  virtual void setRefittable(bool canRefit) = 0;
5627 
5633  virtual bool getRefittable() const = 0;
5634 
//! Select the capability flow (default / safe GPU / safe DLA) of the engine.
5638  virtual void setEngineCapability(EngineCapability capability) = 0;
5639 
5645  virtual EngineCapability getEngineCapability() const = 0;
5646 };
5647 
//!
//! \enum WeightsRole
//!
//! \brief How a layer uses a particular Weights object, as used by IRefitter.
//!
enum class WeightsRole : int
{
    kKERNEL = 0,   //!< Kernel weights (e.g. for convolution/deconvolution/fully-connected layers).
    kBIAS = 1,     //!< Bias weights.
    kSHIFT = 2,    //!< Shift part of an IScaleLayer.
    kSCALE = 3,    //!< Scale part of an IScaleLayer.
    kCONSTANT = 4, //!< Weights of an IConstantLayer.
};
5662 
5663 template <>
5665 {
5666  return 5;
5667 }
5668 
5677 {
5678 public:
5689  virtual bool setWeights(const char* layerName,
5690  WeightsRole role, Weights weights)
5691  = 0;
5692 
5698  virtual bool refitCudaEngine() = 0;
5699 
5716  virtual int getMissing(int size, const char** layerNames, WeightsRole* roles) = 0;
5717 
5730  virtual int getAll(int size, const char** layerNames, WeightsRole* roles) = 0;
5731 
5732  virtual void destroy() = 0;
5733 
5734 protected:
5735  virtual ~IRefitter() {}
5736 };
5737 
5745 {
5746 public:
5761  virtual IPlugin* createPlugin(const char* layerName, const void* serialData, size_t serialLength) = 0;
5762 };
5763 
5772 {
5773 public:
5783  virtual nvinfer1::ICudaEngine* deserializeCudaEngine(const void* blob, std::size_t size, IPluginFactory* pluginFactory) = 0;
5784 
5790  virtual void setDLACore(int dlaCore) = 0;
5791 
5796  virtual int getDLACore() const = 0;
5797 
5801  virtual int getNbDLACores() const = 0;
5802 
5806  virtual void destroy() = 0;
5807 
5808 protected:
5809  virtual ~IRuntime() {}
5810 
5811 public:
5820  virtual void setGpuAllocator(IGpuAllocator* allocator) = 0;
5821 };
5822 
//!
//! \class ILogger
//!
//! \brief Application-implemented logging interface for the builder,
//! engine and runtime.
//!
class ILogger
{
public:
    //!
    //! \enum Severity
    //!
    //! \brief The severity corresponding to a log message.
    //!
    enum class Severity
    {
        kINTERNAL_ERROR = 0, //!< An internal error has occurred. Execution is unrecoverable.
        kERROR = 1,          //!< An application error has occurred.
        kWARNING = 2,        //!< An application error has been discovered, but execution recovered or fell back to a default.
        kINFO = 3,           //!< Informational messages.
        kVERBOSE = 4,        //!< Verbose messages with debugging information.
    };

    //!
    //! \brief A callback implemented by the application to handle logging messages.
    //!
    //! \param severity The severity of the message.
    //! \param msg The log message, null terminated.
    //!
    virtual void log(Severity severity, const char* msg) = 0;

    virtual ~ILogger() {}
};
5858 
5859 template <>
5860 inline int EnumMax<ILogger::Severity>()
5861 {
5862  return 5;
5863 }
5864 
5865 } // namespace nvinfer1
5866 
//! Internal C entry point for creating IBuilder (use createInferBuilder() instead).
5867 extern "C" TENSORRTAPI void* createInferBuilder_INTERNAL(void* logger, int version);
//! Internal C entry point for creating IRefitter (use createInferRefitter() instead).
5868 extern "C" TENSORRTAPI void* createInferRefitter_INTERNAL(void* engine, void* logger, int version);
//! Internal C entry point for creating IRuntime (use createInferRuntime() instead).
5869 extern "C" TENSORRTAPI void* createInferRuntime_INTERNAL(void* logger, int version);
5870 
//! Return the logger object.
5874 extern "C" TENSORRTAPI nvinfer1::ILogger* getLogger();
5875 
//! Return the library version number.
5881 extern "C" TENSORRTAPI int getInferLibVersion();
5882 
//! Return the plugin registry (see PluginRegistrar below).
5886 extern "C" TENSORRTAPI nvinfer1::IPluginRegistry* getPluginRegistry();
5887 
5888 namespace nvinfer1
5889 {
5895 namespace // unnamed namespace in case the compiler doesn't inline these
5896 {
5897 inline IBuilder* createInferBuilder(ILogger& logger)
5898 {
5899  return static_cast<IBuilder*>(createInferBuilder_INTERNAL(&logger, NV_TENSORRT_VERSION));
5900 }
5901 
5907 inline IRefitter* createInferRefitter(ICudaEngine& engine, ILogger& logger)
5908 {
5909  return static_cast<IRefitter*>(createInferRefitter_INTERNAL(&engine, &logger, NV_TENSORRT_VERSION));
5910 }
5911 
5917 inline IRuntime* createInferRuntime(ILogger& logger)
5918 {
5919  return static_cast<IRuntime*>(createInferRuntime_INTERNAL(&logger, NV_TENSORRT_VERSION));
5920 }
5921 }
5922 
5929 template <typename T>
5931 {
5932 public:
5933  PluginRegistrar() { getPluginRegistry()->registerCreator(instance, ""); }
5934 private:
5935  T instance{};
5936 };
5937 
//! Statically register a plugin creator of type \p name with the plugin
//! registry when the translation unit is loaded.
#define REGISTER_TENSORRT_PLUGIN(name) \
    static nvinfer1::PluginRegistrar<name> pluginRegistrar##name {}
5940 }
5941 
5942 #endif
DimensionType type[MAX_DIMS]
The type of each dimension.
Definition: NvInfer.h:187
int EnumMax< PluginFormat >()
Maximum number of elements in PluginFormat enum.
Definition: NvInfer.h:2837
virtual bool enqueue(int batchSize, void **bindings, cudaStream_t stream, cudaEvent_t *inputConsumed)=0
Asynchronously execute inference on a batch.
int w() const
Get the width.
Definition: NvInfer.h:505
int n() const
Get the index count.
Definition: NvInfer.h:463
Use SAME padding with prePadding <= postPadding.
An engine for executing inference on a built network.
Definition: NvInfer.h:4930
Subtract the second element from the first.
Perform the normal matrix multiplication in the first recurrent layer.
virtual void configureWithFormat(const Dims *inputDims, int nbInputs, const Dims *outputDims, int nbOutputs, DataType type, PluginFormat format, int maxBatchSize)=0
Configure the layer.
DataType
The type of weights and tensors.
Definition: NvInfer.h:125
virtual void configurePlugin(const Dims *inputDims, int nbInputs, const Dims *outputDims, int nbOutputs, const DataType *inputTypes, const DataType *outputTypes, const bool *inputIsBroadcast, const bool *outputIsBroadcast, PluginFormat floatFormat, int maxBatchSize)=0
Configure the layer with input and output data types.
PaddingMode
Enumerates the modes of padding to perform in convolution, deconvolution and pooling layer...
Definition: NvInfer.h:962
virtual void setAverageCountExcludesPadding(bool exclusive)=0
Set whether average pooling uses as a denominator the overlap area between the window and the unpadde...
virtual bool isNetworkInput() const =0
Whether the tensor is a network input.
virtual DimsHW getDilation() const =0
Get the dilation for a convolution.
Severity
Definition: NvInfer.h:5839
virtual void setMinFindIterations(int minFind)=0
Set the number of minimization iterations used when timing layers.
virtual int getMaxBatchSize() const =0
Get the maximum batch size.
int EnumMax< TensorLocation >()
Maximum number of elements in TensorLocation enum.
Definition: NvInfer.h:598
virtual ITensor * getCellState() const =0
Get the initial cell state of the RNN.
virtual size_t getSerializationSize() const =0
Find the size of the serialization buffer required.
virtual IPlugin * createPlugin(const char *layerName, const void *serialData, size_t serialLength)=0
Create a plugin from serialized data.
virtual ITensor * addInput(const char *name, DataType type, Dims dimensions)=0
Add an input tensor to the network.
virtual uint32_t getAxes() const =0
Get the axis along which softmax occurs.
virtual void resetPrecision()=0
reset the computational precision for this layer
virtual void setBias(Weights bias)=0
Set the bias parameters for the RNN.
DimsNCHW(int batchSize, int channels, int height, int width)
Construct a DimsNCHW given batch size, channel count, height and width.
Definition: NvInfer.h:443
virtual void setKernelWeights(Weights weights)=0
Set the kernel weights for the deconvolution.
virtual ReduceOperation getOperation() const =0
Get the reduce operation for the layer.
#define NV_TENSORRT_MAJOR
TensorRT major version.
Definition: NvInfer.h:56
virtual IExecutionContext * createExecutionContext()=0
Create an execution context.
virtual IPluginV2 * createPlugin(const char *name, const PluginFieldCollection *fc)=0
Return a plugin object. Return nullptr in case of error.
virtual int getBatchSize() const =0
Get the batch size used for calibration batches.
RNNOperation
Enumerates the RNN operations that may be performed by an RNN layer.
Definition: NvInfer.h:2193
virtual int getTensorRTVersion() const
Return the API version with which this plugin was built.
Definition: NvInfer.h:3024
virtual void setOperation(ElementWiseOperation type)=0
Set the binary operation for the layer.
virtual void setBroadcastAcrossBatch(bool broadcastAcrossBatch)=0
Set whether to enable broadcast of tensor across the batch.
A Softmax layer in a network definition.
Definition: NvInfer.h:1762
virtual Dims getPrePadding() const =0
Get the pre-padding.
virtual int getNbGroups() const =0
Get the number of groups for a convolution.
virtual Weights getPower() const =0
Get the power value.
virtual void setWeightsForGate(int layerIndex, RNNGateType gate, bool isW, Weights weights)=0
Set the weight parameters for an individual gate in the RNN.
MatrixOperation
Enumerates the operations that may be performed on a tensor by IMatrixMultiplyLayer before multiplica...
Definition: NvInfer.h:3925
Definition: NvInfer.h:3688
Plugin class for user-implemented layers.
Definition: NvInfer.h:3016
virtual double getQuantile() const =0
The quantile (between 0 and 1) that will be used to select the region maximum when the quantile metho...
virtual std::size_t getWorkspaceSize() const =0
Get the amount of workspace the engine uses.
An application error has occurred.
An application error has been discovered, but TensorRT has recovered or fallen back to a default...
virtual Weights getBias() const =0
Get the bias parameter vector for the RNN.
virtual void destroy()=0
Destroy the allocated memory.
virtual bool bindingIsInput(int bindingIndex) const =0
Determine whether a binding is an input binding.
virtual int getGatherAxis() const =0
Get the non-batch dimension axis to gather on.
virtual void setInputMode(RNNInputMode op)=0
Set the input mode of the RNN layer.
virtual const char * getName() const =0
Return the name of a layer.
virtual Weights getScale() const =0
Get the scale value.
virtual DimsHW getKernelSize() const =0
Get the HW kernel size of the deconvolution.
Layer that represents an unary operation.
Definition: NvInfer.h:3542
virtual void destroy()=0
Destroy this object.
virtual DimsHW getStride() const =0
Get the stride of the deconvolution.
PluginFieldType
Definition: NvInfer.h:3346
virtual void setDLACore(int dlaCore)=0
Set the DLA core that the deserialized engine must execute on.
Rectified linear activation.
virtual bool getDebugSync() const =0
Get the debug sync flag.
virtual bool execute(int batchSize, void **bindings)=0
Synchronously execute inference on a batch.
virtual void destroy()=0
Destroy this INetworkDefinition object.
virtual const PluginFieldCollection * getFieldNames()=0
Return a list of fields that needs to be passed to createPlugin.
virtual void setPrePadding(Dims padding)=0
Set the pre-padding.
An Activation layer in a network definition.
Definition: NvInfer.h:1317
TENSORRTAPI void * createInferRuntime_INTERNAL(void *logger, int version)
Internal C entry point for creating IRuntime.
int w() const
Get the width.
Definition: NvInfer.h:380
virtual Dims getOutputDimensions(int index, const Dims *inputs, int nbInputDims)=0
Get the dimension of an output tensor.
RNNDirection
Enumerates the RNN direction that may be performed by an RNN layer.
Definition: NvInfer.h:2214
int EnumMax< DataType >()
Maximum number of elements in DataType enum.
Definition: NvInfer.h:134
virtual PaddingMode getPaddingMode() const =0
Get the padding mode.
Layer that represents a Matrix Multiplication.
Definition: NvInfer.h:3971
virtual DimsHW getKernelSize() const =0
Get the HW kernel size of the convolution.
virtual DataType type() const =0
The type of the memory that was allocated.
virtual int getAverageFindIterations() const =0
Query the number of averaging iterations.
Safety restricted capability, TensorRT flow that can only run on DLA/PVA devices. ...
DimsCHW()
Construct an empty DimsCHW object.
Definition: NvInfer.h:319
No operation is performed on the first recurrent layer.
const void * values
The weight values, in a contiguous array.
Definition: NvInfer.h:520
int c() const
Get the channel count.
Definition: NvInfer.h:477
void configure(const Dims *, int, const Dims *, int, int) _TENSORRT_FINAL
Derived classes should not implement this. In a C++11 API it would be override final.
Definition: NvInfer.h:3002
virtual bool getDebugSync() const =0
Query whether the builder will use debug synchronization.
virtual void setAlpha(float alpha)=0
Set the LRN alpha value.
virtual int initialize()=0
Initialize the layer for execution. This is called when the engine is created.
struct CUevent_st * cudaEvent_t
Forward declaration of cudaEvent_t.
Definition: NvInfer.h:106
CalibrationAlgoType getAlgorithm() override
Definition: NvInfer.h:5196
virtual float getDynamicRangeMin() const =0
Get minimum of dynamic range.
virtual Dims getBindingDimensions(int bindingIndex) const =0
Get the dimensions of a binding.
int h() const
Get the height.
Definition: NvInfer.h:491
virtual void setPrecision(DataType dataType)=0
Set the computational precision of this layer.
A convolution layer in a network definition.
Definition: NvInfer.h:991
virtual ITopKLayer * addTopK(ITensor &input, TopKOperation op, int k, uint32_t reduceAxes)=0
Add a TopK layer to the network.
Allows a serialized engine to be deserialized.
Definition: NvInfer.h:5771
virtual void setStart(Dims start)=0
Set the start offset.
virtual Dims getReshapeDimensions() const =0
Get the reshaped dimensions.
virtual void writeHistogramCache(const void *ptr, std::size_t length)=0
Save a histogram cache.
virtual void setPoolingOutputDimensionsFormula(IOutputDimensionsFormula *formula)=0
Set the pooling output dimensions formula.
virtual int getDLACore() const =0
Get the DLA core that the engine executes on.
A RaggedSoftmax layer in a network definition.
Definition: NvInfer.h:4026
virtual unsigned getLayerCount() const =0
Get the number of layers in the RNN.
virtual int getNbOutputs() const =0
Get the number of outputs from the layer.
virtual Dims getStart() const =0
Get the start offset.
virtual void configure(const Dims *inputDims, int nbInputs, const Dims *outputDims, int nbOutputs, int maxBatchSize)=0
Configure the layer.
ScaleMode
Controls how shift, scale and power are applied in a Scale layer.
Definition: NvInfer.h:1653
Plugin class for user-implemented layers.
Definition: NvInfer.h:2849
Layer that represents a constant value.
Definition: NvInfer.h:4052
virtual Dims getPrePadding() const =0
Get the pre-padding.
virtual void setBeta(float beta)=0
Set the beta parameter (must be finite).
virtual DataType getPrecision() const =0
get the computational precision of this layer
virtual ILayer * getLayer(int index) const =0
Get the layer specified by the given index.
A Scale layer in a network definition.
Definition: NvInfer.h:1688
virtual float getBeta() const =0
Get the beta parameter.
virtual bool getBatch(void *bindings[], const char *names[], int nbBindings)=0
Get a batch of input for calibration.
Parametric softplus activation: alpha*log(exp(beta*x)+1)
virtual IRaggedSoftMaxLayer * addRaggedSoftMax(ITensor &input, ITensor &bounds)=0
Add a RaggedSoftMax layer to the network.
virtual int getNbGroups() const =0
Get the number of groups for a deconvolution.
Use SAME padding, with prePadding >= postPadding.
NHWC where C must be a multiple of 8.
virtual bool refitCudaEngine()=0
Updates associated engine. Return true if successful.
int EnumMax< EngineCapability >()
Maximum number of elements in EngineCapability enum.
Definition: NvInfer.h:5269
virtual void setNbOutputMaps(int nbOutputMaps)=0
Set the number of output maps for the convolution.
virtual void log(Severity severity, const char *msg)=0
virtual const char * getBindingName(int bindingIndex) const =0
Retrieve the name corresponding to a binding index.
virtual IRNNv2Layer * addRNNv2(ITensor &input, int32_t layerCount, int32_t hiddenSize, int32_t maxSeqLen, RNNOperation op)=0
Add an layerCount deep RNN layer to the network with hiddenSize internal states that can take a batch...
const PluginField * fields
Pointer to PluginField entries.
Definition: NvInfer.h:3398
virtual bool getFp16Mode() const =0
Query whether 16-bit kernels are permitted.
virtual IScaleLayer * addScale(ITensor &input, ScaleMode mode, Weights shift, Weights scale, Weights power)=0
Add a Scale layer to the network.
virtual void setHiddenState(ITensor &hidden)=0
Set the initial hidden state of the RNN with the provided hidden ITensor.
virtual void setPluginNamespace(const char *pluginNamespace)=0
Set the namespace that this plugin object belongs to. Ideally, all plugin objects from the same plugi...
int EnumMax< RNNOperation >()
Maximum number of elements in RNNOperation enum.
Definition: NvInfer.h:2202
int EnumMax< LayerType >()
Maximum number of elements in LayerType enum.
Definition: NvInfer.h:582
virtual void setAxes(uint32_t axes)=0
Set the axis along which softmax is computed. Currently, only one axis can be set.
static const int MAX_DIMS
The maximum number of dimensions supported for a tensor.
Definition: NvInfer.h:184
virtual void setPrePadding(DimsHW padding)=0
Set the padding that is applied at the start of the tensor.
virtual float getDynamicRange() const =0
Get dynamic range for the tensor.
virtual DataType getBindingDataType(int bindingIndex) const =0
Determine the required data type for a buffer from its binding index.
A fully connected layer in a network definition. This layer expects an input tensor of three or more ...
Definition: NvInfer.h:1226
virtual ITensor * getSequenceLengths() const =0
Get the sequence lengths specified for the RNN.
virtual Dims getPrePadding() const =0
Get the pre-padding.
Use CAFFE padding, rounding output size down.
virtual void resetDeviceType(ILayer *layer)=0
reset the DeviceType for this layer
virtual DataType getType() const =0
Get the data type of a tensor.
virtual bool supportsFormat(DataType type, PluginFormat format) const =0
Check format support.
PluginFormat
Definition: NvInfer.h:2822
TENSORRTAPI void * createInferBuilder_INTERNAL(void *logger, int version)
Internal C entry point for creating IBuilder.
virtual void setStride(Dims stride)=0
Set the slicing stride.
virtual void setKeepDimensions(bool keepDimensions)=0
Set the boolean that specifies whether or not to keep the reduced dimensions for the layer...
virtual void setBeta(float beta)=0
Set the LRN beta value.
int w() const
Get the width.
Definition: NvInfer.h:274
ReduceOperation
Enumerates the reduce operations that may be performed by a Reduce layer.
Definition: NvInfer.h:3568
virtual const char * getPluginNamespace() const =0
Return the namespace of the plugin creator object.
Definition: NvInfer.h:3395
An internal error has occurred. Execution is unrecoverable.
virtual const char * getName() const =0
Return the name of the execution context.
A LRN layer in a network definition.
Definition: NvInfer.h:1581
bias for IConvolutionLayer, IDeconvolutionLayer, or IFullyConnectedLayer
int EnumMax< DeviceType >()
Maximum number of elements in DeviceType enum.
Definition: NvInfer.h:150
Descriptor for three-dimensional data.
Definition: NvInfer.h:281
virtual bool getInt8Mode() const =0
Query whether Int8 mode is used.
virtual int getNbLayers() const =0
Get the number of layers in the network.
virtual ElementWiseOperation getOperation() const =0
Get the binary operation for the layer.
virtual void setOperation(UnaryOperation op)=0
Set the unary operation for the layer.
virtual IPaddingLayer * addPadding(ITensor &input, DimsHW prePadding, DimsHW postPadding)=0
Add a padding layer to the network.
TensorLocation
The location for tensor data storage, device or host.
Definition: NvInfer.h:591
UnaryOp operation Layer.
shift part of IScaleLayer
Definition: NvInfer.h:2092
Builds an engine from a network definition.
Definition: NvInfer.h:5321
Verbose messages with debugging information.
virtual void markOutput(ITensor &tensor)=0
Mark a tensor as a network output.
virtual DimsHW getPadding() const =0
Get the padding of the deconvolution.
virtual void setDLACore(int dlaCore)=0
Set the DLA core that the engine must execute on.
virtual void setOperation(RNNOperation op)=0
Set the operation of the RNN layer.
virtual void writeCalibrationCache(const void *ptr, std::size_t length)=0
Save a calibration cache.
virtual void setBiasWeights(Weights weights)=0
Set the bias weights for the convolution.
TENSORRTAPI nvinfer1::ILogger * getLogger()
Return the logger object.
virtual TopKOperation getOperation() const =0
Get the operation for the layer.
virtual void setRefittable(bool canRefit)=0
virtual void attachToContext(cudnnContext *, cublasContext *, IGpuAllocator *)
Attach the plugin object to an execution context and grant the plugin the access to some context reso...
Definition: NvInfer.h:3259
Layer that represents a TopK reduction.
Definition: NvInfer.h:3868
virtual PaddingMode getPaddingMode() const =0
Get the padding mode.
virtual RNNInputMode getInputMode() const =0
Get the input mode of the RNN layer.
int EnumMax< ActivationType >()
Maximum number of elements in ActivationType enum.
Definition: NvInfer.h:1301
virtual ISliceLayer * addSlice(ITensor &input, Dims start, Dims size, Dims stride)=0
Add a slice layer to the network.
virtual void setInt8Mode(bool mode)=0
Set whether or not Int8 kernels are permitted.
virtual void setReshapeDimensions(Dims dimensions)=0
Set the reshaped dimensions.
DataType type
The type of the weights.
Definition: NvInfer.h:519
virtual float getBeta() const =0
Get the LRN beta value.
virtual void setPadding(DimsHW padding)=0
Set the padding of the deconvolution.
virtual int enqueue(int batchSize, const void *const *inputs, void **outputs, void *workspace, cudaStream_t stream)=0
Execute the layer.
virtual TensorLocation getLocation() const =0
Get the storage location of a tensor.
An RNN layer in a network definition, version 2.
Definition: NvInfer.h:2622
Register the plugin creator to the registry The static registry object will be instantiated when the ...
Definition: NvInfer.h:5930
Scaled tanh activation: alpha*tanh(beta*x)
virtual void setType(DataType type)=0
Set the data type of a tensor.
virtual ITensor * getOutput(int index) const =0
Get the output tensor specified by the given index.
virtual Permutation getSecondTranspose() const =0
Get the permutation applied by the second transpose operation.
int & h()
Get the height.
Definition: NvInfer.h:359
Elements correspond to different spatial data.
virtual DimsHW getPadding() const =0
Get the padding of the convolution. If the padding is asymmetric, the pre-padding is returned...
virtual nvinfer1::INetworkDefinition * createNetwork()=0
Create a network definition object.
Clip activation: max(alpha, min(beta, x))
virtual int getNbOutputChannels() const =0
Get the number of output channels K from the fully connected layer.
int EnumMax< WeightsRole >()
Maximum number of elements in WeightsRole enum.
Definition: NvInfer.h:5664
virtual bool getTranspose(int index) const =0
Get the transpose flag for an input tensor.
virtual Weights getWeights() const =0
Get the weights for the layer.
virtual bool getRefittable() const =0
Query whether or not engines will be refittable.
virtual void setFp16Mode(bool mode)=0
Set whether or not 16-bit kernels are permitted.
virtual void reportLayerTime(const char *layerName, float ms)=0
Layer time reporting callback.
virtual void setK(int k)=0
Set the k value for the layer.
virtual IDeconvolutionLayer * addDeconvolution(ITensor &input, int nbOutputMaps, DimsHW kernelSize, Weights kernelWeights, Weights biasWeights)=0
Add a deconvolution layer to the network.
virtual IActivationLayer * addActivation(ITensor &input, ActivationType type)=0
Add an activation layer to the network.
virtual Weights getKernelWeights() const =0
Get the kernel weights.
virtual void setName(const char *name)=0
Set the name of the execution context.
virtual int getNbOutputs() const =0
Get the number of outputs in the network.
virtual void setPluginNamespace(const char *pluginNamespace)=0
Set the namespace of the plugin creator based on the plugin library it belongs to. This can be set while registering the plugin creator.
int & n()
Get the index count.
Definition: NvInfer.h:456
Data stored on device.
int EnumMax< TopKOperation >()
Maximum number of elements in TopKOperation enum.
Definition: NvInfer.h:3856
Elements correspond to different sequence values.
virtual void serialize(void *buffer) const =0
Serialize the layer.
virtual std::size_t size() const =0
The size in bytes of the data that was allocated.
int EnumMax()
Maximum number of elements in an enumeration type.
virtual bool isDeviceTypeSet(const ILayer *layer) const =0
whether the DeviceType has been explicitly set for this layer
int c() const
Get the channel count.
Definition: NvInfer.h:352
int & w()
Get the width.
Definition: NvInfer.h:373
virtual IElementWiseLayer * addElementWise(ITensor &input1, ITensor &input2, ElementWiseOperation op)=0
Add an elementwise layer to the network.
virtual ~IGpuAllocator()
Definition: NvInfer.h:5311
virtual bool getAverageCountExcludesPadding() const =0
Get whether exclusive pooling uses as a denominator the overlap area between the window and the unpadd...
Layer that represents a reduction operator.
Definition: NvInfer.h:3590
virtual PaddingMode getPaddingMode() const =0
Get the padding mode.
virtual void setPrePadding(Dims padding)=0
Set the pre-padding.
Definition: NvInfer.h:5205
PoolingType
The type of pooling to perform in a pooling layer.
Definition: NvInfer.h:1378
int h() const
Get the height.
Definition: NvInfer.h:366
virtual const char * getPluginVersion() const =0
Return the plugin version. Should match the plugin version returned by the corresponding plugin creat...
virtual void detachFromContext()
Detach the plugin object from its execution context.
Definition: NvInfer.h:3267
Single registration point for all plugins in an application. It is used to find plugin implementation...
Definition: NvInfer.h:3474
Sum of the two elements.
virtual float getBlendFactor() const =0
Get the blending factor for the max_average_blend mode: max_average_blendPool = (1-blendFactor)*maxPo...
int order[Dims::MAX_DIMS]
Definition: NvInfer.h:3696
int EnumMax< RNNGateType >()
Maximum number of elements in RNNGateType enum.
Definition: NvInfer.h:2608
virtual void setPostPadding(Dims padding)=0
Set the post-padding.
virtual void setMode(ScaleMode mode)=0
Set the scale mode.
virtual ITensor * getInput(int index) const =0
Get the layer input corresponding to the given index.
virtual bool supportsFormat(DataType type, PluginFormat format) const =0
Check format support.
Inverse hyperbolic tangent.
virtual void setConvolutionOutputDimensionsFormula(IOutputDimensionsFormula *formula)=0
Set the convolution output dimensions formula.
Plugin creator class for user implemented layers.
Definition: NvInfer.h:3409
Elu activation: x>=0 ? x : alpha * (exp(x) - 1).
int & w()
Get the width.
Definition: NvInfer.h:267
virtual void setSequenceLengths(ITensor &seqLengths)=0
Specify individual sequence lengths in the batch with the ITensor pointed to by seqLengths.
Plugin factory for deserialization.
Definition: NvInfer.h:5744
virtual int getNbInputs() const =0
Get the number of inputs in the network.
virtual IPluginV2Ext * clone() const _TENSORRT_OVERRIDE=0
Clone the plugin object. This copies over internal plugin parameters as well and returns a new plugin...
virtual ITensor * getHiddenState() const =0
Get the initial hidden state of the RNN.
int length
Number of data entries in the Plugin attribute.
Definition: NvInfer.h:3384
virtual void setOperation(TopKOperation op)=0
Set the operation for the layer.
Descriptor for data with one channel dimension and two spatial dimensions.
Definition: NvInfer.h:313
virtual DimsHW getWindowSize() const =0
Get the window size for pooling.
The first element to the power of the second element.
A network definition for input to the builder.
Definition: NvInfer.h:4102
virtual int getNbLayers() const =0
Get the number of layers in the network.
virtual IHostMemory * serialize() const =0
Serialize the network to a stream.
virtual CalibrationAlgoType getAlgorithm()
Definition: NvInfer.h:5211
virtual IUnaryLayer * addUnary(ITensor &input, UnaryOperation operation)=0
Add a unary layer to the network.
Like kNONE, but transpose the matrix dimensions.
Structure containing plugin attribute field names and associated data This information can be parsed ...
Definition: NvInfer.h:3366
virtual void setOperation(ReduceOperation op)=0
Set the reduce operation for the layer.
virtual void resetDynamicRange()=0
Undo effect of setDynamicRange.
virtual int getTensorRTVersion() const
Return the version of the API the plugin creator was compiled with.
Definition: NvInfer.h:3415
virtual void destroy()=0
Destroy this object.
int EnumMax< CalibrationAlgoType >()
Maximum number of elements in CalibrationAlgoType enum.
Definition: NvInfer.h:5098
virtual RNNDirection getDirection() const =0
Get the direction of the RNN layer.
virtual DimsHW getPadding() const =0
Get the padding for pooling.
virtual const char * getPluginType() const =0
Return the plugin type. Should match the plugin name returned by the corresponding plugin creator...
virtual size_t getSerializationSize()=0
Find the size of the serialization buffer required.
virtual ITensor * getHiddenState() const =0
Get the initial hidden state of the RNN.
virtual IConstantLayer * addConstant(Dims dimensions, Weights weights)=0
Add a constant layer to the network.
virtual void setPaddingMode(PaddingMode paddingMode)=0
Set the padding mode.
virtual void setGpuAllocator(IGpuAllocator *allocator)=0
Set the GPU allocator.
virtual bool getStrictTypeConstraints() const =0
Query whether or not type constraints are strict.
Divide the first element by the second.
Layer type for pluginV2.
Definition: NvInfer.h:3327
virtual int getMinFindIterations() const =0
Query the number of minimization iterations.
Full capability, TensorRT mode without any restrictions.
virtual LayerType getType() const =0
Return the type of a layer.
virtual void setDeviceMemory(void *memory)=0
set the device memory for use by this execution context.
EngineCapability
List of supported engine capability flows.
Definition: NvInfer.h:5261
virtual float getK() const =0
Get the LRN K value.
virtual void resetOutputType(int index)=0
reset the output type for this layer
virtual RNNDirection getDirection() const =0
Get the direction of the RNN layer.
Product of the two elements.
Dims2()
Construct an empty Dims2 object.
Definition: NvInfer.h:200
virtual IProfiler * getProfiler() const =0
Get the profiler.
virtual void setPrePadding(Dims padding)=0
Set the pre-padding.
virtual IPluginV2 & getPlugin()=0
Get the plugin for the layer.
TopKOperation
Enumerates the operations that may be performed by a TopK layer.
Definition: NvInfer.h:3849
virtual Weights getWeightsForGate(int layerIndex, RNNGateType gate, bool isW) const =0
Get the weight parameters for an individual gate in the RNN.
struct CUstream_st * cudaStream_t
Forward declaration of cudaStream_t.
Definition: NvInfer.h:105
Dims3(int d0, int d1, int d2)
Construct a Dims3 from 3 elements.
Definition: NvInfer.h:300
virtual void setStride(DimsHW stride)=0
Set the stride for pooling.
virtual int getSeqLength() const =0
Get the sequence length.
Descriptor for four-dimensional data.
Definition: NvInfer.h:387
virtual DimsHW getStride() const =0
Get the stride for pooling.
Elements correspond to different channels.
virtual void terminate()=0
Release resources acquired during plugin layer initialization. This is called when the engine is dest...
virtual void setStrictTypeConstraints(bool mode)=0
Set whether or not type constraints are strict.
int EnumMax< ReduceOperation >()
Maximum number of elements in ReduceOperation enum.
Definition: NvInfer.h:3578
int EnumMax< DimensionType >()
Maximum number of elements in DimensionType enum.
Definition: NvInfer.h:168
nvinfer1::Dims field type.
ActivationType
Enumerates the types of activation to perform in an activation layer.
Definition: NvInfer.h:1284
virtual void setDeconvolutionOutputDimensionsFormula(IOutputDimensionsFormula *formula)=0
Set the deconvolution output dimensions formula.
virtual const char * getPluginNamespace() const =0
Return the namespace of the plugin object.
Descriptor for data with one index dimension, one channel dimension and two spatial dimensions...
Definition: NvInfer.h:421
virtual void setKernelWeights(Weights weights)=0
Set the kernel weights, given as a KxC matrix in row-major order.
virtual bool setDynamicRange(float min, float max)=0
Set dynamic range for the tensor.
virtual void setMaxBatchSize(int batchSize)=0
Set the maximum batch size.
virtual Dims getPostPadding() const =0
Get the padding.
virtual int getBindingIndex(const char *name) const =0
Retrieve the binding index for a named tensor.
Inverse hyperbolic cosine.
virtual IFullyConnectedLayer * addFullyConnected(ITensor &input, int nbOutputs, Weights kernelWeights, Weights biasWeights)=0
Add a fully connected layer to the network.
TENSORRTAPI nvinfer1::IPluginRegistry * getPluginRegistry()
Return the plugin registry.
virtual MatrixOperation getOperation(int index) const =0
Get the operation for an input tensor.
virtual void setGatherAxis(int axis)=0
Set the non-batch dimension axis to gather on. The axis must be less than the number of non-batch dim...
virtual int getK() const =0
Get the k value for the layer.
virtual const void * readCalibrationCache(std::size_t &length)=0
Load a calibration cache.
virtual void setBiasForGate(int layerIndex, RNNGateType gate, bool isW, Weights bias)=0
Set the bias parameters for an individual gate in the RNN.
virtual void setDeviceType(ILayer *layer, DeviceType deviceType)=0
Set the device that this layer must execute on.
virtual IReduceLayer * addReduce(ITensor &input, ReduceOperation operation, uint32_t reduceAxes, bool keepDimensions)=0
Add a reduce layer to the network.
Data stored on host.
virtual void setPadding(DimsHW padding)=0
Set the padding for pooling.
DimsHW()
Construct an empty DimsHW object.
Definition: NvInfer.h:230
virtual void setWindowSize(DimsHW windowSize)=0
Set the window size for pooling.
virtual bool registerCreator(IPluginCreator &creator, const char *pluginNamespace)=0
Register a plugin creator. Returns false if one with same type is already registered.
Use explicit padding, rounding output size up.
virtual int getDLACore() const =0
Get the DLA core that the engine executes on.
virtual void setPaddingMode(PaddingMode paddingMode)=0
Set the padding mode.
Base class for all layer classes in a network definition.
Definition: NvInfer.h:803
quantized INT8 format.
virtual void setTranspose(int index, bool val)=0
Set the transpose flag for an input tensor.
Dims4(int d0, int d1, int d2, int d3)
Construct a Dims4 from 4 elements.
Definition: NvInfer.h:407
int EnumMax< PoolingType >()
Maximum number of elements in PoolingType enum.
Definition: NvInfer.h:1386
Definition: NvInfer.h:3788
int & h()
Get the height.
Definition: NvInfer.h:484
virtual int getWindowSize() const =0
Get the LRN window size.
int & h()
Get the height.
Definition: NvInfer.h:253
virtual void setAlpha(float alpha)=0
Set the alpha parameter (must be finite).
virtual void free(void *memory)=0
virtual void reset(nvinfer1::INetworkDefinition &network)=0
Resets the builder state.
virtual void setActivationType(ActivationType type)=0
Set the type of activation to be performed.
TENSORRTAPI int getInferLibVersion()
Return the library version number.
virtual std::size_t getHiddenSize() const =0
Get the size of the hidden layers.
int & c()
Get the channel count.
Definition: NvInfer.h:470
virtual void setStride(DimsHW stride)=0
Set the stride of the deconvolution.
Structure to define the dimensions of a tensor.
Definition: NvInfer.h:181
Network iterates from first to last and vice versa and outputs concatenated.
#define NV_TENSORRT_PATCH
TensorRT patch version.
Definition: NvInfer.h:58
virtual IRNNLayer * addRNN(ITensor &inputs, int layerCount, std::size_t hiddenSize, int maxSeqLen, RNNOperation op, RNNInputMode mode, RNNDirection dir, Weights weights, Weights bias)=0
Add an layerCount deep RNN layer to the network with a sequence length of maxSeqLen and hiddenSize in...
virtual bool precisionIsSet() const =0
whether the computational precision has been set for this layer
virtual float getDynamicRangeMax() const =0
Get maximum of dynamic range.
virtual void setReduceAxes(uint32_t reduceAxes)=0
Set which axes to reduce for the layer.
virtual void setGpuAllocator(IGpuAllocator *allocator)=0
Set the GPU allocator.
virtual bool canBroadcastInputAcrossBatch(int inputIndex) const =0
Return true if plugin can use input that is broadcast across batch without replication.
Layer type for shuffling data.
Definition: NvInfer.h:3708
virtual bool platformHasFastFp16() const =0
Determine whether the platform has fast native fp16.
DimsCHW(int channels, int height, int width)
Construct a DimsCHW given channel count, height and width.
Definition: NvInfer.h:333
Definition: NvInfer.h:5190
A elementwise layer in a network definition.
Definition: NvInfer.h:2062
A Pooling layer in a network definition.
Definition: NvInfer.h:1401
virtual void setDefaultDeviceType(DeviceType deviceType)=0
Sets the default DeviceType to be used by the builder. It ensures that all the layers that can run on...
virtual RNNOperation getOperation() const =0
Get the operation of the RNN layer.
virtual void serialize(void *buffer)=0
Serialize the layer.
virtual void setDilation(DimsHW dims)=0
Set the dilation for a convolution.
virtual void setOperation(RNNOperation op)=0
Set the operation of the RNN layer.
virtual void configureWithFormat(const Dims *inputDims, int nbInputs, const Dims *outputDims, int nbOutputs, DataType type, PluginFormat format, int maxBatchSize)=0
Configure the layer.
virtual DimsHW getPrePadding() const =0
Get the padding that is applied at the start of the tensor.
virtual void setDirection(RNNDirection op)=0
Set the direction of the RNN layer.
int d[MAX_DIMS]
The extent of each dimension.
Definition: NvInfer.h:186
Application-implemented interface to compute layer output sizes.
Definition: NvInfer.h:2790
Minimum of the two elements.
virtual Weights getBiasWeights() const =0
Get the bias weights.
#define NV_TENSORRT_MINOR
TensorRT minor version.
Definition: NvInfer.h:57
virtual Dims getPostPadding() const =0
Get the post-padding.
virtual int getNbBindings() const =0
Get the number of binding indices.
virtual void setPadding(DimsHW padding)=0
Set the padding of the convolution.
virtual Dims getDimensions() const =0
Get the dimensions of a tensor.
virtual void removeTensor(ITensor &tensor)=0
remove a tensor from the network definition.
int getTensorRTVersion() const _TENSORRT_OVERRIDE
Return the API version with which this plugin was built. The upper byte reserved by TensorRT and is u...
Definition: NvInfer.h:3283
ElementWiseOperation
Enumerates the binary operations that may be performed by an ElementWise layer.
Definition: NvInfer.h:2034
int64_t count
The number of weights in the array.
Definition: NvInfer.h:521
virtual int getMaxDLABatchSize() const =0
Get the maximum batch size DLA can support. For any tensor the total volume of index dimensions combi...
Three-gate network consisting of Gated Recurrent Units.
virtual IPluginV2 * deserializePlugin(const char *name, const void *serialData, size_t serialLength)=0
Called during deserialization of plugin layer. Return a plugin object.
virtual const char * getName() const =0
Get the tensor name.
RNNGateType
Identifies an individual gate within an RNN cell.
Definition: NvInfer.h:2596
Network iterates from first input to last input.
virtual void setBiasWeights(Weights weights)=0
Set the bias weights for the deconvolution.
int & c()
Get the channel count.
Definition: NvInfer.h:345
virtual void setSecondTranspose(Permutation permutation)=0
Set the permutation applied by the second transpose operation.
virtual void setEngineCapability(EngineCapability capability)=0
Configure the builder to target specified EngineCapability flow.
virtual bool getKeepDimensions() const =0
Get the boolean that specifies whether or not to keep the reduced dimensions for the layer...
virtual bool isNetworkOutput() const =0
Whether the tensor is a network output.
Dims3()
Construct an empty Dims3 object.
Definition: NvInfer.h:287
virtual bool getBroadcastAcrossBatch() const =0
Check if tensor is broadcast across the batch.
virtual IPluginV2 * clone() const =0
Clone the plugin object. This copies over internal plugin parameters and returns a new plugin object ...
virtual ITensor * getCellState() const =0
Get the initial cell state of the RNN.
virtual void setCellState(ITensor &cell)=0
Set the initial cell state of the LSTM with the provided cell ITensor.
A tensor in a network definition.
Definition: NvInfer.h:612
virtual void destroy()=0
Destroy this object;.
virtual void setLocation(TensorLocation location)=0
Set the storage location of a tensor.
virtual IPluginCreator *const * getPluginCreatorList(int *numCreators) const =0
Return all the registered plugin creators and the number of registered plugin creators. Returns nullptr if none found.
kernel for IConvolutionLayer, IDeconvolutionLayer, or IFullyConnectedLayer
int EnumMax< UnaryOperation >()
Maximum number of elements in UnaryOperation enum.
Definition: NvInfer.h:3530
virtual void setBlendFactor(float blendFactor)=0
Set the blending factor for the max_average_blend mode: max_average_blendPool = (1-blendFactor)*maxPo...
An array of weights used as a layer parameter.
Definition: NvInfer.h:516
int & w()
Get the width.
Definition: NvInfer.h:498
const char * name
Plugin field attribute name.
Definition: NvInfer.h:3371
virtual int getNbOutputs() const =0
Get the number of outputs from the layer.
int h() const
Get the height.
Definition: NvInfer.h:260
virtual int getDataLength() const =0
Get the length of the data being processed by the RNN for use in computing other values.
virtual DeviceType getDeviceType(const ILayer *layer) const =0
Get the device that this layer executes on.
virtual IMatrixMultiplyLayer * addMatrixMultiply(ITensor &input0, MatrixOperation op0, ITensor &input1, MatrixOperation op1)=0
Add a MatrixMultiply layer to the network.
virtual nvinfer1::ICudaEngine * deserializeCudaEngine(const void *blob, std::size_t size, IPluginFactory *pluginFactory)=0
Deserialize an engine from a stream.
virtual EngineCapability getEngineCapability() const =0
Query EngineCapability flow configured for the builder.
virtual nvinfer1::ICudaEngine * buildCudaEngine(nvinfer1::INetworkDefinition &network)=0
Build a CUDA engine from a network definition.
Dims2(int d0, int d1)
Construct a Dims2 from 2 elements.
Definition: NvInfer.h:212
virtual Dims getSize() const =0
Get the output dimension.
virtual Weights getShift() const =0
Get the shift value.
virtual void setAxis(int axis)=0
Set the axis along which concatenation occurs.
virtual int getNbOutputMaps() const =0
Get the number of output feature maps for the deconvolution.
Dims4()
Construct an empty Dims4 object.
Definition: NvInfer.h:393
virtual void setScale(Weights scale)=0
Set the scale value.
virtual void setInput(int index, ITensor &tensor)=0
replace an input of this layer with a specific tensor
virtual IPoolingLayer * addPooling(ITensor &input, PoolingType type, DimsHW windowSize)=0
Add a pooling layer to the network.
int nbDims
The number of dimensions.
Definition: NvInfer.h:185
virtual bool platformHasFastInt8() const =0
Determine whether the platform has fast native int8.
virtual int enqueue(int batchSize, const void *const *inputs, void **outputs, void *workspace, cudaStream_t stream)=0
Execute the layer.
virtual DimsHW getStride() const =0
Get the stride of the convolution.
Application-implemented interface for calibration.
Definition: NvInfer.h:5114
virtual void destroy()=0
Destroy the plugin object. This will be called when the network, builder or engine is destroyed...
#define _TENSORRT_OVERRIDE
Defines which symbols are exported.
Definition: NvInfer.h:70
Application-implemented logging interface for the builder, engine and runtime.
Definition: NvInfer.h:5831
virtual DataType getOutputType(int index) const =0
get the output type of this layer
virtual IPluginLayer * addPluginExt(ITensor *const *inputs, int nbInputs, IPluginExt &plugin)=0
Add a plugin layer to the network using an IPluginExt interface.
virtual void setDirection(RNNDirection op)=0
Set the direction of the RNN layer.
virtual bool setWeights(const char *layerName, WeightsRole role, Weights weights)=0
Specify new weights for a layer of given name. Returns true on success, or false if new weights are r...
virtual void setInt8Calibrator(IInt8Calibrator *calibrator)=0
Set Int8 Calibration interface.
virtual bool outputTypeIsSet(int index) const =0
whether the output type has been set for this layer
DimsHW(int height, int width)
Construct a DimsHW given height and width.
Definition: NvInfer.h:242
virtual void * data() const =0
A pointer to the raw data that is owned by the library.
Identical coefficients across all elements of the tensor.
int EnumMax< ElementWiseOperation >()
Maximum number of elements in ElementWiseOperation enum.
Definition: NvInfer.h:2046
Plugin class for user-implemented layers.
Definition: NvInfer.h:2953
Layer that represents a padding operation.
Definition: NvInfer.h:3649
virtual void setBiasWeights(Weights weights)=0
Set the bias weights.
virtual IIdentityLayer * addIdentity(ITensor &input)=0
Add an identity layer.
virtual void setNbOutputMaps(int nbOutputMaps)=0
Set the number of output feature maps for the deconvolution.
virtual size_t getWorkspaceSize(int maxBatchSize) const =0
Find the workspace size required by the layer.
virtual void setNbOutputChannels(int nbOutputs)=0
Set the number of output channels K from the fully connected layer.
virtual void setKernelWeights(Weights weights)=0
Set the kernel weights for the convolution.
virtual Weights getBiasWeights() const =0
Get the bias weights for the convolution.
virtual size_t getWorkspaceSize(int maxBatchSize) const =0
Find the workspace size required by the layer.
Safety restricted capability, TensorRT flow that can only run on GPU devices.
int EnumMax< MatrixOperation >()
Maximum number of elements in MatrixOperation enum.
Definition: NvInfer.h:3941
virtual uint32_t getReduceAxes() const =0
Get the axes over which to reduce for the layer.
virtual ITensor * getOutput(int index) const =0
Get the layer output corresponding to the given index.
virtual IPlugin & getPlugin()=0
Get the plugin for the layer.
virtual void setInputMode(RNNInputMode op)=0
Set the input mode of the RNN layer.
virtual void setNbGroups(int nbGroups)=0
Set the number of groups for a convolution.
virtual Dims getStride() const =0
Get the slicing stride.
virtual void setCellState(ITensor &cell)=0
Set the initial cell state of the RNN with the provided cell ITensor.
virtual bool isRefittable() const =0
Return true if engine can be refit.
virtual void setKernelSize(DimsHW kernelSize)=0
Set the HW kernel size of the convolution.
CalibrationAlgoType
Version of calibration algorithm to use.
Definition: NvInfer.h:5090
virtual void setStride(DimsHW stride)=0
Set the stride of the convolution.
virtual float getAlpha() const =0
Get the LRN alpha value.
virtual void setReduceAxes(uint32_t reduceAxes)=0
Set the axes over which to reduce.
Informational messages with instructional information.
virtual CalibrationAlgoType getAlgorithm()
Definition: NvInfer.h:5182
Use explicit padding, rounding output size down.
virtual IConvolutionLayer * addConvolution(ITensor &input, int nbOutputMaps, DimsHW kernelSize, Weights kernelWeights, Weights biasWeights)=0
Add a convolution layer to the network.
virtual void setFirstTranspose(Permutation permutation)=0
Set the permutation applied by the first transpose operation.
DimsNCHW()
Construct an empty DimsNCHW object.
Definition: NvInfer.h:427
Class to handle library allocated memory that is accessible to the user.
Definition: NvInfer.h:534
LayerType
The type values of layer classes.
Definition: NvInfer.h:552
Selu activation: x>0 ? beta * x : beta * (alpha*exp(x) - alpha)
int EnumMax< PaddingMode >()
Maximum number of elements in PaddingMode enum.
Definition: NvInfer.h:973
virtual void setPostPadding(Dims padding)=0
Set the post-padding.
int EnumMax< ScaleMode >()
Maximum number of elements in ScaleMode enum.
Definition: NvInfer.h:1661
A layer that represents the identity function.
Definition: NvInfer.h:4040
virtual bool canRunOnDLA(const ILayer *layer) const =0
Checks if a layer can run on DLA.
virtual DimsHW compute(DimsHW inputDims, DimsHW kernelSize, DimsHW stride, DimsHW padding, DimsHW dilation, const char *layerName) const =0
Application-implemented interface to compute the HW output dimensions of a layer from the layer input...
virtual void setDimensions(Dims dimensions)=0
Set the dimensions for the layer.
virtual Weights getBiasWeights() const =0
Get the bias weights for the deconvolution.
virtual void setProfiler(IProfiler *)=0
Set the profiler.
virtual void setWeights(Weights weights)=0
Set the weights for the layer.
virtual void setSize(Dims size)=0
Set the output dimension.
RNNInputMode
Enumerates the RNN input modes that may occur with an RNN layer.
Definition: NvInfer.h:2241
A RNN layer in a network definition.
Definition: NvInfer.h:2264
virtual void setName(const char *name)=0
Set the name of a layer.
virtual ActivationType getActivationType() const =0
Get the type of activation to be performed.
virtual Dims getDimensions() const =0
Get the dimensions for the layer.
virtual void setDebugSync(bool sync)=0
Set the debug sync flag.
virtual void destroy()=0
Destroy this object.
virtual void unmarkOutput(ITensor &tensor)=0
unmark a tensor as a network output.
virtual void setPostPadding(DimsHW padding)=0
Set the padding that is applied at the end of the tensor.
void configureWithFormat(const Dims *, int, const Dims *, int, DataType, PluginFormat, int) _TENSORRT_OVERRIDE _TENSORRT_FINAL
Derived classes should not implement this. In a C++11 API it would be override final.
Definition: NvInfer.h:3291
Layer type for plugins.
Definition: NvInfer.h:3304
virtual UnaryOperation getOperation() const =0
Get the unary operation for the layer.
virtual bool isOutputBroadcastAcrossBatch(int outputIndex, const bool *inputIsBroadcasted, int nbInputs) const =0
Return true if output tensor is broadcast across a batch.
virtual void setOperation(int index, MatrixOperation op)=0
Set the operation for an input tensor.
virtual void setPaddingMode(PaddingMode paddingMode)=0
Set the padding mode.
virtual void setPostPadding(Dims padding)=0
Set the post-padding.
DimensionType
The type of data encoded across this dimension.
Definition: NvInfer.h:159
virtual IConcatenationLayer * addConcatenation(ITensor *const *inputs, int nbInputs)=0
Add a concatenation layer to the network.
Four-gate LSTM network w/o peephole connections.
virtual DimsHW getPostPadding() const =0
Get the padding that is applied at the end of the tensor.
Thresholded ReLU activation: x>alpha ? x : 0.
virtual const ICudaEngine & getEngine() const =0
Get the associated engine.
virtual void * allocate(uint64_t size, uint64_t alignment, uint32_t flags)=0
virtual void setHiddenState(ITensor &hidden)=0
Set the initial hidden state of the RNN with the provided hidden ITensor.
virtual TensorLocation getLocation(int bindingIndex) const =0
Get location of binding.
virtual ScaleMode getMode() const =0
Get the scale mode.
virtual ILRNLayer * addLRN(ITensor &input, int window, float alpha, float beta, float k)=0
Add a LRN layer to the network.
virtual IPluginCreator * getPluginCreator(const char *pluginType, const char *pluginVersion, const char *pluginNamespace="")=0
Return plugin creator based on plugin type, version and namespace associated with plugin during netwo...
virtual int getTensorRTVersion() const
Return the API version with which this plugin was built.
Definition: NvInfer.h:2961
virtual int getAll(int size, const char **layerNames, WeightsRole *roles)=0
Get description of all weights that could be refit.
virtual void setMaxWorkspaceSize(std::size_t workspaceSize)=0
Set the maximum workspace size.
virtual void setShift(Weights shift)=0
Set the shift value.
virtual IPluginV2Layer * addPluginV2(ITensor *const *inputs, int nbInputs, IPluginV2 &plugin)=0
Add a plugin layer to the network using the IPluginV2 interface.
virtual float getAlpha() const =0
Get the alpha parameter.
virtual size_t getDeviceMemorySize() const =0
Return the amount of device memory required by an execution context.
virtual Weights getKernelWeights() const =0
Get the kernel weights for the deconvolution.
virtual int getNbDLACores() const =0
Returns number of DLA hardware cores accessible.
virtual Dims getOutputDimensions(int index, const Dims *inputs, int nbInputDims)=0
Get the dimension of an output tensor.
Application-implemented class for controlling allocation on the GPU.
Definition: NvInfer.h:5279
Context for executing inference using an engine.
Definition: NvInfer.h:4816
virtual ITensor * getInput(int index) const =0
Get the input tensor specified by the given index.
Updates weights in an engine.
Definition: NvInfer.h:5676
virtual void setPower(Weights power)=0
Set the power value.
virtual double getRegressionCutoff() const =0
The fraction (between 0 and 1) of the maximum used to define the regression cutoff when using regress...
virtual void setHalf2Mode(bool mode)=0
Set whether half2 mode is used.
virtual void setOutputType(int index, DataType dataType)=0
Set the output type of this layer.
virtual std::size_t getMaxWorkspaceSize() const =0
Get the maximum workspace size.
virtual int getAxis() const =0
Get the axis along which concatenation occurs.
Descriptor for two-dimensional spatial data.
Definition: NvInfer.h:224
UnaryOperation
Enumerates the unary operations that may be performed by a Unary layer.
Definition: NvInfer.h:3506
virtual void setAverageFindIterations(int avgFind)=0
Set the number of averaging iterations used when timing layers.
virtual int getNbOutputMaps() const =0
Get the number of output maps for the convolution.
virtual int getNbDLACores() const =0
Returns number of DLA hardware cores accessible.
virtual DeviceType getDefaultDeviceType() const =0
Get the default DeviceType which was set by setDefaultDeviceType.
virtual RNNOperation getOperation() const =0
Get the operation of the RNN layer.
virtual nvinfer1::DataType getOutputDataType(int index, const nvinfer1::DataType *inputTypes, int nbInputs) const =0
Return the DataType of the plugin output at the requested index. The default behavior should be to re...
DeviceType
The device that this layer/network will execute on.
Definition: NvInfer.h:144
virtual Dims getPostPadding() const =0
Get the padding.
virtual bool getHalf2Mode() const =0
Query whether half2 mode is used.
virtual int getNbInputs() const =0
Get the number of inputs of a layer.
virtual ISoftMaxLayer * addSoftMax(ITensor &input)=0
Add a SoftMax layer to the network.
Elements correspond to different batch index.
virtual Weights getBiasForGate(int layerIndex, RNNGateType gate, bool isW) const =0
Get the bias parameters for an individual gate in the RNN.
Descriptor for two-dimensional data.
Definition: NvInfer.h:194
virtual int getMaxBatchSize() const =0
Get the maximum batch size which can be used for inference.
virtual void setName(const char *name)=0
Set the tensor name.
virtual IOutputDimensionsFormula & getPoolingOutputDimensionsFormula() const =0
Get the pooling output dimensions formula.
LeakyRelu activation: x>=0 ? x : alpha * x.
virtual int getNbOutputs() const =0
Get the number of outputs of a layer.
virtual IGatherLayer * addGather(ITensor &data, ITensor &indices, int axis)=0
Add a gather layer to the network.
virtual IOutputDimensionsFormula & getDeconvolutionOutputDimensionsFormula() const =0
Get the deconvolution output dimensions formula.
virtual void setDimensions(Dims dimensions)=0
Set the dimensions of a tensor.
virtual void setWindowSize(int windowSize)=0
Set the LRN window size.
virtual Weights getKernelWeights() const =0
Get the kernel weights for the convolution.
virtual CalibrationAlgoType getAlgorithm()=0
Get the algorithm used by this calibrator.
Application-implemented interface for profiling.
Definition: NvInfer.h:4790
virtual RNNInputMode getInputMode() const =0
Get the input mode of the RNN layer.
A concatenation layer in a network definition.
Definition: NvInfer.h:1803
virtual void setK(float k)=0
Set the LRN K value.
Plugin class for user-implemented layers.
Definition: NvInfer.h:3180
virtual void setNbGroups(int nbGroups)=0
Set the number of groups for a deconvolution.
const void * data
Plugin field attribute data.
Definition: NvInfer.h:3375
Hard sigmoid activation: max(0, min(1, alpha*x+beta))
virtual IPluginLayer * addPlugin(ITensor *const *inputs, int nbInputs, IPlugin &plugin)=0
Add a plugin layer to the network.
int EnumMax< RNNDirection >()
Maximum number of elements in RNNDirection enum.
Definition: NvInfer.h:2221
virtual const void * readHistogramCache(std::size_t &length)=0
Load a histogram.
virtual void allowGPUFallback(bool setFallBackMode)=0
Sets the builder to use GPU if a layer that was supposed to run on DLA cannot run on DLA...
TENSORRTAPI void * createInferRefitter_INTERNAL(void *engine, void *logger, int version)
Internal C entry point for creating IRefitter.
int nbFields
Number of PluginField entries.
Definition: NvInfer.h:3397
virtual Permutation getFirstTranspose() const =0
Get the permutation applied by the first transpose operation.
virtual const char * getPluginVersion() const =0
Return the plugin version.
WeightsRole
How a layer uses particular Weights.
Definition: NvInfer.h:5654
virtual IExecutionContext * createExecutionContextWithoutDeviceMemory()=0
Create an execution context without any device memory allocated.
virtual void setKernelSize(DimsHW kernelSize)=0
Set the HW kernel size of the convolution.
virtual void setWeights(Weights weights)=0
Set the weight parameters for the RNN.
virtual Weights getWeights() const =0
Get the W weights for the RNN.
virtual const char * getPluginName() const =0
Return the plugin name.
virtual void terminate()=0
Release resources acquired during plugin layer initialization. This is called when the engine is destroyed.
virtual bool dynamicRangeIsSet() const =0
Query whether dynamic range is set.
virtual PoolingType getPoolingType() const =0
Get the type of pooling to be performed.
virtual int getMissing(int size, const char **layerNames, WeightsRole *roles)=0
Get description of missing weights.
PluginFieldType type
Plugin field attribute type.
Definition: NvInfer.h:3380
Inverse hyperbolic sine.
A deconvolution layer in a network definition.
Definition: NvInfer.h:1836
virtual IShuffleLayer * addShuffle(ITensor &input)=0
Add a shuffle layer to the network.
virtual void setPoolingType(PoolingType type)=0
Set the type of pooling to be performed.
virtual IOutputDimensionsFormula & getConvolutionOutputDimensionsFormula() const =0
Get the convolution output dimensions formula.
virtual void setDebugSync(bool sync)=0
Set whether the builder should use debug synchronization.
Softsign activation: x / (1+|x|)
virtual uint32_t getReduceAxes() const =0
Get the axes to reduce for the layer.
virtual int initialize()=0
Initialize the layer for execution. This is called when the engine is created.
Use CAFFE padding, rounding output size up.
Definition: NvInfer.h:5176
int EnumMax< RNNInputMode >()
Maximum number of elements in RNNInputMode enum.
Definition: NvInfer.h:2248