TensorRT
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Macros Pages
NvInfer.h
Go to the documentation of this file.
1 /*
2  * Copyright 1993-2018 NVIDIA Corporation. All rights reserved.
3  *
4  * NOTICE TO LICENSEE:
5  *
6  * This source code and/or documentation ("Licensed Deliverables") are
7  * subject to NVIDIA intellectual property rights under U.S. and
8  * international Copyright laws.
9  *
10  * These Licensed Deliverables contained herein is PROPRIETARY and
11  * CONFIDENTIAL to NVIDIA and is being provided under the terms and
12  * conditions of a form of NVIDIA software license agreement by and
13  * between NVIDIA and Licensee ("License Agreement") or electronically
14  * accepted by Licensee. Notwithstanding any terms or conditions to
15  * the contrary in the License Agreement, reproduction or disclosure
16  * of the Licensed Deliverables to any third party without the express
17  * written consent of NVIDIA is prohibited.
18  *
19  * NOTWITHSTANDING ANY TERMS OR CONDITIONS TO THE CONTRARY IN THE
20  * LICENSE AGREEMENT, NVIDIA MAKES NO REPRESENTATION ABOUT THE
21  * SUITABILITY OF THESE LICENSED DELIVERABLES FOR ANY PURPOSE. IT IS
22  * PROVIDED "AS IS" WITHOUT EXPRESS OR IMPLIED WARRANTY OF ANY KIND.
23  * NVIDIA DISCLAIMS ALL WARRANTIES WITH REGARD TO THESE LICENSED
24  * DELIVERABLES, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY,
25  * NONINFRINGEMENT, AND FITNESS FOR A PARTICULAR PURPOSE.
26  * NOTWITHSTANDING ANY TERMS OR CONDITIONS TO THE CONTRARY IN THE
27  * LICENSE AGREEMENT, IN NO EVENT SHALL NVIDIA BE LIABLE FOR ANY
28  * SPECIAL, INDIRECT, INCIDENTAL, OR CONSEQUENTIAL DAMAGES, OR ANY
29  * DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS,
30  * WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS
31  * ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE
32  * OF THESE LICENSED DELIVERABLES.
33  *
34  * U.S. Government End Users. These Licensed Deliverables are a
35  * "commercial item" as that term is defined at 48 C.F.R. 2.101 (OCT
36  * 1995), consisting of "commercial computer software" and "commercial
37  * computer software documentation" as such terms are used in 48
38  * C.F.R. 12.212 (SEPT 1995) and is provided to the U.S. Government
39  * only as a commercial end item. Consistent with 48 C.F.R.12.212 and
40  * 48 C.F.R. 227.7202-1 through 227.7202-4 (JUNE 1995), all
41  * U.S. Government End Users acquire the Licensed Deliverables with
42  * only those rights set forth herein.
43  *
44  * Any use of the Licensed Deliverables in individual and commercial
45  * software must include, in the user documentation and internal
46  * comments to the code, the above Disclaimer and U.S. Government End
47  * Users Notice.
48  */
49 
50 #ifndef NV_INFER_H
51 #define NV_INFER_H
52 
53 #include <cstddef>
54 #include <cstdint>
55 
56 #define NV_TENSORRT_MAJOR 4
57 #define NV_TENSORRT_MINOR 0
58 #define NV_TENSORRT_PATCH 1
59 #define NV_TENSORRT_BUILD 3
60 
61 #define NV_TENSORRT_SONAME_MAJOR 4
62 #define NV_TENSORRT_SONAME_MINOR 1
63 #define NV_TENSORRT_SONAME_PATCH 2
64 
65 #if __cplusplus > 201103L
66 #define _TENSORRT_FINAL final
67 #else
68 #define _TENSORRT_FINAL
69 #endif
70 
72 #ifdef TENSORRT_BUILD_LIB
73 #define TENSORRTAPI __attribute__ ((visibility ("default")))
74 #else
75 #define TENSORRTAPI
76 #endif
77 
86 
92 
93 // forward declare some CUDA types to avoid an include dependency
94 
95 struct cublasContext;
96 struct cudnnContext;
97 
98 typedef struct CUstream_st* cudaStream_t;
99 typedef struct CUevent_st* cudaEvent_t;
100 
101 static const int NV_TENSORRT_VERSION = (NV_TENSORRT_MAJOR * 1000) + (NV_TENSORRT_MINOR * 100) + NV_TENSORRT_PATCH; // major, minor, patch
102 
108 namespace nvinfer1
109 {
110 
//!
//! Maximum number of elements in an enumeration type T.
//!
template <typename T>
inline int EnumMax();

//!
//! \enum DataType
//! \brief The type of weights and tensors.
//!
enum class DataType : int
{
    kFLOAT = 0, //!< 32-bit floating point
    kHALF = 1,  //!< 16-bit floating point
    kINT8 = 2,  //!< 8-bit integer (quantized)
    kINT32 = 3  //!< 32-bit integer
};

//! Maximum number of elements in DataType enum. \see DataType
template <>
inline int EnumMax<DataType>()
{
    return 4;
}

//!
//! \enum DimensionType
//! \brief The type of data encoded across a tensor dimension.
//!
enum class DimensionType : int
{
    kSPATIAL = 0,
    kCHANNEL = 1,
    kINDEX = 2,
    kSEQUENCE = 3
};

//! Maximum number of elements in DimensionType enum. \see DimensionType
// NOTE(review): the "inline int EnumMax<DimensionType>()" signature line was
// dropped by the documentation extraction; restored to match the other
// EnumMax specializations in this header.
template <>
inline int EnumMax<DimensionType>()
{
    return 4;
}

//!
//! \class Dims
//! \brief Structure to define the dimensions of a tensor.
//!
class Dims
{
public:
    static const int MAX_DIMS = 8; //!< the maximum number of dimensions supported by a tensor
    int nbDims;                    //!< the number of dimensions in use
    int d[MAX_DIMS];               //!< the extent of each dimension
    // NOTE(review): this member was dropped by the extraction; restored from the
    // index entry "DimensionType type[MAX_DIMS] — The type of each dimension".
    DimensionType type[MAX_DIMS];  //!< the type of each dimension
};
166 
171 class Dims2 : public Dims
172 {
173 public:
178  {
179  nbDims = 2;
180  d[0] = d[1] = 0;
181  }
182 
189  Dims2(int d0, int d1)
190  {
191  nbDims = 2;
192  d[0] = d0;
193  d[1] = d1;
194  }
195 };
196 
201 class DimsHW : public Dims2
202 {
203 public:
208  : Dims2()
209  {
211  }
212 
219  DimsHW(int height, int width)
220  : Dims2(height, width)
221  {
223  }
224 
230  int& h() { return d[0]; }
231 
237  int h() const { return d[0]; }
238 
244  int& w() { return d[1]; }
245 
251  int w() const { return d[1]; }
252 };
253 
258 class Dims3 : public Dims
259 {
260 public:
265  {
266  nbDims = 3;
267  d[0] = d[1] = d[2] = 0;
268  }
269 
277  Dims3(int d0, int d1, int d2)
278  {
279  nbDims = 3;
280  d[0] = d0;
281  d[1] = d1;
282  d[2] = d2;
283  }
284 };
285 
290 class DimsCHW : public Dims3
291 {
292 public:
297  : Dims3()
298  {
301  }
302 
310  DimsCHW(int channels, int height, int width)
311  : Dims3(channels, height, width)
312  {
315  }
316 
322  int& c() { return d[0]; }
323 
329  int c() const { return d[0]; }
330 
336  int& h() { return d[1]; }
337 
343  int h() const { return d[1]; }
344 
350  int& w() { return d[2]; }
351 
357  int w() const { return d[2]; }
358 };
359 
364 class Dims4 : public Dims
365 {
366 public:
371  {
372  nbDims = 4;
373  d[0] = d[1] = d[2] = d[3] = 0;
374  }
375 
384  Dims4(int d0, int d1, int d2, int d3)
385  {
386  nbDims = 4;
387  d[0] = d0;
388  d[1] = d1;
389  d[2] = d2;
390  d[3] = d3;
391  }
392 };
393 
398 class DimsNCHW : public Dims4
399 {
400 public:
405  : Dims4()
406  {
410  }
411 
420  DimsNCHW(int batchSize, int channels, int height, int width)
421  : Dims4(batchSize, channels, height, width)
422  {
426  }
427 
433  int& n() { return d[0]; }
434 
440  int n() const { return d[0]; }
441 
447  int& c() { return d[1]; }
448 
454  int c() const { return d[1]; }
455 
461  int& h() { return d[2]; }
462 
468  int h() const { return d[2]; }
469 
475  int& w() { return d[3]; }
476 
482  int w() const { return d[3]; }
483 };
484 
493 class Weights
494 {
495 public:
497  const void* values;
498  int64_t count;
499 };
500 
510 {
511 public:
512  virtual void* data() const = 0;
513  virtual std::size_t size() const = 0;
514  virtual DataType type() const = 0;
515  virtual void destroy() = 0;
516 protected:
517  virtual ~IHostMemory() {}
518 };
519 
//!
//! \enum LayerType
//! \brief The type values of layer classes.
//! \see ILayer::getType()
//!
enum class LayerType : int
{
    kCONVOLUTION = 0,      //!< Convolution layer.
    kFULLY_CONNECTED = 1,  //!< Fully connected layer.
    kACTIVATION = 2,       //!< Activation layer.
    kPOOLING = 3,          //!< Pooling layer.
    kLRN = 4,              //!< LRN layer.
    kSCALE = 5,            //!< Scale layer.
    kSOFTMAX = 6,          //!< SoftMax layer.
    kDECONVOLUTION = 7,    //!< Deconvolution layer.
    kCONCATENATION = 8,    //!< Concatenation layer.
    kELEMENTWISE = 9,      //!< Elementwise layer.
    kPLUGIN = 10,          //!< Plugin layer.
    kRNN = 11,             //!< RNN layer.
    kUNARY = 12,           //!< UnaryOp operation layer.
    kPADDING = 13,         //!< Padding layer.
    kSHUFFLE = 14,         //!< Shuffle layer.
    kREDUCE = 15,          //!< Reduce layer.
    kTOPK = 16,            //!< TopK layer.
    kGATHER = 17,          //!< Gather layer.
    kMATRIX_MULTIPLY = 18, //!< Matrix multiply layer.
    kRAGGED_SOFTMAX = 19,  //!< Ragged softmax layer.
    kCONSTANT = 20,        //!< Constant layer.
    kRNN_V2 = 21           //!< RNNv2 layer.
};

//! Maximum number of elements in LayerType enum. \see LayerType
template <>
inline int EnumMax<LayerType>()
{
    return 22;
}
558 
563 enum class TensorLocation : int
564 {
565  kDEVICE = 0,
566  kHOST = 1
567 };
568 
569 template <>
571 {
572  return 2;
573 }
574 
//!
//! \class ITensor
//! \brief A tensor in an INetworkDefinition.
//!
class ITensor
{
public:
    //! Set the tensor name (must outlive or be copied by the implementation's contract).
    virtual void setName(const char* name) = 0;

    //! Get the tensor name.
    virtual const char* getName() const = 0;

    //! Set the tensor dimensions.
    virtual void setDimensions(Dims dimensions) = 0; // only valid for input tensors

    //! Get the tensor dimensions.
    virtual Dims getDimensions() const = 0;

    //! Set the tensor data type.
    virtual void setType(DataType type) = 0;

    //! Get the tensor data type.
    virtual DataType getType() const = 0;

    //! Whether the tensor is a network input.
    virtual bool isNetworkInput() const = 0;

    //! Whether the tensor is a network output.
    virtual bool isNetworkOutput() const = 0;

protected:
    virtual ~ITensor() {}

public:
    //! Set whether to enable broadcast of tensor across the batch.
    virtual void setBroadcastAcrossBatch(bool broadcastAcrossBatch) = 0;

    //! Check if tensor is broadcast across the batch.
    virtual bool getBroadcastAcrossBatch() const = 0;

    //! Get the storage location of the tensor. \see TensorLocation
    virtual TensorLocation getLocation() const = 0;

    //! Set the storage location of the tensor. \see TensorLocation
    virtual void setLocation(TensorLocation location) = 0;
};
716 
//!
//! \class ILayer
//! \brief Base class for all layer classes in a network definition.
//!
class ILayer
{
public:
    //! Return the type of the layer. \see LayerType
    virtual LayerType getType() const = 0;

    //! Set the name of the layer.
    virtual void setName(const char* name) = 0;

    //! Return the name of the layer.
    virtual const char* getName() const = 0;

    //! Get the number of inputs of the layer.
    virtual int getNbInputs() const = 0;

    //! Get the layer input corresponding to the given index.
    virtual ITensor* getInput(int index) const = 0;

    //! Get the number of outputs of the layer.
    virtual int getNbOutputs() const = 0;

    //! Get the layer output corresponding to the given index.
    virtual ITensor* getOutput(int index) const = 0;

protected:
    virtual ~ILayer() {}
};
777 
//!
//! \class IConvolutionLayer
//! \brief A convolution layer in a network definition.
//!
class IConvolutionLayer : public ILayer
{
public:
    //! Set the HW kernel size of the convolution.
    virtual void setKernelSize(DimsHW kernelSize) = 0;

    //! Get the HW kernel size of the convolution.
    virtual DimsHW getKernelSize() const = 0;

    //! Set the number of output maps for the convolution.
    virtual void setNbOutputMaps(int nbOutputMaps) = 0;

    //! Get the number of output maps for the convolution.
    virtual int getNbOutputMaps() const = 0;

    //! Set the stride of the convolution.
    virtual void setStride(DimsHW stride) = 0;

    //! Get the stride of the convolution.
    virtual DimsHW getStride() const = 0;

    //! Set the padding of the convolution.
    virtual void setPadding(DimsHW padding) = 0;

    //! Get the padding of the convolution.
    virtual DimsHW getPadding() const = 0; // padding defaults to 0

    //! Set the number of groups for a grouped convolution.
    virtual void setNbGroups(int nbGroups) = 0;

    //! Get the number of groups for a grouped convolution.
    virtual int getNbGroups() const = 0;

    //! Set the kernel weights for the convolution.
    virtual void setKernelWeights(Weights weights) = 0;

    //! Get the kernel weights for the convolution.
    virtual Weights getKernelWeights() const = 0;

    //! Set the bias weights for the convolution.
    virtual void setBiasWeights(Weights weights) = 0;

    //! Get the bias weights for the convolution.
    virtual Weights getBiasWeights() const = 0;

    //! Set the dilation for a convolution.
    virtual void setDilation(DimsHW dims) = 0;

    //! Get the dilation for a convolution.
    virtual DimsHW getDilation() const = 0;

protected:
    virtual ~IConvolutionLayer() {}
};
927 
955 {
956 public:
962  virtual void setNbOutputChannels(int nbOutputs) = 0;
963 
969  virtual int getNbOutputChannels() const = 0;
970 
976  virtual void setKernelWeights(Weights weights) = 0;
977 
983  virtual Weights getKernelWeights() const = 0;
984 
992  virtual void setBiasWeights(Weights weights) = 0;
993 
999  virtual Weights getBiasWeights() const = 0;
1000 
1001 protected:
1002  virtual ~IFullyConnectedLayer() {}
1003 };
1004 
1010 enum class ActivationType : int
1011 {
1012  kRELU = 0,
1013  kSIGMOID = 1,
1014  kTANH = 2
1015 };
1016 
1017 template <>
1019 {
1020  return 3;
1021 }
1022 
//!
//! \class IActivationLayer
//! \brief An activation layer in a network definition.
//!
class IActivationLayer : public ILayer
{
public:
    //! Set the type of activation to be performed. \see ActivationType
    virtual void setActivationType(ActivationType type) = 0;

    //! Get the type of activation to be performed. \see ActivationType
    virtual ActivationType getActivationType() const = 0;

protected:
    virtual ~IActivationLayer() {}
};
1052 
1058 enum class PoolingType : int
1059 {
1060  kMAX = 0, // Maximum over elements
1061  kAVERAGE = 1, // Average over elements. If the tensor is padded, the count includes the padding
1062  kMAX_AVERAGE_BLEND = 2 // Blending between the max pooling and average pooling: (1-blendFactor)*maxPool + blendFactor*avgPool
1063 };
1064 
1065 template <>
1067 {
1068  return 3;
1069 }
1070 
//!
//! \class IPoolingLayer
//! \brief A pooling layer in a network definition.
//!
class IPoolingLayer : public ILayer
{
public:
    //! Set the type of pooling to be performed. \see PoolingType
    virtual void setPoolingType(PoolingType type) = 0;

    //! Get the type of pooling to be performed. \see PoolingType
    virtual PoolingType getPoolingType() const = 0;

    //! Set the HW window size for pooling.
    virtual void setWindowSize(DimsHW windowSize) = 0;

    //! Get the HW window size for pooling.
    virtual DimsHW getWindowSize() const = 0;

    //! Set the stride for pooling.
    virtual void setStride(DimsHW stride) = 0;

    //! Get the stride for pooling.
    virtual DimsHW getStride() const = 0;

    //! Set the padding for pooling.
    virtual void setPadding(DimsHW padding) = 0;

    //! Get the padding for pooling.
    virtual DimsHW getPadding() const = 0;

    //! Set the blending factor for the kMAX_AVERAGE_BLEND pooling mode.
    virtual void setBlendFactor(float blendFactor) = 0;

    //! Get the blending factor for the kMAX_AVERAGE_BLEND pooling mode.
    virtual float getBlendFactor() const = 0;

    //! Set whether average pooling uses as a denominator the overlap area
    //! between the window and the unpadded input, rather than the window size.
    virtual void setAverageCountExcludesPadding(bool exclusive) = 0;

    //! Get whether average pooling excludes padding from the denominator.
    virtual bool getAverageCountExcludesPadding() const = 0;

protected:
    virtual ~IPoolingLayer() {}
};
1182 
//!
//! \class ILRNLayer
//! \brief A LRN (local response normalization) layer in a network definition.
//!
class ILRNLayer : public ILayer
{
public:
    //! Set the LRN window size.
    virtual void setWindowSize(int windowSize) = 0;

    //! Get the LRN window size.
    virtual int getWindowSize() const = 0;

    //! Set the LRN alpha value.
    virtual void setAlpha(float alpha) = 0;

    //! Get the LRN alpha value.
    virtual float getAlpha() const = 0;

    //! Set the LRN beta value.
    virtual void setBeta(float beta) = 0;

    //! Get the LRN beta value.
    virtual float getBeta() const = 0;

    //! Set the LRN K value.
    virtual void setK(float k) = 0;

    //! Get the LRN K value.
    virtual float getK() const = 0;

protected:
    virtual ~ILRNLayer() {}
};
1256 
1262 enum class ScaleMode : int
1263 {
1264  kUNIFORM = 0,
1265  kCHANNEL = 1,
1266  kELEMENTWISE = 2
1267 };
1268 
1269 template <>
1271 {
1272  return 3;
1273 }
1274 
//!
//! \class IScaleLayer
//! \brief A Scale layer in a network definition.
//!
class IScaleLayer : public ILayer
{
public:
    //! Set the scale mode. \see ScaleMode
    virtual void setMode(ScaleMode mode) = 0;

    //! Get the scale mode. \see ScaleMode
    virtual ScaleMode getMode() const = 0;

    //! Set the shift value.
    virtual void setShift(Weights shift) = 0;

    //! Get the shift value.
    virtual Weights getShift() const = 0;

    //! Set the scale value.
    virtual void setScale(Weights scale) = 0;

    //! Get the scale value.
    virtual Weights getScale() const = 0;

    //! Set the power value.
    virtual void setPower(Weights power) = 0;

    //! Get the power value.
    virtual Weights getPower() const = 0;

protected:
    virtual ~IScaleLayer() {}
};
1357 
//!
//! \class ISoftMaxLayer
//! \brief A Softmax layer in a network definition.
//!
class ISoftMaxLayer : public ILayer
{
protected:
    virtual ~ISoftMaxLayer() {}
public:
    //! Set the axis along which softmax is computed (bitmask of axes).
    virtual void setAxes(uint32_t axes) = 0;

    //! Get the axis along which softmax occurs.
    virtual uint32_t getAxes() const = 0;
};
1396 
1407 {
1408 protected:
1409  virtual ~IConcatenationLayer() {}
1410 
1411 public:
1420  virtual void setAxis(int axis) = 0;
1421 
1427  virtual int getAxis() const = 0;
1428 };
1429 
1438 {
1439 public:
1445  virtual void setKernelSize(DimsHW kernelSize) = 0;
1446 
1452  virtual DimsHW getKernelSize() const = 0;
1453 
1459  virtual void setNbOutputMaps(int nbOutputMaps) = 0;
1460 
1466  virtual int getNbOutputMaps() const = 0;
1467 
1473  virtual void setStride(DimsHW stride) = 0;
1474 
1480  virtual DimsHW getStride() const = 0;
1481 
1491  virtual void setPadding(DimsHW padding) = 0;
1492 
1498  virtual DimsHW getPadding() const = 0; // padding defaults to 0
1499 
1512  virtual void setNbGroups(int nbGroups) = 0;
1513 
1519  virtual int getNbGroups() const = 0;
1520 
1529  virtual void setKernelWeights(Weights weights) = 0;
1530 
1536  virtual Weights getKernelWeights() const = 0;
1537 
1547  virtual void setBiasWeights(Weights weights) = 0;
1548 
1554  virtual Weights getBiasWeights() const = 0;
1555 
1556 protected:
1557  virtual ~IDeconvolutionLayer() {}
1558 };
1559 
1567 enum class ElementWiseOperation : int
1568 {
1569  kSUM = 0,
1570  kPROD = 1,
1571  kMAX = 2,
1572  kMIN = 3,
1573  kSUB = 4,
1574  kDIV = 5,
1575  kPOW = 6
1576 };
1577 
1578 template <>
1580 {
1581  return 7;
1582 }
1583 
1594 {
1595 public:
1603  virtual void setOperation(ElementWiseOperation type) = 0;
1604 
1612  virtual ElementWiseOperation getOperation() const = 0;
1613 
1614 protected:
1615  virtual ~IElementWiseLayer() {}
1616 };
1617 
//!
//! \class IGatherLayer
//! \brief A gather layer in a network definition.
//!
class IGatherLayer : public ILayer
{
public:
    //! Set the non-batch dimension axis to gather on.
    virtual void setGatherAxis(int axis) = 0;

    //! Get the non-batch dimension axis to gather on.
    virtual int getGatherAxis() const = 0;

protected:
    virtual ~IGatherLayer() {}
};
1639 
1719 enum class RNNOperation : int
1720 {
1721  kRELU = 0,
1722  kTANH = 1,
1723  kLSTM = 2,
1724  kGRU = 3
1725 };
1726 
1727 template <>
1729 {
1730  return 4;
1731 }
1732 
1740 enum class RNNDirection : int
1741 {
1742  kUNIDIRECTION = 0,
1743  kBIDIRECTION = 1
1744 };
1745 
1746 template <>
1748 {
1749  return 2;
1750 }
1751 
1767 enum class RNNInputMode : int
1768 {
1769  kLINEAR = 0,
1770  kSKIP = 1
1771 };
1772 
1773 template <>
1775 {
1776  return 2;
1777 }
1778 
//!
//! \class IRNNLayer
//! \brief An RNN layer in a network definition.
//!
class IRNNLayer : public ILayer
{
public:
    //! Get the number of layers in the RNN.
    virtual unsigned getLayerCount() const = 0;

    //! Get the size of the hidden layers.
    virtual std::size_t getHiddenSize() const = 0;

    //! Get the sequence length.
    virtual int getSeqLength() const = 0;

    //! Set the operation of the RNN layer. \see RNNOperation
    virtual void setOperation(RNNOperation op) = 0;

    //! Get the operation of the RNN layer. \see RNNOperation
    virtual RNNOperation getOperation() const = 0;

    //! Set the input mode of the RNN layer. \see RNNInputMode
    virtual void setInputMode(RNNInputMode op) = 0;

    //! Get the input mode of the RNN layer. \see RNNInputMode
    virtual RNNInputMode getInputMode() const = 0;

    //! Set the direction of the RNN layer. \see RNNDirection
    virtual void setDirection(RNNDirection op) = 0;

    //! Get the direction of the RNN layer. \see RNNDirection
    virtual RNNDirection getDirection() const = 0;

    //! Set the weight parameters for the RNN.
    virtual void setWeights(Weights weights) = 0;

    //! Get the weight parameters for the RNN.
    virtual Weights getWeights() const = 0;

    //! Set the bias parameters for the RNN.
    virtual void setBias(Weights bias) = 0;

    //! Get the bias parameters for the RNN.
    virtual Weights getBias() const = 0;

    //! Get the length of the data being processed by the RNN for use in computing other values.
    virtual int getDataLength() const = 0;

    //! Set the initial hidden state of the RNN.
    virtual void setHiddenState(ITensor& hidden) = 0;

    //! Get the initial hidden state of the RNN.
    virtual ITensor* getHiddenState() const = 0;

    //! Set the initial cell state of the RNN (LSTM only).
    virtual void setCellState(ITensor& cell) = 0;

    //! Get the initial cell state of the RNN.
    virtual ITensor* getCellState() const = 0;

protected:
    virtual ~IRNNLayer() {}
};
2112 
2120 enum class RNNGateType : int
2121 {
2122  kINPUT = 0,
2123  kOUTPUT = 1,
2124  kFORGET = 2,
2125  kUPDATE = 3,
2126  kRESET = 4,
2127  kCELL = 5,
2128  kHIDDEN = 6
2129 };
2130 
2131 template <>
2133 {
2134  return 7;
2135 }
2136 
//!
//! \class IRNNv2Layer
//! \brief An RNN layer in a network definition, version 2.
//!
class IRNNv2Layer : public ILayer
{
public:
    virtual int32_t getLayerCount() const = 0;   //!< Get the layer count of the RNN
    virtual int32_t getHiddenSize() const = 0;   //!< Get the hidden size of the RNN
    virtual int32_t getMaxSeqLength() const = 0; //!< Get the maximum sequence length of the RNN
    virtual int32_t getDataLength() const = 0;   //!< Get the maximum data length of the RNN

    //! Specify individual sequence lengths in the batch with the ITensor provided.
    virtual void setSequenceLengths(ITensor& seqLengths) = 0;

    //! Get the sequence lengths specified for the RNN.
    virtual ITensor *getSequenceLengths() const = 0;

    //! Set the operation of the RNN layer. \see RNNOperation
    virtual void setOperation(RNNOperation op) = 0;

    //! Get the operation of the RNN layer. \see RNNOperation
    virtual RNNOperation getOperation() const = 0;

    //! Set the input mode of the RNN layer. \see RNNInputMode
    virtual void setInputMode(RNNInputMode op) = 0;

    //! Get the input mode of the RNN layer. \see RNNInputMode
    virtual RNNInputMode getInputMode() const = 0;

    //! Set the direction of the RNN layer. \see RNNDirection
    virtual void setDirection(RNNDirection op) = 0;

    //! Get the direction of the RNN layer. \see RNNDirection
    virtual RNNDirection getDirection() const = 0;

    //! Set the weight parameters for an individual gate in the RNN.
    //! \param isW True selects the input (W) weights, false the recurrent (R) weights.
    virtual void setWeightsForGate(int layerIndex, RNNGateType gate, bool isW, Weights weights) = 0;

    //! Get the weight parameters for an individual gate in the RNN.
    virtual Weights getWeightsForGate(int layerIndex, RNNGateType gate, bool isW) const = 0;

    //! Set the bias parameters for an individual gate in the RNN.
    virtual void setBiasForGate(int layerIndex, RNNGateType gate, bool isW, Weights bias) = 0;

    //! Get the bias parameters for an individual gate in the RNN.
    virtual Weights getBiasForGate(int layerIndex, RNNGateType gate, bool isW) const = 0;

    //! Set the initial hidden state of the RNN.
    virtual void setHiddenState(ITensor& hidden) = 0;

    //! Get the initial hidden state of the RNN.
    virtual ITensor* getHiddenState() const = 0;

    //! Set the initial cell state of the LSTM.
    virtual void setCellState(ITensor& cell) = 0;

    //! Get the initial cell state of the RNN.
    virtual ITensor* getCellState() const = 0;
protected:
    virtual ~IRNNv2Layer() {}
};
2299 
2306 {
2307 public:
2322  virtual DimsHW compute(DimsHW inputDims, DimsHW kernelSize, DimsHW stride, DimsHW padding, DimsHW dilation, const char* layerName) = 0;
2323 
2324 protected:
2325  virtual ~IOutputDimensionsFormula() {}
2326 };
2327 
2335 enum class PluginFormat: uint8_t
2336 {
2337  kNCHW = 0,
2338  kNC2HW2 = 1,
2339  kNHWC8 = 2
2340 };
2341 
2342 template <>
2344 {
2345  return 3;
2346 }
2347 
//!
//! \class IPlugin
//! \brief Plugin class for user-implemented layers.
//!
class IPlugin
{
public:
    //! Get the number of outputs from the layer.
    virtual int getNbOutputs() const = 0;

    //! Get the dimension of an output tensor given the dimensions of the inputs.
    virtual Dims getOutputDimensions(int index, const Dims* inputs, int nbInputDims) = 0;

    //! Configure the layer; called by the builder prior to initialize().
    virtual void configure(const Dims* inputDims, int nbInputs, const Dims* outputDims, int nbOutputs, int maxBatchSize) = 0;

    //! Initialize the layer for execution. \return 0 for success, else non-zero.
    virtual int initialize() = 0;

    //! Release resources acquired during plugin layer initialization.
    virtual void terminate() = 0;

    //! Find the workspace size required by the layer for the given batch size.
    virtual size_t getWorkspaceSize(int maxBatchSize) const = 0;

    //! Execute the layer. \return 0 for success, else non-zero.
    virtual int enqueue(int batchSize, const void* const* inputs, void** outputs, void* workspace, cudaStream_t stream) = 0;

    //! Find the size of the serialization buffer required.
    virtual size_t getSerializationSize() = 0;

    //! Serialize the layer into the provided buffer (of size getSerializationSize()).
    virtual void serialize(void* buffer) = 0;
protected:
    virtual ~IPlugin() {}
};
2449 
//!
//! \class IPluginExt
//! \brief Plugin class for user-implemented layers, with format-selection support.
//!
class IPluginExt : public IPlugin
{
public:
    //! Return the API version with which this plugin was built.
    virtual int getTensorRTVersion() const
    {
        return NV_TENSORRT_VERSION;
    }

    //! Check whether the plugin supports the given data type and format combination.
    virtual bool supportsFormat(DataType type, PluginFormat format) const = 0;

    //! Configure the layer with the chosen type/format; called by the builder
    //! prior to initialize().
    virtual void configureWithFormat(const Dims* inputDims, int nbInputs, const Dims* outputDims, int nbOutputs, DataType type, PluginFormat format, int maxBatchSize) = 0;

protected:
    //! Derived classes should not implement this; sealed so that only
    //! configureWithFormat() is used for IPluginExt.
    void configure(const Dims* inputDims, int nbInputs, const Dims* outputDims, int nbOutputs, int maxBatchSize) _TENSORRT_FINAL {}
    virtual ~IPluginExt() {}
};
2508 
//!
//! \class IPluginLayer
//! \brief Layer type for plugins.
//!
class IPluginLayer : public ILayer
{
public:
    //! Get the plugin for the layer. \see IPlugin
    virtual IPlugin& getPlugin() = 0;

protected:
    virtual ~IPluginLayer() {}
};
2529 
2537 enum class UnaryOperation : int
2538 {
2539  kEXP = 0,
2540  kLOG = 1,
2541  kSQRT = 2,
2542  kRECIP = 3,
2543  kABS = 4,
2544  kNEG = 5,
2545 };
2546 
2547 template <>
2549 {
2550  return 6;
2551 }
2552 
//!
//! \class IUnaryLayer
//! \brief Layer that represents an unary operation.
//!
class IUnaryLayer : public ILayer
{
public:
    //! Set the unary operation for the layer. \see UnaryOperation
    virtual void setOperation(UnaryOperation op) = 0;

    //! Get the unary operation for the layer. \see UnaryOperation
    virtual UnaryOperation getOperation() const = 0;

protected:
    virtual ~IUnaryLayer() {}
};
2578 
2584 enum class ReduceOperation : int
2585 {
2586  kSUM = 0,
2587  kPROD = 1,
2588  kMAX = 2,
2589  kMIN = 3,
2590  kAVG = 4
2591 };
2592 
2593 template <>
2595 {
2596  return 5;
2597 }
2598 
//!
//! \class IReduceLayer
//! \brief Layer that represents a reduction operator.
//!
class IReduceLayer : public ILayer
{
public:
    //! Set the reduce operation for the layer. \see ReduceOperation
    virtual void setOperation(ReduceOperation op) = 0;

    //! Get the reduce operation for the layer. \see ReduceOperation
    virtual ReduceOperation getOperation() const = 0;

    //! Set the axes over which to reduce (bitmask).
    virtual void setReduceAxes(uint32_t reduceAxes) = 0;

    //! Get the axes over which to reduce.
    virtual uint32_t getReduceAxes() const = 0;

    //! Set whether reduced dimensions are kept (with extent 1) in the output.
    virtual void setKeepDimensions(bool keepDimensions) = 0;

    //! Get whether reduced dimensions are kept in the output.
    virtual bool getKeepDimensions() const = 0;

protected:
    virtual ~IReduceLayer() {}
};
2652 
//!
//! \class IPaddingLayer
//! \brief Layer that represents a padding operation.
//!
class IPaddingLayer : public ILayer
{
public:
    //! Set the padding that is applied at the start of the tensor.
    //! Negative padding results in trimming the edge by the specified amount.
    virtual void setPrePadding(DimsHW padding) = 0;

    //! Get the padding that is applied at the start of the tensor.
    virtual DimsHW getPrePadding() const = 0;

    //! Set the padding that is applied at the end of the tensor.
    //! Negative padding results in trimming the edge by the specified amount.
    virtual void setPostPadding(DimsHW padding) = 0;

    //! Get the padding that is applied at the end of the tensor.
    virtual DimsHW getPostPadding() const = 0;

protected:
    virtual ~IPaddingLayer() {}
};
2699 
2708 {
2716 };
2717 
//!
//! \class IShuffleLayer
//! \brief Layer type for shuffling data: transpose, reshape, transpose.
//!
class IShuffleLayer : public ILayer
{
public:
    //! Set the permutation applied by the first transpose operation.
    virtual void setFirstTranspose(Permutation permutation) = 0;

    //! Get the permutation applied by the first transpose operation.
    virtual Permutation getFirstTranspose() const = 0;

    //! Set the reshaped dimensions applied between the two transposes.
    virtual void setReshapeDimensions(Dims dimensions) = 0;

    //! Get the reshaped dimensions.
    virtual Dims getReshapeDimensions() const = 0;

    //! Set the permutation applied by the second transpose operation.
    virtual void setSecondTranspose(Permutation permutation) = 0;

    //! Get the permutation applied by the second transpose operation.
    virtual Permutation getSecondTranspose() const = 0;

protected:
    virtual ~IShuffleLayer() {}
};
2794 
2800 enum class TopKOperation : int
2801 {
2802  kMAX = 0,
2803  kMIN = 1,
2804 };
2805 
2806 template <>
2808 {
2809  return 2;
2810 }
2811 
//!
//! \class ITopKLayer
//! \brief Layer that represents a TopK reduction.
//!
class ITopKLayer : public ILayer
{
public:
    //! Set the TopK operation for the layer. \see TopKOperation
    virtual void setOperation(TopKOperation op) = 0;

    //! Get the TopK operation for the layer. \see TopKOperation
    virtual TopKOperation getOperation() const = 0;

    //! Set the k value for the layer (number of elements to keep).
    virtual void setK(int k) = 0;

    //! Get the k value for the layer.
    virtual int getK() const = 0;

    //! Set the axes over which to reduce (bitmask).
    virtual void setReduceAxes(uint32_t reduceAxes) = 0;

    //! Get the axes over which to reduce.
    virtual uint32_t getReduceAxes() const = 0;

protected:
    virtual ~ITopKLayer() {}
};
2867 
2888 {
2889 public:
2896  virtual void setTranspose(int index, bool val) = 0;
2897 
2903  virtual bool getTranspose(int index) const = 0;
2904 
2905 protected:
2906  virtual ~IMatrixMultiplyLayer() {}
2907 };
2908 
2922 {
2923 protected:
2924  virtual ~IRaggedSoftMaxLayer() {}
2925 };
2926 
//!
//! \class IConstantLayer
//! \brief Layer that represents a constant value.
//!
class IConstantLayer : public ILayer
{
public:
    //! Set the weights for the layer.
    virtual void setWeights(Weights weights) = 0;

    //! Get the weights for the layer.
    virtual Weights getWeights() const = 0;

    //! Set the dimensions for the layer.
    virtual void setDimensions(Dims dimensions) = 0;

    //! Get the dimensions for the layer.
    virtual Dims getDimensions() const = 0;

protected:
    virtual ~IConstantLayer() {}
};
2969 
2976 {
2977 public:
2994  virtual ITensor* addInput(const char* name, DataType type, Dims dimensions) = 0;
2995 
3001  virtual void markOutput(ITensor& tensor) = 0;
3002 
3016  virtual IConvolutionLayer* addConvolution(ITensor& input, int nbOutputMaps, DimsHW kernelSize, Weights kernelWeights, Weights biasWeights) = 0;
3017 
3030  virtual IFullyConnectedLayer* addFullyConnected(ITensor& input, int nbOutputs, Weights kernelWeights, Weights biasWeights) = 0;
3031 
3042  virtual IActivationLayer* addActivation(ITensor& input, ActivationType type) = 0;
3043 
3055  virtual IPoolingLayer* addPooling(ITensor& input, PoolingType type, DimsHW windowSize) = 0;
3056 
3070  virtual ILRNLayer* addLRN(ITensor& input, int window, float alpha, float beta, float k) = 0;
3071 
3090  virtual IScaleLayer* addScale(ITensor& input, ScaleMode mode, Weights shift, Weights scale, Weights power) = 0;
3091 
3099  virtual ISoftMaxLayer* addSoftMax(ITensor& input) = 0;
3100 
3113  virtual IConcatenationLayer* addConcatenation(ITensor* const* inputs, int nbInputs) = 0;
3114 
3128  virtual IDeconvolutionLayer* addDeconvolution(ITensor& input, int nbOutputMaps, DimsHW kernelSize, Weights kernelWeights, Weights biasWeights) = 0;
3129 
3141  virtual IElementWiseLayer* addElementWise(ITensor& input1, ITensor& input2, ElementWiseOperation op) = 0;
3142 
3197  virtual IRNNLayer* addRNN(ITensor& inputs, int layerCount, std::size_t hiddenSize, int maxSeqLen, RNNOperation op, RNNInputMode mode, RNNDirection dir, Weights weights, Weights bias) = 0;
3198 
3210  virtual IPluginLayer* addPlugin(ITensor* const* inputs, int nbInputs, IPlugin& plugin) = 0;
3211 
3222  virtual IUnaryLayer* addUnary(ITensor& input, UnaryOperation operation) = 0;
3223 
3234  virtual IPaddingLayer* addPadding(ITensor& input, DimsHW prePadding, DimsHW postPadding) = 0;
3235 
3243  virtual IShuffleLayer* addShuffle(ITensor& input) = 0;
3244 
3255 
3264 
3277 
3288 
3301 
3312 
3320  virtual int getNbLayers() const = 0;
3321 
3331  virtual ILayer* getLayer(int index) const = 0;
3332 
3340  virtual int getNbInputs() const = 0;
3341 
3351  virtual ITensor* getInput(int index) const = 0; // adding inputs invalidates indexing here
3352 
3360  virtual int getNbOutputs() const = 0;
3361 
3371  virtual ITensor* getOutput(int index) const = 0; // adding outputs invalidates indexing here
3372 
3376  virtual void destroy() = 0;
3377 
3378 protected:
3379  virtual ~INetworkDefinition() {}
3380 
3381 public:
3401  virtual IReduceLayer* addReduce(ITensor& input, ReduceOperation operation, uint32_t reduceAxes, bool keepDimensions) = 0;
3402 
3427  virtual ITopKLayer* addTopK(ITensor& input, TopKOperation op, int k, uint32_t reduceAxes) = 0;
3428 
3440  virtual IGatherLayer* addGather(ITensor& data, ITensor& indices, int axis) = 0;
3441 
3452  virtual IRaggedSoftMaxLayer* addRaggedSoftMax(ITensor &input, ITensor &bounds) = 0;
3453 
3466  virtual IMatrixMultiplyLayer* addMatrixMultiply(ITensor& input0, bool transpose0, ITensor& input1, bool transpose1) = 0;
3467 
3478  virtual IConstantLayer* addConstant(Dims dimensions, Weights weights) = 0;
3479 
3534  virtual IRNNv2Layer *addRNNv2(ITensor &input, int32_t layerCount, int32_t hiddenSize, int32_t maxSeqLen, RNNOperation op) = 0;
3535 
3547  virtual IPluginLayer* addPluginExt(ITensor* const* inputs, int nbInputs, IPluginExt& plugin) = 0;
3548 };
3549 
//!
//! \class IProfiler
//! \brief Application-implemented interface for profiling layer timings.
//!
// NOTE(review): the "class IProfiler" declaration line was dropped by the
// documentation extraction; the class name is confirmed by the destructor
// below.
class IProfiler
{
public:
    //!
    //! \brief Layer time reporting callback.
    //! \param layerName The name of the layer, set when constructing the network definition.
    //! \param ms The time in milliseconds to execute the layer.
    //!
    virtual void reportLayerTime(const char* layerName, float ms) = 0;

protected:
    virtual ~IProfiler() {}
};
3574 
3575 class ICudaEngine;
3576 
3586 {
3587 public:
3599  virtual bool execute(int batchSize, void** bindings) = 0;
3600 
3614  virtual bool enqueue(int batchSize, void** bindings, cudaStream_t stream, cudaEvent_t* inputConsumed) = 0;
3615 
3623  virtual void setDebugSync(bool sync) = 0;
3624 
3630  virtual bool getDebugSync() const = 0;
3631 
3637  virtual void setProfiler(IProfiler*) = 0;
3638 
3644  virtual IProfiler* getProfiler() const = 0;
3645 
3651  virtual const ICudaEngine& getEngine() const = 0;
3652 
3656  virtual void destroy() = 0;
3657 
3658 protected:
3659  virtual ~IExecutionContext() {}
3660 
3661 public:
3669  virtual void setName(const char* name) = 0;
3670 
3676  virtual const char* getName() const = 0;
3677 
3689  virtual void setDeviceMemory(void* memory) = 0;
3690 };
3691 
3698 {
3699 public:
3705  virtual int getNbBindings() const = 0;
3706 
3720  virtual int getBindingIndex(const char* name) const = 0;
3721 
3732  virtual const char* getBindingName(int bindingIndex) const = 0;
3733 
3742  virtual bool bindingIsInput(int bindingIndex) const = 0;
3743 
3752  virtual Dims getBindingDimensions(int bindingIndex) const = 0;
3753 
3762  virtual DataType getBindingDataType(int bindingIndex) const = 0;
3763 
3771  virtual int getMaxBatchSize() const = 0;
3772 
3781  virtual int getNbLayers() const = 0;
3782 
3789  virtual std::size_t getWorkspaceSize() const = 0;
3790 
3800  virtual IHostMemory* serialize() const = 0;
3801 
3808 
3812  virtual void destroy() = 0;
3813 
3824  virtual TensorLocation getLocation(int bindingIndex) const = 0;
3825 
3826 protected:
3827  virtual ~ICudaEngine() {}
3828 
3829 public:
3838 
3844  virtual size_t getDeviceMemorySize() const = 0;
3845 };
3846 
3852 enum class CalibrationAlgoType : int
3853 {
3854  kLEGACY_CALIBRATION = 0,
3855  kENTROPY_CALIBRATION = 1
3856 };
3857 
3858 template <>
3860 {
3861  return 2;
3862 }
3863 
3876 {
3877 public:
3883  virtual int getBatchSize() const = 0;
3884 
3898  virtual bool getBatch(void* bindings[], const char* names[], int nbBindings) = 0; // get a pointer to the input batch
3899 
3912  virtual const void* readCalibrationCache(std::size_t& length) = 0;
3913 
3922  virtual void writeCalibrationCache(const void* ptr, std::size_t length) = 0;
3923 
3929  virtual CalibrationAlgoType getAlgorithm() = 0;
3930 
3931 protected:
3932  virtual ~IInt8Calibrator() {}
3933 };
3934 
3939 {
3943  virtual CalibrationAlgoType getAlgorithm() { return CalibrationAlgoType::kENTROPY_CALIBRATION; }
3944 protected:
3945  virtual ~IInt8EntropyCalibrator() {}
3946 };
3947 
3953 {
3954 public:
3958  virtual CalibrationAlgoType getAlgorithm() { return CalibrationAlgoType::kLEGACY_CALIBRATION; }
3959 
3965  virtual double getQuantile() const = 0;
3966 
3972  virtual double getRegressionCutoff() const = 0;
3973 
3985  virtual const void* readHistogramCache(std::size_t& length) = 0;
3986 
3995  virtual void writeHistogramCache(const void* ptr, std::size_t length) = 0;
3996 
3997 protected:
3998  virtual ~IInt8LegacyCalibrator() {}
3999 };
4000 
//!
//! \class IGpuAllocator
//! \brief Application-implemented class for controlling GPU allocation.
//!
// NOTE(review): the "class IGpuAllocator" declaration line was dropped by the
// documentation extraction; restored — the interface has no declared
// destructor in this dump, so none is added here.
class IGpuAllocator
{
public:
    //!
    //! \brief Allocate a block of GPU memory.
    //! \param size The size of the requested allocation, in bytes.
    //! \param alignment The required alignment of the allocation, in bytes.
    //! \param flags Reserved for future use.
    //!
    virtual void* allocate(uint64_t size, uint64_t alignment, uint32_t flags) = 0;

    //!
    //! \brief Free a block of GPU memory previously returned by allocate().
    //! \param memory The memory address to free.
    //!
    virtual void free(void* memory) = 0;
};
4031 
4038 {
4039 public:
4046 
4054  virtual void setMaxBatchSize(int batchSize) = 0;
4055 
4063  virtual int getMaxBatchSize() const = 0;
4064 
4072  virtual void setMaxWorkspaceSize(std::size_t workspaceSize) = 0;
4073 
4081  virtual std::size_t getMaxWorkspaceSize() const = 0;
4082 
4094  virtual void setHalf2Mode(bool mode) = 0;
4095 
4103  virtual bool getHalf2Mode() const = 0;
4104 
4110  virtual void setDebugSync(bool sync) = 0;
4111 
4117  virtual bool getDebugSync() const = 0;
4118 
4127  virtual void setMinFindIterations(int minFind) = 0;
4128 
4134  virtual int getMinFindIterations() const = 0;
4135 
4144  virtual void setAverageFindIterations(int avgFind) = 0;
4145 
4151  virtual int getAverageFindIterations() const = 0;
4152 
4159 
4163  virtual bool platformHasFastFp16() const = 0;
4164 
4168  virtual bool platformHasFastInt8() const = 0;
4169 
4173  virtual void destroy() = 0;
4174 
4180  virtual void setInt8Mode(bool mode) = 0;
4181 
4187  virtual bool getInt8Mode() const = 0;
4188 
4192  virtual void setInt8Calibrator(IInt8Calibrator* calibrator) = 0;
4193 
4194 protected:
4195  virtual ~IBuilder() {}
4196 
4197 public:
4207  virtual void setGpuAllocator(IGpuAllocator* allocator) = 0;
4208 
4218  virtual void setFp16Mode(bool mode) = 0;
4219 
4225  virtual bool getFp16Mode() const = 0;
4226 };
4227 
4234 {
4235 public:
4250  virtual IPlugin* createPlugin(const char* layerName, const void* serialData, size_t serialLength) = 0;
4251 };
4252 
4259 {
4260 public:
4270  virtual nvinfer1::ICudaEngine* deserializeCudaEngine(const void* blob, std::size_t size, IPluginFactory* pluginFactory) = 0;
4271 
4275  virtual void destroy() = 0;
4276 
4277 protected:
4278  virtual ~IRuntime() {}
4279 
4280 public:
4289  virtual void setGpuAllocator(IGpuAllocator* allocator) = 0;
4290 };
4291 
//!
//! \class ILogger
//! \brief Application-implemented logging interface for the builder, engine and runtime.
//!
class ILogger
{
public:
    //!
    //! \enum Severity
    //! \brief The severity corresponding to a log message.
    //!
    enum class Severity
    {
        kINTERNAL_ERROR = 0, //!< An internal error has occurred. Execution is unrecoverable.
        kERROR = 1,          //!< An application error has occurred.
        kWARNING = 2,        //!< An application error has been discovered, but TensorRT has recovered or fallen back to a default.
        kINFO = 3            //!< Informational messages.
    };

    //!
    //! \brief A callback implemented by the application to handle logging messages.
    //! \param severity The severity of the message.
    //! \param msg The log message, null terminated.
    //!
    virtual void log(Severity severity, const char* msg) = 0;

protected:
    virtual ~ILogger() {}
};

//! Maximum number of elements in ILogger::Severity enum. \see ILogger::Severity
template <>
inline int EnumMax<ILogger::Severity>()
{
    return 4;
}
4333 
4334 } // namespace nvinfer1
4335 
4336 extern "C" TENSORRTAPI void* createInferBuilder_INTERNAL(void* logger, int version);
4337 extern "C" TENSORRTAPI void* createInferRuntime_INTERNAL(void* logger, int version);
4338 
4342 extern "C" TENSORRTAPI nvinfer1::ILogger* getLogger();
4343 
4349 extern "C" TENSORRTAPI int getInferLibVersion();
4350 
4351 namespace nvinfer1
4352 {
namespace // unnamed namespace in case the compiler doesn't inline these
{
//!
//! \brief Create an instance of an IBuilder class.
//! \param logger The logging class for the builder.
//!
inline IBuilder* createInferBuilder(ILogger& logger)
{
    return static_cast<IBuilder*>(createInferBuilder_INTERNAL(&logger, NV_TENSORRT_VERSION));
}

//!
//! \brief Create an instance of an IRuntime class.
//! \param logger The logging class for the runtime.
//!
inline IRuntime* createInferRuntime(ILogger& logger)
{
    return static_cast<IRuntime*>(createInferRuntime_INTERNAL(&logger, NV_TENSORRT_VERSION));
}
}
4375 }
4376 
4377 #endif
DimensionType type[MAX_DIMS]
The type of each dimension.
Definition: NvInfer.h:164
int EnumMax< PluginFormat >()
Maximum number of elements in PluginFormat enum.
Definition: NvInfer.h:2343
virtual bool enqueue(int batchSize, void **bindings, cudaStream_t stream, cudaEvent_t *inputConsumed)=0
Asynchronously execute inference on a batch.
int w() const
Get the width.
Definition: NvInfer.h:482
int n() const
Get the index count.
Definition: NvInfer.h:440
An engine for executing inference on a built network.
Definition: NvInfer.h:3697
Subtract the second element from the first.
Perform the normal matrix multiplication in the first recurrent layer.
DataType
The type of weights and tensors.
Definition: NvInfer.h:118
virtual void setAverageCountExcludesPadding(bool exclusive)=0
Set whether average pooling uses as a denominator the overlap area between the window and the unpadde...
virtual bool isNetworkInput() const =0
Whether the tensor is a network input.
virtual DimsHW getDilation() const =0
Get the dilation for a convolution.
Severity
Definition: NvInfer.h:4308
virtual void setMinFindIterations(int minFind)=0
Set the number of minimization iterations used when timing layers.
virtual int getMaxBatchSize() const =0
Get the maximum batch size.
int EnumMax< TensorLocation >()
Maximum number of elements in TensorLocation enum.
Definition: NvInfer.h:570
virtual ITensor * getCellState() const =0
Get the initial cell state of the RNN.
virtual IPlugin * createPlugin(const char *layerName, const void *serialData, size_t serialLength)=0
Create a plugin from serialized data.
virtual ITensor * addInput(const char *name, DataType type, Dims dimensions)=0
Add an input tensor to the network.
virtual uint32_t getAxes() const =0
Get the axis along which softmax occurs.
virtual void setBias(Weights bias)=0
Set the bias parameters for the RNN.
DimsNCHW(int batchSize, int channels, int height, int width)
Construct a DimsNCHW given batch size, channel count, height and width.
Definition: NvInfer.h:420
virtual void setKernelWeights(Weights weights)=0
Set the kernel weights for the deconvolution.
virtual ReduceOperation getOperation() const =0
Get the reduce operation for the layer.
#define NV_TENSORRT_MAJOR
TensorRT major version.
Definition: NvInfer.h:56
virtual IExecutionContext * createExecutionContext()=0
Create an execution context.
virtual int getBatchSize() const =0
Get the batch size used for calibration batches.
RNNOperation
Enumerates the RNN operations that may be performed by an RNN layer.
Definition: NvInfer.h:1719
virtual void setOperation(ElementWiseOperation type)=0
Set the binary operation for the layer.
virtual void setBroadcastAcrossBatch(bool broadcastAcrossBatch)=0
Set whether to enable broadcast of tensor across the batch.
A Softmax layer in a network definition.
Definition: NvInfer.h:1367
virtual int getNbGroups() const =0
Get the number of groups for a convolution.
virtual Weights getPower() const =0
Get the power value.
virtual void setWeightsForGate(int layerIndex, RNNGateType gate, bool isW, Weights weights)=0
Set the weight parameters for an individual gate in the RNN.
Definition: NvInfer.h:2707
virtual double getQuantile() const =0
The quantile (between 0 and 1) that will be used to select the region maximum when the quantile metho...
virtual std::size_t getWorkspaceSize() const =0
Get the amount of workspace the engine uses.
An application error has occurred.
An application error has been discovered, but TensorRT has recovered or fallen back to a default...
virtual Weights getBias() const =0
Get the bias parameter vector for the RNN.
virtual void destroy()=0
Destroy the allocated memory.
virtual IMatrixMultiplyLayer * addMatrixMultiply(ITensor &input0, bool transpose0, ITensor &input1, bool transpose1)=0
Add a MatrixMultiply layer to the network.
virtual bool bindingIsInput(int bindingIndex) const =0
Determine whether a binding is an input binding.
virtual int getGatherAxis() const =0
Get the non-batch dimension axis to gather on.
virtual void setInputMode(RNNInputMode op)=0
Set the input mode of the RNN layer.
virtual const char * getName() const =0
Return the name of a layer.
virtual Weights getScale() const =0
Get the scale value.
virtual DimsHW getKernelSize() const =0
Get the HW kernel size of the deconvolution.
Layer that represents an unary operation.
Definition: NvInfer.h:2558
virtual void destroy()=0
Destroy this object.
virtual DimsHW getStride() const =0
Get the stride of the deconvolution.
Rectified linear activation.
virtual bool getDebugSync() const =0
Get the debug sync flag.
virtual bool execute(int batchSize, void **bindings)=0
Synchronously execute inference on a batch.
virtual void destroy()=0
Destroy this INetworkDefinition object.
An Activation layer in a network definition.
Definition: NvInfer.h:1032
TENSORRTAPI void * createInferRuntime_INTERNAL(void *logger, int version)
Internal C entry point for creating IRuntime.
int w() const
Get the width.
Definition: NvInfer.h:357
virtual Dims getOutputDimensions(int index, const Dims *inputs, int nbInputDims)=0
Get the dimension of an output tensor.
RNNDirection
Enumerates the RNN direction that may be performed by an RNN layer.
Definition: NvInfer.h:1740
int EnumMax< DataType >()
Maximum number of elements in DataType enum.
Definition: NvInfer.h:127
Layer that represents a Matrix Multiplication.
Definition: NvInfer.h:2887
virtual DimsHW getKernelSize() const =0
Get the HW kernel size of the convolution.
virtual DataType type() const =0
The type of the memory that was allocated.
virtual int getAverageFindIterations() const =0
Query the number of averaging iterations.
DimsCHW()
Construct an empty DimsCHW object.
Definition: NvInfer.h:296
No operation is performed on the first recurrent layer.
const void * values
The weight values, in a contiguous array.
Definition: NvInfer.h:497
int c() const
Get the channel count.
Definition: NvInfer.h:454
virtual bool getDebugSync() const =0
Query whether the builder will use debug synchronization.
virtual void setAlpha(float alpha)=0
Set the LRN alpha value.
struct CUevent_st * cudaEvent_t
Forward declaration of cudaEvent_t.
Definition: NvInfer.h:99
virtual Dims getBindingDimensions(int bindingIndex) const =0
Get the dimensions of a binding.
int h() const
Get the height.
Definition: NvInfer.h:468
A convolution layer in a network definition.
Definition: NvInfer.h:789
virtual ITopKLayer * addTopK(ITensor &input, TopKOperation op, int k, uint32_t reduceAxes)=0
Add a TopK layer to the network.
Allows a serialized engine to be deserialized.
Definition: NvInfer.h:4258
virtual Dims getReshapeDimensions() const =0
Get the reshaped dimensions.
virtual void writeHistogramCache(const void *ptr, std::size_t length)=0
Save a histogram cache.
NCHW with 2-element packed channels.
virtual void setPoolingOutputDimensionsFormula(IOutputDimensionsFormula *formula)=0
Set the pooling output dimensions formula.
A RaggedSoftmax layer in a network definition.
Definition: NvInfer.h:2921
virtual unsigned getLayerCount() const =0
Get the number of layers in the RNN.
virtual int getNbOutputs() const =0
Get the number of outputs from the layer.
virtual void configure(const Dims *inputDims, int nbInputs, const Dims *outputDims, int nbOutputs, int maxBatchSize)=0
Configure the layer.
ScaleMode
Controls how scale is applied in a Scale layer.
Definition: NvInfer.h:1262
Plugin class for user-implemented layers.
Definition: NvInfer.h:2355
Layer that represents a constant value.
Definition: NvInfer.h:2931
virtual ILayer * getLayer(int index) const =0
Get the layer specified by the given index.
A Scale layer in a network definition.
Definition: NvInfer.h:1295
virtual bool getBatch(void *bindings[], const char *names[], int nbBindings)=0
Get a batch of input for calibration.
virtual IRaggedSoftMaxLayer * addRaggedSoftMax(ITensor &input, ITensor &bounds)=0
Add a RaggedSoftMax layer to the network.
virtual int getNbGroups() const =0
Get the number of groups for a deconvolution.
NHWC with 8-element packed channels (C must be a multiple of 8).
virtual void setNbOutputMaps(int nbOutputMaps)=0
Set the number of output maps for the convolution.
virtual void log(Severity severity, const char *msg)=0
virtual const char * getBindingName(int bindingIndex) const =0
Retrieve the name corresponding to a binding index.
virtual IRNNv2Layer * addRNNv2(ITensor &input, int32_t layerCount, int32_t hiddenSize, int32_t maxSeqLen, RNNOperation op)=0
Add a layerCount-deep RNN layer to the network with hiddenSize internal states that can take a batch...
virtual bool getFp16Mode() const =0
Query whether 16-bit kernels are permitted.
virtual IScaleLayer * addScale(ITensor &input, ScaleMode mode, Weights shift, Weights scale, Weights power)=0
Add a Scale layer to the network.
virtual void setHiddenState(ITensor &hidden)=0
Set the initial hidden state of the RNN with the provided hidden ITensor.
int EnumMax< RNNOperation >()
Maximum number of elements in RNNOperation enum.
Definition: NvInfer.h:1728
int EnumMax< LayerType >()
Maximum number of elements in LayerType enum.
Definition: NvInfer.h:554
virtual void setAxes(uint32_t axes)=0
Set the axis along which softmax is computed. Currently, only one axis can be set.
static const int MAX_DIMS
The maximum number of dimensions supported for a tensor.
Definition: NvInfer.h:161
virtual void setPrePadding(DimsHW padding)=0
Set the padding that is applied at the start of the tensor.
virtual DataType getBindingDataType(int bindingIndex) const =0
Determine the required data type for a buffer from its binding index.
A fully connected layer in a network definition. This layer expects an input tensor of three or more ...
Definition: NvInfer.h:954
virtual ITensor * getSequenceLengths() const =0
Get the sequence lengths specified for the RNN.
virtual DataType getType() const =0
Get the data type of a tensor.
PluginFormat
Definition: NvInfer.h:2335
TENSORRTAPI void * createInferBuilder_INTERNAL(void *logger, int version)
Internal C entry point for creating IBuilder.
virtual void setKeepDimensions(bool keepDimensions)=0
Set the boolean that specifies whether or not to keep the reduced dimensions for the layer...
virtual void setBeta(float beta)=0
Set the LRN beta value.
int w() const
Get the width.
Definition: NvInfer.h:251
ReduceOperation
Enumerates the reduce operations that may be performed by a Reduce layer.
Definition: NvInfer.h:2584
An internal error has occurred. Execution is unrecoverable.
virtual const char * getName() const =0
Return the name of the execution context.
A LRN layer in a network definition.
Definition: NvInfer.h:1190
Descriptor for three-dimensional data.
Definition: NvInfer.h:258
virtual bool getInt8Mode() const =0
Query whether Int8 mode is used.
virtual int getNbLayers() const =0
Get the number of layers in the network.
virtual ElementWiseOperation getOperation() const =0
Get the binary operation for the layer.
virtual void setOperation(UnaryOperation op)=0
Set the unary operation for the layer.
virtual IPaddingLayer * addPadding(ITensor &input, DimsHW prePadding, DimsHW postPadding)=0
Add a padding layer to the network.
TensorLocation
The location for tensor data storage, device or host.
Definition: NvInfer.h:563
UnaryOp Operation Layer.
Definition: NvInfer.h:1618
Builds an engine from a network definition.
Definition: NvInfer.h:4037
virtual void markOutput(ITensor &tensor)=0
Mark a tensor as a network output.
virtual DimsHW getPadding() const =0
Get the padding of the deconvolution.
virtual void setOperation(RNNOperation op)=0
Set the operation of the RNN layer.
virtual void writeCalibrationCache(const void *ptr, std::size_t length)=0
Save a calibration cache.
virtual void setBiasWeights(Weights weights)=0
Set the bias weights for the convolution.
TENSORRTAPI nvinfer1::ILogger * getLogger()
Return the logger object.
virtual TopKOperation getOperation() const =0
Get the operation for the layer.
Layer that represents a TopK reduction.
Definition: NvInfer.h:2817
virtual RNNInputMode getInputMode() const =0
Get the input mode of the RNN layer.
int EnumMax< ActivationType >()
Maximum number of elements in ActivationType enum.
Definition: NvInfer.h:1018
virtual void setInt8Mode(bool mode)=0
Set whether or not Int8 kernels are permitted.
virtual void setReshapeDimensions(Dims dimensions)=0
Set the reshaped dimensions.
DataType type
The type of the weights.
Definition: NvInfer.h:496
virtual float getBeta() const =0
Get the LRN beta value.
virtual void setPadding(DimsHW padding)=0
Set the padding of the deconvolution.
virtual int enqueue(int batchSize, const void *const *inputs, void **outputs, void *workspace, cudaStream_t stream)=0
Execute the layer.
virtual TensorLocation getLocation() const =0
Get the storage location of a tensor.
An RNN layer in a network definition, version 2.
Definition: NvInfer.h:2144
virtual void setType(DataType type)=0
Set the data type of a tensor.
virtual ITensor * getOutput(int index) const =0
Get the output tensor specified by the given index.
virtual Permutation getSecondTranspose() const =0
Get the permutation applied by the second transpose operation.
int & h()
Get the height.
Definition: NvInfer.h:336
Elements correspond to different spatial data.
virtual DimsHW getPadding() const =0
Get the padding of the convolution.
virtual nvinfer1::INetworkDefinition * createNetwork()=0
Create a network definition object.
virtual int getNbOutputChannels() const =0
Get the number of output channels K from the fully connected layer.
virtual bool getTranspose(int index) const =0
Get the transpose flag for an input tensor.
virtual Weights getWeights() const =0
Get the weights for the layer.
virtual void setFp16Mode(bool mode)=0
Set whether or not 16-bit kernels are permitted.
virtual void reportLayerTime(const char *layerName, float ms)=0
Layer time reporting callback.
virtual void setK(int k)=0
Set the k value for the layer.
virtual IDeconvolutionLayer * addDeconvolution(ITensor &input, int nbOutputMaps, DimsHW kernelSize, Weights kernelWeights, Weights biasWeights)=0
Add a deconvolution layer to the network.
virtual IActivationLayer * addActivation(ITensor &input, ActivationType type)=0
Add an activation layer to the network.
virtual Weights getKernelWeights() const =0
Get the kernel weights.
virtual void setName(const char *name)=0
Set the name of the execution context.
virtual int getNbOutputs() const =0
Get the number of outputs in the network.
int & n()
Get the index count.
Definition: NvInfer.h:433
Data stored on device.
int EnumMax< TopKOperation >()
Maximum number of elements in TopKOperation enum.
Definition: NvInfer.h:2807
Elements correspond to different sequence values.
virtual std::size_t size() const =0
The size in bytes of the data that was allocated.
int EnumMax()
Maximum number of elements in an enumeration type.
int c() const
Get the channel count.
Definition: NvInfer.h:329
int & w()
Get the width.
Definition: NvInfer.h:350
virtual IElementWiseLayer * addElementWise(ITensor &input1, ITensor &input2, ElementWiseOperation op)=0
Add an elementwise layer to the network.
virtual bool getAverageCountExcludesPadding() const =0
Get whether exclusive pooling uses as a denominator the overlap area between the window and the unpadd...
Layer that represents a reduction operator.
Definition: NvInfer.h:2604
Definition: NvInfer.h:3952
PoolingType
The type of pooling to perform in a pooling layer.
Definition: NvInfer.h:1058
int h() const
Get the height.
Definition: NvInfer.h:343
Sum of the two elements.
virtual float getBlendFactor() const =0
Get the blending factor for the max_average_blend mode: max_average_blendPool = (1-blendFactor)*maxPo...
int order[Dims::MAX_DIMS]
Definition: NvInfer.h:2715
int EnumMax< RNNGateType >()
Maximum number of elements in RNNGateType enum.
Definition: NvInfer.h:2132
virtual void setMode(ScaleMode mode)=0
Set the scale mode.
virtual ITensor * getInput(int index) const =0
Get the layer input corresponding to the given index.
virtual bool supportsFormat(DataType type, PluginFormat format) const =0
Check format support.
virtual void setConvolutionOutputDimensionsFormula(IOutputDimensionsFormula *formula)=0
Set the convolution output dimensions formula.
int & w()
Get the width.
Definition: NvInfer.h:244
virtual void setSequenceLengths(ITensor &seqLengths)=0
Specify individual sequence lengths in the batch with the ITensor pointed to by seqLengths.
Plugin factory for deserialization.
Definition: NvInfer.h:4233
virtual int getNbInputs() const =0
Get the number of inputs in the network.
virtual ITensor * getHiddenState() const =0
Get the initial hidden state of the RNN.
virtual void setOperation(TopKOperation op)=0
Set the operation for the layer.
Descriptor for data with one channel dimension and two spatial dimensions.
Definition: NvInfer.h:290
virtual DimsHW getWindowSize() const =0
Get the window size for pooling.
The first element to the power of the second element.
A network definition for input to the builder.
Definition: NvInfer.h:2975
virtual int getNbLayers() const =0
Get the number of layers in the network.
virtual IHostMemory * serialize() const =0
Serialize the network to a stream.
virtual CalibrationAlgoType getAlgorithm()
Definition: NvInfer.h:3958
virtual IUnaryLayer * addUnary(ITensor &input, UnaryOperation operation)=0
Add a unary layer to the network.
virtual void setOperation(ReduceOperation op)=0
Set the reduce operation for the layer.
virtual void destroy()=0
Destroy this object.
int EnumMax< CalibrationAlgoType >()
Maximum number of elements in CalibrationAlgoType enum.
Definition: NvInfer.h:3859
virtual RNNDirection getDirection() const =0
Get the direction of the RNN layer.
virtual DimsHW getPadding() const =0
Get the padding for pooling.
virtual size_t getSerializationSize()=0
Find the size of the serialization buffer required.
virtual ITensor * getHiddenState() const =0
Get the initial hidden state of the RNN.
virtual IConstantLayer * addConstant(Dims dimensions, Weights weights)=0
Add a constant layer to the network.
virtual void setGpuAllocator(IGpuAllocator *allocator)=0
Set the GPU allocator.
Divide the first element by the second.
virtual int getMinFindIterations() const =0
Query the number of minimization iterations.
virtual LayerType getType() const =0
Return the type of a layer.
virtual void setDeviceMemory(void *memory)=0
set the device memory for use by this execution context.
virtual float getK() const =0
Get the LRN K value.
virtual RNNDirection getDirection() const =0
Get the direction of the RNN layer.
Product of the two elements.
Dims2()
Construct an empty Dims2 object.
Definition: NvInfer.h:177
virtual IProfiler * getProfiler() const =0
Get the profiler.
TopKOperation
Enumerates the operations that may be performed by a TopK layer.
Definition: NvInfer.h:2800
virtual Weights getWeightsForGate(int layerIndex, RNNGateType gate, bool isW) const =0
Get the weight parameters for an individual gate in the RNN.
struct CUstream_st * cudaStream_t
Forward declaration of cudaStream_t.
Definition: NvInfer.h:98
Dims3(int d0, int d1, int d2)
Construct a Dims3 from 3 elements.
Definition: NvInfer.h:277
virtual void setStride(DimsHW stride)=0
Set the stride for pooling.
virtual int getSeqLength() const =0
Get the sequence length.
Descriptor for four-dimensional data.
Definition: NvInfer.h:364
virtual DimsHW getStride() const =0
Get the stride for pooling.
Elements correspond to different channels.
int EnumMax< ReduceOperation >()
Maximum number of elements in ReduceOperation enum.
Definition: NvInfer.h:2594
int EnumMax< DimensionType >()
Maximum number of elements in DimensionType enum.
Definition: NvInfer.h:145
ActivationType
Enumerates the types of activation to perform in an activation layer.
Definition: NvInfer.h:1010
virtual void setDeconvolutionOutputDimensionsFormula(IOutputDimensionsFormula *formula)=0
Set the deconvolution output dimensions formula.
Descriptor for data with one index dimension, one channel dimension and two spatial dimensions...
Definition: NvInfer.h:398
virtual void setKernelWeights(Weights weights)=0
Set the kernel weights, given as a KxC matrix in row-major order.
virtual void setMaxBatchSize(int batchSize)=0
Set the maximum batch size.
virtual int getBindingIndex(const char *name) const =0
Retrieve the binding index for a named tensor.
virtual IFullyConnectedLayer * addFullyConnected(ITensor &input, int nbOutputs, Weights kernelWeights, Weights biasWeights)=0
Add a fully connected layer to the network.
virtual void setGatherAxis(int axis)=0
Set the non-batch dimension axis to gather on. The axis must be less than the number of non-batch dim...
virtual int getK() const =0
Get the k value for the layer.
virtual const void * readCalibrationCache(std::size_t &length)=0
Load a calibration cache.
virtual void setBiasForGate(int layerIndex, RNNGateType gate, bool isW, Weights bias)=0
Set the bias parameters for an individual gate in the RNN.
virtual IReduceLayer * addReduce(ITensor &input, ReduceOperation operation, uint32_t reduceAxes, bool keepDimensions)=0
Add a reduce layer to the network.
Data stored on host.
virtual void setPadding(DimsHW padding)=0
Set the padding for pooling.
DimsHW()
Construct an empty DimsHW object.
Definition: NvInfer.h:207
virtual void setWindowSize(DimsHW windowSize)=0
Set the window size for pooling.
Base class for all layer classes in a network definition.
Definition: NvInfer.h:722
virtual void setTranspose(int index, bool val)=0
Set the transpose flag for an input tensor.
Dims4(int d0, int d1, int d2, int d3)
Construct a Dims4 from 4 elements.
Definition: NvInfer.h:384
int EnumMax< PoolingType >()
Maximum number of elements in PoolingType enum.
Definition: NvInfer.h:1066
int & h()
Get the height.
Definition: NvInfer.h:461
virtual int getWindowSize() const =0
Get the LRN window size.
int & h()
Get the height.
Definition: NvInfer.h:230
virtual void free(void *memory)=0
virtual void setActivationType(ActivationType type)=0
Set the type of activation to be performed.
TENSORRTAPI int getInferLibVersion()
Return the library version number.
virtual std::size_t getHiddenSize() const =0
Get the size of the hidden layers.
int & c()
Get the channel count.
Definition: NvInfer.h:447
virtual void setStride(DimsHW stride)=0
Set the stride of the deconvolution.
Structure to define the dimensions of a tensor.
Definition: NvInfer.h:158
Network iterates from first to last and vice versa and outputs concatenated.
#define NV_TENSORRT_PATCH
TensorRT patch version.
Definition: NvInfer.h:58
virtual IRNNLayer * addRNN(ITensor &inputs, int layerCount, std::size_t hiddenSize, int maxSeqLen, RNNOperation op, RNNInputMode mode, RNNDirection dir, Weights weights, Weights bias)=0
Add a layerCount-deep RNN layer to the network with a sequence length of maxSeqLen and hiddenSize in...
virtual void setReduceAxes(uint32_t reduceAxes)=0
Set which axes to reduce for the layer.
virtual void setGpuAllocator(IGpuAllocator *allocator)=0
Set the GPU allocator.
Layer type for shuffling data.
Definition: NvInfer.h:2718
virtual bool platformHasFastFp16() const =0
Determine whether the platform has fast native fp16.
DimsCHW(int channels, int height, int width)
Construct a DimsCHW given channel count, height and width.
Definition: NvInfer.h:310
A elementwise layer in a network definition.
Definition: NvInfer.h:1593
A Pooling layer in a network definition.
Definition: NvInfer.h:1079
#define _TENSORRT_FINAL
Defines which symbols are exported.
Definition: NvInfer.h:68
virtual RNNOperation getOperation() const =0
Get the operation of the RNN layer.
virtual void serialize(void *buffer)=0
Serialize the layer.
virtual void setDilation(DimsHW dims)=0
Set the dilation for a convolution.
virtual void setOperation(RNNOperation op)=0
Set the operation of the RNN layer.
virtual void configureWithFormat(const Dims *inputDims, int nbInputs, const Dims *outputDims, int nbOutputs, DataType type, PluginFormat format, int maxBatchSize)=0
Configure the layer.
virtual DimsHW getPrePadding() const =0
Get the padding that is applied at the start of the tensor.
virtual void setDirection(RNNDirection op)=0
Set the direction of the RNN layer.
int d[MAX_DIMS]
The extent of each dimension.
Definition: NvInfer.h:163
Application-implemented interface to compute layer output sizes.
Definition: NvInfer.h:2305
Minimum of the two elements.
virtual Weights getBiasWeights() const =0
Get the bias weights.
#define NV_TENSORRT_MINOR
TensorRT minor version.
Definition: NvInfer.h:57
virtual int getNbBindings() const =0
Get the number of binding indices.
virtual void setPadding(DimsHW padding)=0
Set the padding of the convolution.
virtual Dims getDimensions() const =0
Get the dimensions of a tensor.
ElementWiseOperation
Enumerates the binary operations that may be performed by an ElementWise layer.
Definition: NvInfer.h:1567
int64_t count
The number of weights in the array.
Definition: NvInfer.h:498
Three-gate network consisting of Gated Recurrent Units.
virtual const char * getName() const =0
Get the tensor name.
RNNGateType
Identifies an individual gate within an RNN cell.
Definition: NvInfer.h:2120
Network iterations from first input to last input.
virtual void setBiasWeights(Weights weights)=0
Set the bias weights for the deconvolution.
int & c()
Get the channel count.
Definition: NvInfer.h:322
virtual void setSecondTranspose(Permutation permutation)=0
Set the permutation applied by the second transpose operation.
virtual bool getKeepDimensions() const =0
Get the boolean that specifies whether or not to keep the reduced dimensions for the layer...
virtual bool isNetworkOutput() const =0
Whether the tensor is a network output.
Dims3()
Construct an empty Dims3 object.
Definition: NvInfer.h:264
virtual bool getBroadcastAcrossBatch() const =0
Check if tensor is broadcast across the batch.
virtual ITensor * getCellState() const =0
Get the initial cell state of the RNN.
virtual void setCellState(ITensor &cell)=0
Set the initial cell state of the LSTM with the provided cell ITensor.
A tensor in a network definition.
Definition: NvInfer.h:580
virtual void destroy()=0
Destroy this object;.
virtual void setLocation(TensorLocation location)=0
Set the storage location of a tensor.
int EnumMax< UnaryOperation >()
Maximum number of elements in UnaryOperation enum.
Definition: NvInfer.h:2548
virtual void setBlendFactor(float blendFactor)=0
Set the blending factor for the max_average_blend mode: max_average_blendPool = (1-blendFactor)*maxPo...
An array of weights used as a layer parameter.
Definition: NvInfer.h:493
int & w()
Get the width.
Definition: NvInfer.h:475
int h() const
Get the height.
Definition: NvInfer.h:237
virtual int getDataLength() const =0
Get the length of the data being processed by the RNN for use in computing other values.
virtual nvinfer1::ICudaEngine * deserializeCudaEngine(const void *blob, std::size_t size, IPluginFactory *pluginFactory)=0
Deserialize an engine from a stream.
virtual nvinfer1::ICudaEngine * buildCudaEngine(nvinfer1::INetworkDefinition &network)=0
Build a CUDA engine from a network definition.
Dims2(int d0, int d1)
Construct a Dims2 from 2 elements.
Definition: NvInfer.h:189
virtual Weights getShift() const =0
Get the shift value.
virtual void setAxis(int axis)=0
Set the axis along which concatenation occurs.
virtual int getNbOutputMaps() const =0
Get the number of output feature maps for the deconvolution.
Dims4()
Construct an empty Dims4 object.
Definition: NvInfer.h:370
virtual void setScale(Weights scale)=0
Set the scale value.
virtual IPoolingLayer * addPooling(ITensor &input, PoolingType type, DimsHW windowSize)=0
Add a pooling layer to the network.
int nbDims
The number of dimensions.
Definition: NvInfer.h:162
virtual bool platformHasFastInt8() const =0
Determine whether the platform has fast native int8.
virtual DimsHW getStride() const =0
Get the stride of the convolution.
Application-implemented interface for calibration.
Definition: NvInfer.h:3875
virtual DimsHW compute(DimsHW inputDims, DimsHW kernelSize, DimsHW stride, DimsHW padding, DimsHW dilation, const char *layerName)=0
Application-implemented interface to compute the HW output dimensions of a layer from the layer input...
Application-implemented logging interface for the builder, engine and runtime.
Definition: NvInfer.h:4300
virtual IPluginLayer * addPluginExt(ITensor *const *inputs, int nbInputs, IPluginExt &plugin)=0
Add a plugin layer to the network using an IPluginExt interface.
virtual void setDirection(RNNDirection op)=0
Set the direction of the RNN layer.
virtual void setInt8Calibrator(IInt8Calibrator *calibrator)=0
Set Int8 Calibration interface.
DimsHW(int height, int width)
Construct a DimsHW given height and width.
Definition: NvInfer.h:219
virtual void * data() const =0
A pointer to the raw data that is owned by the library.
Identical coefficients across all elements of the tensor.
void configure(const Dims *inputDims, int nbInputs, const Dims *outputDims, int nbOutputs, int maxBatchSize) _TENSORRT_FINAL
Derived classes should not implement this. In a C++11 API it would be override final.
Definition: NvInfer.h:2505
int EnumMax< ElementWiseOperation >()
Maximum number of elements in ElementWiseOperation enum.
Definition: NvInfer.h:1579
Plugin class for user-implemented layers.
Definition: NvInfer.h:2458
Layer that represents a padding operation.
Definition: NvInfer.h:2661
virtual void setBiasWeights(Weights weights)=0
Set the bias weights.
virtual void setNbOutputMaps(int nbOutputMaps)=0
Set the number of output feature maps for the deconvolution.
virtual void setNbOutputChannels(int nbOutputs)=0
Set the number of output channels K from the fully connected layer.
virtual void setKernelWeights(Weights weights)=0
Set the kernel weights for the convolution.
virtual Weights getBiasWeights() const =0
Get the bias weights for the convolution.
virtual size_t getWorkspaceSize(int maxBatchSize) const =0
Find the workspace size required by the layer.
virtual uint32_t getReduceAxes() const =0
Get the axes over which to reduce for the layer.
virtual ITensor * getOutput(int index) const =0
Get the layer output corresponding to the given index.
virtual IPlugin & getPlugin()=0
Get the plugin for the layer.
virtual void setInputMode(RNNInputMode op)=0
Set the input mode of the RNN layer.
virtual void setNbGroups(int nbGroups)=0
Set the number of groups for a convolution.
virtual void setCellState(ITensor &cell)=0
Set the initial cell state of the RNN with the provided cell ITensor.
virtual void setKernelSize(DimsHW kernelSize)=0
Set the HW kernel size of the convolution.
CalibrationAlgoType
Version of calibration algorithm to use.
Definition: NvInfer.h:3852
virtual void setStride(DimsHW stride)=0
Set the stride of the convolution.
virtual float getAlpha() const =0
Get the LRN alpha value.
virtual void setReduceAxes(uint32_t reduceAxes)=0
Set the axes over which to reduce.
Informational messages.
virtual IConvolutionLayer * addConvolution(ITensor &input, int nbOutputMaps, DimsHW kernelSize, Weights kernelWeights, Weights biasWeights)=0
Add a convolution layer to the network.
virtual void setFirstTranspose(Permutation permutation)=0
Set the permutation applied by the first transpose operation.
DimsNCHW()
Construct an empty DimsNCHW object.
Definition: NvInfer.h:404
Class to handle library allocated memory that is accessible to the user.
Definition: NvInfer.h:509
LayerType
The type values of layer classes.
Definition: NvInfer.h:527
int EnumMax< ScaleMode >()
Maximum number of elements in ScaleMode enum.
Definition: NvInfer.h:1270
virtual void setDimensions(Dims dimensions)=0
Set the dimensions for the layer.
virtual Weights getBiasWeights() const =0
Get the bias weights for the deconvolution.
virtual void setProfiler(IProfiler *)=0
Set the profiler.
virtual void setWeights(Weights weights)=0
Set the weights for the layer.
RNNInputMode
Enumerates the RNN input modes that may occur with an RNN layer.
Definition: NvInfer.h:1767
A RNN layer in a network definition.
Definition: NvInfer.h:1788
virtual void setName(const char *name)=0
Set the name of a layer.
virtual ActivationType getActivationType() const =0
Get the type of activation to be performed.
virtual Dims getDimensions() const =0
Get the dimensions for the layer.
virtual void setDebugSync(bool sync)=0
Set the debug sync flag.
virtual void destroy()=0
Destroy this object.
virtual void setPostPadding(DimsHW padding)=0
Set the padding that is applied at the end of the tensor.
Layer type for plugins.
Definition: NvInfer.h:2516
virtual UnaryOperation getOperation() const =0
Get the unary operation for the layer.
DimensionType
The type of data encoded across this dimension.
Definition: NvInfer.h:136
virtual IConcatenationLayer * addConcatenation(ITensor *const *inputs, int nbInputs)=0
Add a concatenation layer to the network.
Four-gate LSTM network w/o peephole connections.
virtual DimsHW getPostPadding() const =0
Get the padding that is applied at the end of the tensor.
virtual const ICudaEngine & getEngine() const =0
Get the associated engine.
virtual void * allocate(uint64_t size, uint64_t alignment, uint32_t flags)=0
virtual void setHiddenState(ITensor &hidden)=0
Set the initial hidden state of the RNN with the provided hidden ITensor.
virtual TensorLocation getLocation(int bindingIndex) const =0
Get location of binding.
virtual ScaleMode getMode() const =0
Get the scale mode.
virtual ILRNLayer * addLRN(ITensor &input, int window, float alpha, float beta, float k)=0
Add a LRN layer to the network.
virtual int getTensorRTVersion() const
Return the API version with which this plugin was built.
Definition: NvInfer.h:2466
virtual void setMaxWorkspaceSize(std::size_t workspaceSize)=0
Set the maximum workspace size.
virtual void setShift(Weights shift)=0
Set the shift value.
virtual size_t getDeviceMemorySize() const =0
Return the amount of device memory required by an execution context.
virtual Weights getKernelWeights() const =0
Get the kernel weights for the deconvolution.
Application-implemented class for controlling allocation on the GPU.
Definition: NvInfer.h:4006
Context for executing inference using an engine.
Definition: NvInfer.h:3585
virtual ITensor * getInput(int index) const =0
Get the input tensor specified by the given index.
virtual void setPower(Weights power)=0
Set the power value.
virtual double getRegressionCutoff() const =0
The fraction (between 0 and 1) of the maximum used to define the regression cutoff when using regress...
virtual void setHalf2Mode(bool mode)=0
Set whether half2 mode is used.
virtual std::size_t getMaxWorkspaceSize() const =0
Get the maximum workspace size.
virtual int getAxis() const =0
Get the axis along which concatenation occurs.
Descriptor for two-dimensional spatial data.
Definition: NvInfer.h:201
UnaryOperation
Enumerates the unary operations that may be performed by a Unary layer.
Definition: NvInfer.h:2537
virtual void setAverageFindIterations(int avgFind)=0
Set the number of minimization iterations used when timing layers.
virtual int getNbOutputMaps() const =0
Get the number of output maps for the convolution.
virtual RNNOperation getOperation() const =0
Get the operation of the RNN layer.
virtual bool getHalf2Mode() const =0
Query whether half2 mode is used.
virtual int getNbInputs() const =0
Get the number of inputs of a layer.
virtual ISoftMaxLayer * addSoftMax(ITensor &input)=0
Add a SoftMax layer to the network.
Elements correspond to different batch index.
virtual Weights getBiasForGate(int layerIndex, RNNGateType gate, bool isW) const =0
Get the bias parameters for an individual gate in the RNN.
Descriptor for two-dimensional data.
Definition: NvInfer.h:171
virtual int getMaxBatchSize() const =0
Get the maximum batch size which can be used for inference.
virtual void setName(const char *name)=0
Set the tensor name.
virtual IOutputDimensionsFormula & getPoolingOutputDimensionsFormula() const =0
Get the pooling output dimensions formula.
virtual int getNbOutputs() const =0
Get the number of outputs of a layer.
virtual IGatherLayer * addGather(ITensor &data, ITensor &indices, int axis)=0
Add a gather layer to the network.
virtual IOutputDimensionsFormula & getDeconvolutionOutputDimensionsFormula() const =0
Get the deconvolution output dimensions formula.
virtual void setDimensions(Dims dimensions)=0
Set the dimensions of a tensor.
virtual void setWindowSize(int windowSize)=0
Set the LRN window size.
virtual Weights getKernelWeights() const =0
Get the kernel weights for the convolution.
virtual CalibrationAlgoType getAlgorithm()=0
Get the algorithm used by this calibrator.
Application-implemented interface for profiling.
Definition: NvInfer.h:3560
virtual RNNInputMode getInputMode() const =0
Get the input mode of the RNN layer.
A concatenation layer in a network definition.
Definition: NvInfer.h:1406
virtual void setK(float k)=0
Set the LRN K value.
virtual void setNbGroups(int nbGroups)=0
Set the number of groups for a deconvolution.
virtual IPluginLayer * addPlugin(ITensor *const *inputs, int nbInputs, IPlugin &plugin)=0
Add a plugin layer to the network.
int EnumMax< RNNDirection >()
Maximum number of elements in RNNDirection enum.
Definition: NvInfer.h:1747
virtual const void * readHistogramCache(std::size_t &length)=0
Load a histogram.
virtual Permutation getFirstTranspose() const =0
Get the permutation applied by the first transpose operation.
virtual IExecutionContext * createExecutionContextWithoutDeviceMemory()=0
Create an execution context without any device memory allocated.
virtual void setKernelSize(DimsHW kernelSize)=0
Set the HW kernel size of the convolution.
virtual void setWeights(Weights weights)=0
Set the weight parameters for the RNN.
virtual Weights getWeights() const =0
Get the W weights for the RNN.
virtual void terminate()=0
Shutdown the layer. This is called when the engine is destroyed.
virtual PoolingType getPoolingType() const =0
Get the type of pooling to be performed.
A deconvolution layer in a network definition.
Definition: NvInfer.h:1437
virtual IShuffleLayer * addShuffle(ITensor &input)=0
Add a shuffle layer to the network.
virtual void setPoolingType(PoolingType type)=0
Set the type of pooling to be performed.
virtual IOutputDimensionsFormula & getConvolutionOutputDimensionsFormula() const =0
Get the convolution output dimensions formula.
virtual void setDebugSync(bool sync)=0
Set whether the builder should use debug synchronization.
virtual uint32_t getReduceAxes() const =0
Get the axes to reduce for the layer.
virtual int initialize()=0
Initialize the layer for execution. This is called when the engine is created.
Definition: NvInfer.h:3938
int EnumMax< RNNInputMode >()
Maximum number of elements in RNNInputMode enum.
Definition: NvInfer.h:1774