TensorRT 11.0.0
NvInfer.h
Go to the documentation of this file.
1/*
2 * SPDX-FileCopyrightText: Copyright (c) 1993-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
3 * SPDX-License-Identifier: Apache-2.0
4 *
5 * Licensed under the Apache License, Version 2.0 (the "License");
6 * you may not use this file except in compliance with the License.
7 * You may obtain a copy of the License at
8 *
9 * http://www.apache.org/licenses/LICENSE-2.0
10 *
11 * Unless required by applicable law or agreed to in writing, software
12 * distributed under the License is distributed on an "AS IS" BASIS,
13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 * See the License for the specific language governing permissions and
15 * limitations under the License.
16 */
17
18#ifndef NV_INFER_H
19#define NV_INFER_H
20
21#include "NvInferLegacyDims.h"
22#include "NvInferRuntime.h" // IWYU pragma: export
23
35
41
47namespace nvinfer1
48{
49
//!
//! \enum LayerType
//!
//! \brief Identifies the kind of a network layer; this is the type returned by ILayer::getType().
//!
//! Every enumerator has an explicit value and the values form the contiguous range 0..56.
//!
//! \note NOTE(review): the listing this file was recovered from had dropped the two entries between
//! kCONDITION (34) and kSCATTER (37). They are restored as kCONDITIONAL_INPUT and kCONDITIONAL_OUTPUT,
//! the names the public TensorRT API reference gives to values 35 and 36 -- confirm against the
//! shipped header, which may also attach attributes or comments to these two lines.
//!
enum class LayerType : int32_t
{
    kCONVOLUTION = 0,
    kCAST = 1,
    kACTIVATION = 2,
    kPOOLING = 3,
    kLRN = 4,
    kSCALE = 5,
    kSOFTMAX = 6,
    kDECONVOLUTION = 7,
    kCONCATENATION = 8,
    kELEMENTWISE = 9,
    kPLUGIN = 10,
    kUNARY = 11,
    kPADDING = 12,
    kSHUFFLE = 13,
    kREDUCE = 14,
    kTOPK = 15,
    kGATHER = 16,
    kMATRIX_MULTIPLY = 17,
    kRAGGED_SOFTMAX = 18,
    kCONSTANT = 19,
    kIDENTITY = 20,
    kPLUGIN_V2 = 21,
    kSLICE = 22,
    kSHAPE = 23,
    kPARAMETRIC_RELU = 24,
    kRESIZE = 25,
    kTRIP_LIMIT = 26,
    kRECURRENCE = 27,
    kITERATOR = 28,
    kLOOP_OUTPUT = 29,
    kSELECT = 30,
    kFILL = 31,
    kQUANTIZE = 32,
    kDEQUANTIZE = 33,
    kCONDITION = 34,
    kCONDITIONAL_INPUT = 35,  // restored entry, see note above
    kCONDITIONAL_OUTPUT = 36, // restored entry, see note above
    kSCATTER = 37,
    kEINSUM = 38,
    kASSERTION = 39,
    kONE_HOT = 40,
    kNON_ZERO = 41,
    kGRID_SAMPLE = 42,
    kNMS = 43,
    kREVERSE_SEQUENCE = 44,
    kNORMALIZATION = 45,
    kPLUGIN_V3 = 46,
    kSQUEEZE = 47,
    kUNSQUEEZE = 48,
    kCUMULATIVE = 49,
    kDYNAMIC_QUANTIZE = 50,
    kATTENTION_INPUT = 51,
    kATTENTION_OUTPUT = 52,
    kROTARY_EMBEDDING = 53,
    kKVCACHE_UPDATE = 54,
    kMOE = 55,
    kDIST_COLLECTIVE = 56,
};
117
123template <>
125{
126 static constexpr int32_t kVALUE = 57;
127};
128
//!
//! \brief Unsigned 32-bit integer type used for tensor-format arguments, e.g. by
//! ITensor::setAllowedFormats().
//!
//! NOTE(review): presumably a bit mask with one bit per tensor format -- confirm against the API reference.
//!
using TensorFormats = uint32_t;
136
//!
//! \enum ActivationType
//!
//! \brief Enumerates the available activation kinds.
//!
//! Enumerator values are explicit and contiguous, 0 through 13.
//!
enum class ActivationType : int32_t
{
    kRELU = 0,
    kSIGMOID = 1,
    kTANH = 2,
    kLEAKY_RELU = 3,
    kELU = 4,
    kSELU = 5,
    kSOFTSIGN = 6,
    kSOFTPLUS = 7,
    kCLIP = 8,
    kHARD_SIGMOID = 9,
    kSCALED_TANH = 10,
    kTHRESHOLDED_RELU = 11,
    kGELU_ERF = 12,
    kGELU_TANH = 13
};
159
165template <>
167{
168 static constexpr int32_t kVALUE = 14;
169};
170
185class ITensor : public INoCopy
186{
187public:
203 void setName(char const* name) noexcept
204 {
205 mImpl->setName(name);
206 }
207
215 char const* getName() const noexcept
216 {
217 return mImpl->getName();
218 }
219
234 void setDimensions(Dims const& dimensions) noexcept
235 {
236 mImpl->setDimensions(dimensions);
237 }
238
248 Dims getDimensions() const noexcept
249 {
250 return mImpl->getDimensions();
251 }
252
253
263 DataType getType() const noexcept
264 {
265 return mImpl->getType();
266 }
267
271 bool isNetworkInput() const noexcept
272 {
273 return mImpl->isNetworkInput();
274 }
275
279 bool isNetworkOutput() const noexcept
280 {
281 return mImpl->isNetworkOutput();
282 }
283
296 TRT_DEPRECATED void setBroadcastAcrossBatch(bool broadcastAcrossBatch) noexcept
297 {
298 mImpl->setBroadcastAcrossBatch(broadcastAcrossBatch);
299 }
300
311 {
312 return mImpl->getBroadcastAcrossBatch();
313 }
314
323 {
324 return mImpl->getLocation();
325 }
326
342 {
343 mImpl->setLocation(location);
344 }
345
346
364 void setAllowedFormats(TensorFormats formats) noexcept
365 {
366 mImpl->setAllowedFormats(formats);
367 }
368
378 {
379 return mImpl->getAllowedFormats();
380 }
381
408 bool isShapeTensor() const noexcept
409 {
410 return mImpl->isShapeTensor();
411 }
412
429 bool isExecutionTensor() const noexcept
430 {
431 return mImpl->isExecutionTensor();
432 }
433
455 void setDimensionName(int32_t index, char const* name) noexcept
456 {
457 mImpl->setDimensionName(index, name);
458 }
459
470 char const* getDimensionName(int32_t index) const noexcept
471 {
472 return mImpl->getDimensionName(index);
473 }
474
475protected:
476 apiv::VTensor* mImpl;
477 virtual ~ITensor() noexcept = 0;
478};
479
480inline ITensor::~ITensor() noexcept = default;
481
489class ILayer : public INoCopy
490{
491public:
497 LayerType getType() const noexcept
498 {
499 return mLayer->getType();
500 }
501
511 void setName(char const* name) noexcept
512 {
513 mLayer->setName(name);
514 }
515
521 char const* getName() const noexcept
522 {
523 return mLayer->getName();
524 }
525
529 int32_t getNbInputs() const noexcept
530 {
531 return mLayer->getNbInputs();
532 }
533
542 ITensor* getInput(int32_t index) const noexcept
543 {
544 return mLayer->getInput(index);
545 }
546
550 int32_t getNbOutputs() const noexcept
551 {
552 return mLayer->getNbOutputs();
553 }
554
560 ITensor* getOutput(int32_t index) const noexcept
561 {
562 return mLayer->getOutput(index);
563 }
564
577 void setInput(int32_t index, ITensor& tensor) noexcept
578 {
579 return mLayer->setInput(index, tensor);
580 }
581
582
592 DataType getOutputType(int32_t index) const noexcept
593 {
594 return mLayer->getOutputType(index);
595 }
596
597
611 void setMetadata(char const* metadata) noexcept
612 {
613 mLayer->setMetadata(metadata);
614 }
615
624 char const* getMetadata() const noexcept
625 {
626 return mLayer->getMetadata();
627 }
628
645 bool setNbRanks(int32_t nbRanks) noexcept
646 {
647 return mLayer->setNbRanks(nbRanks);
648 }
649
657 int32_t getNbRanks() const noexcept
658 {
659 return mLayer->getNbRanks();
660 }
661
662protected:
663 virtual ~ILayer() noexcept = 0;
664 apiv::VLayer* mLayer;
665};
666
667inline ILayer::~ILayer() noexcept = default;
668
//!
//! \enum PaddingMode
//!
//! \brief Enumerates the padding modes accepted by the setPaddingMode() members of the layer classes.
//!
//! Enumerator values are explicit and contiguous, 0 through 3.
//!
//! \note NOTE(review): the listing this file was recovered from had dropped the entries for values 0
//! and 1. They are restored as kEXPLICIT_ROUND_DOWN and kEXPLICIT_ROUND_UP, the names given by the
//! public TensorRT API reference -- confirm against the shipped header, which may also attach
//! attributes or comments to these two lines.
//!
enum class PaddingMode : int32_t
{
    kEXPLICIT_ROUND_DOWN = 0, // restored entry, see note above
    kEXPLICIT_ROUND_UP = 1,   // restored entry, see note above
    kSAME_UPPER = 2,
    kSAME_LOWER = 3,
};
832
838template <>
840{
841 static constexpr int32_t kVALUE = 4;
842};
843
857{
858public:
866 void setNbOutputMaps(int64_t nbOutputMaps) noexcept
867 {
868 mImpl->setNbOutputMaps(nbOutputMaps);
869 }
870
876 int64_t getNbOutputMaps() const noexcept
877 {
878 return mImpl->getNbOutputMaps();
879 }
880
896 void setNbGroups(int64_t nbGroups) noexcept
897 {
898 mImpl->setNbGroups(nbGroups);
899 }
900
906 int64_t getNbGroups() const noexcept
907 {
908 return mImpl->getNbGroups();
909 }
910
920 void setKernelWeights(Weights weights) noexcept
921 {
922 mImpl->setKernelWeights(weights);
923 }
924
930 Weights getKernelWeights() const noexcept
931 {
932 return mImpl->getKernelWeights();
933 }
934
945 void setBiasWeights(Weights weights) noexcept
946 {
947 mImpl->setBiasWeights(weights);
948 }
949
955 Weights getBiasWeights() const noexcept
956 {
957 return mImpl->getBiasWeights();
958 }
959
972 void setPrePadding(Dims const& padding) noexcept
973 {
974 mImpl->setPrePadding(padding);
975 }
976
982 Dims getPrePadding() const noexcept
983 {
984 return mImpl->getPrePadding();
985 }
986
999 void setPostPadding(Dims const& padding) noexcept
1000 {
1001 mImpl->setPostPadding(padding);
1002 }
1003
1009 Dims getPostPadding() const noexcept
1010 {
1011 return mImpl->getPostPadding();
1012 }
1013
1023 void setPaddingMode(PaddingMode paddingMode) noexcept
1024 {
1025 mImpl->setPaddingMode(paddingMode);
1026 }
1027
1036 {
1037 return mImpl->getPaddingMode();
1038 }
1039
1048 void setKernelSizeNd(Dims const& kernelSize) noexcept
1049 {
1050 mImpl->setKernelSizeNd(kernelSize);
1051 }
1052
1058 Dims getKernelSizeNd() const noexcept
1059 {
1060 return mImpl->getKernelSizeNd();
1061 }
1062
1073 void setStrideNd(Dims const& stride) noexcept
1074 {
1075 mImpl->setStrideNd(stride);
1076 }
1077
1083 Dims getStrideNd() const noexcept
1084 {
1085 return mImpl->getStrideNd();
1086 }
1087
1101 void setPaddingNd(Dims const& padding) noexcept
1102 {
1103 mImpl->setPaddingNd(padding);
1104 }
1105
1113 Dims getPaddingNd() const noexcept
1114 {
1115 return mImpl->getPaddingNd();
1116 }
1117
1127 void setDilationNd(Dims const& dilation) noexcept
1128 {
1129 mImpl->setDilationNd(dilation);
1130 }
1131
1137 Dims getDilationNd() const noexcept
1138 {
1139 return mImpl->getDilationNd();
1140 }
1141
1156 using ILayer::setInput;
1157
1158protected:
1159 virtual ~IConvolutionLayer() noexcept = 0;
1160 apiv::VConvolutionLayer* mImpl;
1161};
1162
1163inline IConvolutionLayer::~IConvolutionLayer() noexcept = default;
1164
1179{
1180public:
1189 {
1190 mImpl->setActivationType(type);
1191 }
1192
1199 {
1200 return mImpl->getActivationType();
1201 }
1202
1213 void setAlpha(float alpha) noexcept
1214 {
1215 mImpl->setAlpha(alpha);
1216 }
1217
1227 void setBeta(float beta) noexcept
1228 {
1229 mImpl->setBeta(beta);
1230 }
1231
1236 float getAlpha() const noexcept
1237 {
1238 return mImpl->getAlpha();
1239 }
1240
1245 float getBeta() const noexcept
1246 {
1247 return mImpl->getBeta();
1248 }
1249
1250protected:
1251 virtual ~IActivationLayer() noexcept = 0;
1252 apiv::VActivationLayer* mImpl;
1253};
1254
1255inline IActivationLayer::~IActivationLayer() noexcept = default;
1256
//!
//! \enum PoolingType
//!
//! \brief Enumerates the pooling kinds accepted by IPoolingLayer::setPoolingType().
//!
//! Enumerator values are explicit and contiguous, 0 through 2.
//!
//! \note NOTE(review): the listing this file was recovered from had dropped the entry for value 2.
//! It is restored as kMAX_AVERAGE_BLEND, the name given by the public TensorRT API reference
//! (compare IPoolingLayer::setBlendFactor()) -- confirm against the shipped header.
//!
enum class PoolingType : int32_t
{
    kMAX = 0,
    kAVERAGE = 1,
    kMAX_AVERAGE_BLEND = 2, // restored entry, see note above
};
1268
1274template <>
1276{
1277 static constexpr int32_t kVALUE = 3;
1278};
1279
1291class IPoolingLayer : public ILayer
1292{
1293public:
1301 void setPoolingType(PoolingType type) noexcept
1302 {
1303 mImpl->setPoolingType(type);
1304 }
1305
1312 {
1313 return mImpl->getPoolingType();
1314 }
1315
1326 void setBlendFactor(float blendFactor) noexcept
1327 {
1328 mImpl->setBlendFactor(blendFactor);
1329 }
1330
1339 float getBlendFactor() const noexcept
1340 {
1341 return mImpl->getBlendFactor();
1342 }
1343
1353 void setAverageCountExcludesPadding(bool exclusive) noexcept
1354 {
1355 mImpl->setAverageCountExcludesPadding(exclusive);
1356 }
1357
1365 {
1366 return mImpl->getAverageCountExcludesPadding();
1367 }
1368
1382 void setPrePadding(Dims const& padding) noexcept
1383 {
1384 mImpl->setPrePadding(padding);
1385 }
1386
1392 Dims getPrePadding() const noexcept
1393 {
1394 return mImpl->getPrePadding();
1395 }
1396
1410 void setPostPadding(Dims const& padding) noexcept
1411 {
1412 mImpl->setPostPadding(padding);
1413 }
1414
1420 Dims getPostPadding() const noexcept
1421 {
1422 return mImpl->getPostPadding();
1423 }
1424
1433 void setPaddingMode(PaddingMode paddingMode) noexcept
1434 {
1435 mImpl->setPaddingMode(paddingMode);
1436 }
1437
1445 {
1446 return mImpl->getPaddingMode();
1447 }
1448
1457 void setWindowSizeNd(Dims const& windowSize) noexcept
1458 {
1459 mImpl->setWindowSizeNd(windowSize);
1460 }
1461
1467 Dims getWindowSizeNd() const noexcept
1468 {
1469 return mImpl->getWindowSizeNd();
1470 }
1471
1482 void setStrideNd(Dims const& stride) noexcept
1483 {
1484 mImpl->setStrideNd(stride);
1485 }
1486
1492 Dims getStrideNd() const noexcept
1493 {
1494 return mImpl->getStrideNd();
1495 }
1496
1511 void setPaddingNd(Dims const& padding) noexcept
1512 {
1513 mImpl->setPaddingNd(padding);
1514 }
1515
1523 Dims getPaddingNd() const noexcept
1524 {
1525 return mImpl->getPaddingNd();
1526 }
1527
1528protected:
1529 virtual ~IPoolingLayer() noexcept = 0;
1530 apiv::VPoolingLayer* mImpl;
1531};
1532
1533inline IPoolingLayer::~IPoolingLayer() noexcept = default;
1534
1544class ILRNLayer : public ILayer
1545{
1546public:
1556 void setWindowSize(int64_t windowSize) noexcept
1557 {
1558 mImpl->setWindowSize(windowSize);
1559 }
1560
1566 int64_t getWindowSize() const noexcept
1567 {
1568 return mImpl->getWindowSize();
1569 }
1570
1578 void setAlpha(float alpha) noexcept
1579 {
1580 mImpl->setAlpha(alpha);
1581 }
1582
1588 float getAlpha() const noexcept
1589 {
1590 return mImpl->getAlpha();
1591 }
1592
1600 void setBeta(float beta) noexcept
1601 {
1602 mImpl->setBeta(beta);
1603 }
1604
1610 float getBeta() const noexcept
1611 {
1612 return mImpl->getBeta();
1613 }
1614
1622 void setK(float k) noexcept
1623 {
1624 mImpl->setK(k);
1625 }
1626
1632 float getK() const noexcept
1633 {
1634 return mImpl->getK();
1635 }
1636
1637protected:
1638 virtual ~ILRNLayer() noexcept = 0;
1639 apiv::VLRNLayer* mImpl;
1640};
1641
1642inline ILRNLayer::~ILRNLayer() noexcept = default;
1643
//!
//! \enum ScaleMode
//!
//! \brief Enumerates the modes accepted by IScaleLayer::setMode().
//!
//! Enumerator values are explicit and contiguous, 0 through 2.
//!
enum class ScaleMode : int32_t
{
    kUNIFORM = 0,
    kCHANNEL = 1,
    kELEMENTWISE = 2
};
1655
1661template <>
1663{
1664 static constexpr int32_t kVALUE = 3;
1665};
1666
1692class IScaleLayer : public ILayer
1693{
1694public:
1700 void setMode(ScaleMode mode) noexcept
1701 {
1702 mImpl->setMode(mode);
1703 }
1704
1710 ScaleMode getMode() const noexcept
1711 {
1712 return mImpl->getMode();
1713 }
1714
1720 void setShift(Weights shift) noexcept
1721 {
1722 mImpl->setShift(shift);
1723 }
1724
1730 Weights getShift() const noexcept
1731 {
1732 return mImpl->getShift();
1733 }
1734
1740 void setScale(Weights scale) noexcept
1741 {
1742 mImpl->setScale(scale);
1743 }
1744
1750 Weights getScale() const noexcept
1751 {
1752 return mImpl->getScale();
1753 }
1754
1760 void setPower(Weights power) noexcept
1761 {
1762 mImpl->setPower(power);
1763 }
1764
1770 Weights getPower() const noexcept
1771 {
1772 return mImpl->getPower();
1773 }
1774
1785 int32_t getChannelAxis() const noexcept
1786 {
1787 return mImpl->getChannelAxis();
1788 }
1789
1806 void setChannelAxis(int32_t channelAxis) noexcept
1807 {
1808 mImpl->setChannelAxis(channelAxis);
1809 }
1810
1811protected:
1812 virtual ~IScaleLayer() noexcept = 0;
1813 apiv::VScaleLayer* mImpl;
1814};
1815
1816inline IScaleLayer::~IScaleLayer() noexcept = default;
1817
1838class ISoftMaxLayer : public ILayer
1839{
1840public:
1861 void setAxes(uint32_t axes) noexcept
1862 {
1863 mImpl->setAxes(axes);
1864 }
1865
1871 uint32_t getAxes() const noexcept
1872 {
1873 return mImpl->getAxes();
1874 }
1875
1876protected:
1877 virtual ~ISoftMaxLayer() noexcept = 0;
1878 apiv::VSoftMaxLayer* mImpl;
1879};
1880
1881inline ISoftMaxLayer::~ISoftMaxLayer() noexcept = default;
1882
1896{
1897public:
1909 void setAxis(int32_t axis) noexcept
1910 {
1911 mImpl->setAxis(axis);
1912 }
1913
1919 int32_t getAxis() const noexcept
1920 {
1921 return mImpl->getAxis();
1922 }
1923
1924protected:
1925 virtual ~IConcatenationLayer() noexcept = 0;
1926 apiv::VConcatenationLayer* mImpl;
1927};
1928
1929inline IConcatenationLayer::~IConcatenationLayer() noexcept = default;
1930
1939{
1940public:
1948 void setNbOutputMaps(int64_t nbOutputMaps) noexcept
1949 {
1950 mImpl->setNbOutputMaps(nbOutputMaps);
1951 }
1952
1958 int64_t getNbOutputMaps() const noexcept
1959 {
1960 return mImpl->getNbOutputMaps();
1961 }
1962
1978 void setNbGroups(int64_t nbGroups) noexcept
1979 {
1980 mImpl->setNbGroups(nbGroups);
1981 }
1982
1988 int64_t getNbGroups() const noexcept
1989 {
1990 return mImpl->getNbGroups();
1991 }
1992
2002 void setKernelWeights(Weights weights) noexcept
2003 {
2004 mImpl->setKernelWeights(weights);
2005 }
2006
2012 Weights getKernelWeights() const noexcept
2013 {
2014 return mImpl->getKernelWeights();
2015 }
2016
2027 void setBiasWeights(Weights weights) noexcept
2028 {
2029 mImpl->setBiasWeights(weights);
2030 }
2031
2037 Weights getBiasWeights() const noexcept
2038 {
2039 return mImpl->getBiasWeights();
2040 }
2041
2054 void setPrePadding(Dims const& padding) noexcept
2055 {
2056 mImpl->setPrePadding(padding);
2057 }
2058
2064 Dims getPrePadding() const noexcept
2065 {
2066 return mImpl->getPrePadding();
2067 }
2068
2081 void setPostPadding(Dims const& padding) noexcept
2082 {
2083 mImpl->setPostPadding(padding);
2084 }
2085
2091 Dims getPostPadding() const noexcept
2092 {
2093 return mImpl->getPostPadding();
2094 }
2095
2105 void setPaddingMode(PaddingMode paddingMode) noexcept
2106 {
2107 mImpl->setPaddingMode(paddingMode);
2108 }
2109
2118 {
2119 return mImpl->getPaddingMode();
2120 }
2121
2132 void setKernelSizeNd(Dims const& kernelSize) noexcept
2133 {
2134 mImpl->setKernelSizeNd(kernelSize);
2135 }
2136
2142 Dims getKernelSizeNd() const noexcept
2143 {
2144 return mImpl->getKernelSizeNd();
2145 }
2146
2159 void setStrideNd(Dims const& stride) noexcept
2160 {
2161 mImpl->setStrideNd(stride);
2162 }
2163
2169 Dims getStrideNd() const noexcept
2170 {
2171 return mImpl->getStrideNd();
2172 }
2173
2187 void setPaddingNd(Dims const& padding) noexcept
2188 {
2189 mImpl->setPaddingNd(padding);
2190 }
2191
2199 Dims getPaddingNd() const noexcept
2200 {
2201 return mImpl->getPaddingNd();
2202 }
2203
2216 using ILayer::setInput;
2217
2225 void setDilationNd(Dims const& dilation) noexcept
2226 {
2227 mImpl->setDilationNd(dilation);
2228 }
2229
2235 Dims getDilationNd() const noexcept
2236 {
2237 return mImpl->getDilationNd();
2238 }
2239
2240protected:
2241 virtual ~IDeconvolutionLayer() noexcept = 0;
2242 apiv::VDeconvolutionLayer* mImpl;
2243};
2244
2245inline IDeconvolutionLayer::~IDeconvolutionLayer() noexcept = default;
2246
//!
//! \enum ElementWiseOperation
//!
//! \brief Enumerates the binary operations of an element-wise layer.
//!
//! Enumerator values are explicit and contiguous, 0 through 13.
//!
enum class ElementWiseOperation : int32_t
{
    kSUM = 0,
    kPROD = 1,
    kMAX = 2,
    kMIN = 3,
    kSUB = 4,
    kDIV = 5,
    kPOW = 6,
    kFLOOR_DIV = 7,
    kAND = 8,
    kOR = 9,
    kXOR = 10,
    kEQUAL = 11,
    kGREATER = 12,
    kLESS = 13
};
2276
2282template <>
2284{
2285 static constexpr int32_t kVALUE = 14;
2286};
2287
2308{
2309public:
2320 {
2321 return mImpl->setOperation(op);
2322 }
2323
2332 {
2333 return mImpl->getOperation();
2334 }
2335
2336protected:
2337 apiv::VElementWiseLayer* mImpl;
2338 virtual ~IElementWiseLayer() noexcept = 0;
2339};
2340
2341inline IElementWiseLayer::~IElementWiseLayer() noexcept = default;
2342
//!
//! \enum GatherMode
//!
//! \brief Enumerates the modes accepted by IGatherLayer::setMode().
//!
//! Enumerator values are explicit and contiguous, 0 through 2.
//!
enum class GatherMode : int32_t
{
    kDEFAULT = 0,
    kELEMENT = 1,
    kND = 2
};
2354
2360template <>
2362{
2363 static constexpr int32_t kVALUE = 3;
2364};
2365
2442class IGatherLayer : public ILayer
2443{
2444public:
2454 void setGatherAxis(int32_t axis) noexcept
2455 {
2456 mImpl->setGatherAxis(axis);
2457 }
2458
2466 int32_t getGatherAxis() const noexcept
2467 {
2468 return mImpl->getGatherAxis();
2469 }
2470
2489 void setNbElementWiseDims(int32_t elementWiseDims) noexcept
2490 {
2491 mImpl->setNbElementWiseDims(elementWiseDims);
2492 }
2493
2499 int32_t getNbElementWiseDims() const noexcept
2500 {
2501 return mImpl->getNbElementWiseDims();
2502 }
2503
2509 void setMode(GatherMode mode) noexcept
2510 {
2511 mImpl->setMode(mode);
2512 }
2513
2519 GatherMode getMode() const noexcept
2520 {
2521 return mImpl->getMode();
2522 }
2523
2524protected:
2525 apiv::VGatherLayer* mImpl;
2526 virtual ~IGatherLayer() noexcept = 0;
2527};
2528
2529inline IGatherLayer::~IGatherLayer() noexcept = default;
2530
2543{
2544public:
2551 {
2552 return mImpl->getPlugin();
2553 }
2554
2555protected:
2556 apiv::VPluginV2Layer* mImpl;
2557 virtual ~IPluginV2Layer() noexcept = 0;
2558};
2559
2560inline IPluginV2Layer::~IPluginV2Layer() noexcept = default;
2561
2572{
2573public:
2580 {
2581 return mImpl->getPlugin();
2582 }
2583
2584protected:
2585 apiv::VPluginV3Layer* mImpl;
2586 virtual ~IPluginV3Layer() noexcept = 0;
2587};
2588
2589inline IPluginV3Layer::~IPluginV3Layer() noexcept = default;
2590
//!
//! \enum UnaryOperation
//!
//! \brief Enumerates the operations of a unary layer.
//!
//! Enumerator values are explicit and contiguous, 0 through 24.
//!
enum class UnaryOperation : int32_t
{
    kEXP = 0,
    kLOG = 1,
    kSQRT = 2,
    kRECIP = 3,
    kABS = 4,
    kNEG = 5,
    kSIN = 6,
    kCOS = 7,
    kTAN = 8,
    kSINH = 9,
    kCOSH = 10,
    kASIN = 11,
    kACOS = 12,
    kATAN = 13,
    kASINH = 14,
    kACOSH = 15,
    kATANH = 16,
    kCEIL = 17,
    kFLOOR = 18,
    kERF = 19,
    kNOT = 20,
    kSIGN = 21,
    kROUND = 22,
    kISINF = 23,
    kISNAN = 24,
};
2635
2641template <>
2643{
2644 static constexpr int32_t kVALUE = 25;
2645};
2646
2654class IUnaryLayer : public ILayer
2655{
2656public:
2665 {
2666 mImpl->setOperation(op);
2667 }
2668
2675 {
2676 return mImpl->getOperation();
2677 }
2678
2679protected:
2680 apiv::VUnaryLayer* mImpl;
2681 virtual ~IUnaryLayer() noexcept = 0;
2682};
2683
2684inline IUnaryLayer::~IUnaryLayer() noexcept = default;
2685
//!
//! \enum ReduceOperation
//!
//! \brief Enumerates the reduction operations.
//!
//! Enumerator values are explicit and contiguous, 0 through 5.
//!
enum class ReduceOperation : int32_t
{
    kSUM = 0,
    kPROD = 1,
    kMAX = 2,
    kMIN = 3,
    kAVG = 4,
    kNONE = 5,
};
2718
2724template <>
2726{
2727 static constexpr int32_t kVALUE = 6;
2728};
2729
//!
//! \enum CollectiveOperation
//!
//! \brief Enumerates the collective operations.
//!
//! Enumerator values are explicit and contiguous, 0 through 7.
//!
//! NOTE(review): presumably consumed by the layer identified by LayerType::kDIST_COLLECTIVE; that
//! layer's interface is not part of this section -- confirm.
//!
enum class CollectiveOperation : int32_t
{
    kALL_REDUCE = 0,
    kALL_GATHER = 1,
    kBROADCAST = 2,
    kREDUCE = 3,
    kREDUCE_SCATTER = 4,
    kALL_TO_ALL = 5,
    kGATHER = 6,
    kSCATTER = 7,
};
2748
2754template <>
2756{
2757 static constexpr int32_t kVALUE = 8;
2758};
2759
2767class IReduceLayer : public ILayer
2768{
2769public:
2776 {
2777 mImpl->setOperation(op);
2778 }
2779
2786 {
2787 return mImpl->getOperation();
2788 }
2789
2795 void setReduceAxes(uint32_t reduceAxes) noexcept
2796 {
2797 mImpl->setReduceAxes(reduceAxes);
2798 }
2799
2805 uint32_t getReduceAxes() const noexcept
2806 {
2807 return mImpl->getReduceAxes();
2808 }
2809
2815 void setKeepDimensions(bool keepDimensions) noexcept
2816 {
2817 mImpl->setKeepDimensions(keepDimensions);
2818 }
2819
2825 bool getKeepDimensions() const noexcept
2826 {
2827 return mImpl->getKeepDimensions();
2828 }
2829
2830protected:
2831 apiv::VReduceLayer* mImpl;
2832 virtual ~IReduceLayer() noexcept = 0;
2833};
2834
2835inline IReduceLayer::~IReduceLayer() noexcept = default;
2836
2849class IPaddingLayer : public ILayer
2850{
2851public:
2861 void setPrePaddingNd(Dims const& padding) noexcept
2862 {
2863 mImpl->setPrePaddingNd(padding);
2864 }
2865
2873 Dims getPrePaddingNd() const noexcept
2874 {
2875 return mImpl->getPrePaddingNd();
2876 }
2877
2887 void setPostPaddingNd(Dims const& padding) noexcept
2888 {
2889 mImpl->setPostPaddingNd(padding);
2890 }
2891
2899 Dims getPostPaddingNd() const noexcept
2900 {
2901 return mImpl->getPostPaddingNd();
2902 }
2903
2904protected:
2905 apiv::VPaddingLayer* mImpl;
2906 virtual ~IPaddingLayer() noexcept = 0;
2907};
2908
2909inline IPaddingLayer::~IPaddingLayer() noexcept = default;
2910
2917{
2924 int32_t order[Dims::MAX_DIMS];
2925};
2926
2939class IShuffleLayer : public ILayer
2940{
2941public:
2951 void setFirstTranspose(Permutation permutation) noexcept
2952 {
2953 mImpl->setFirstTranspose(permutation);
2954 }
2955
2964 {
2965 return mImpl->getFirstTranspose();
2966 }
2967
2991 void setReshapeDimensions(Dims const& dimensions) noexcept
2992 {
2993 mImpl->setReshapeDimensions(dimensions);
2994 }
2995
3005 {
3006 return mImpl->getReshapeDimensions();
3007 }
3008
3014 //
3037 using ILayer::setInput;
3038
3051 void setSecondTranspose(Permutation permutation) noexcept
3052 {
3053 mImpl->setSecondTranspose(permutation);
3054 }
3055
3064 {
3065 return mImpl->getSecondTranspose();
3066 }
3067
3079 void setZeroIsPlaceholder(bool zeroIsPlaceholder) noexcept
3080 {
3081 return mImpl->setZeroIsPlaceholder(zeroIsPlaceholder);
3082 }
3083
3092 bool getZeroIsPlaceholder() const noexcept
3093 {
3094 return mImpl->getZeroIsPlaceholder();
3095 }
3096
3097protected:
3098 apiv::VShuffleLayer* mImpl;
3099 virtual ~IShuffleLayer() noexcept = 0;
3100};
3101
3102inline IShuffleLayer::~IShuffleLayer() noexcept = default;
3103
//!
//! \enum SampleMode
//!
//! \brief Enumerates the modes accepted by ISliceLayer::setMode().
//!
//! Enumerator values are explicit and contiguous, 0 through 4.
//!
enum class SampleMode : int32_t
{
    kSTRICT_BOUNDS = 0,
    kWRAP = 1,
    kCLAMP = 2,
    kFILL = 3,
    kREFLECT = 4,
};
3119
3125template <>
3127{
3128 static constexpr int32_t kVALUE = 5;
3129};
3130
3193class ISliceLayer : public ILayer
3194{
3195public:
3205 void setStart(Dims const& start) noexcept
3206 {
3207 mImpl->setStart(start);
3208 }
3209
3220 Dims getStart() const noexcept
3221 {
3222 return mImpl->getStart();
3223 }
3224
3234 void setSize(Dims const& size) noexcept
3235 {
3236 return mImpl->setSize(size);
3237 }
3238
3249 Dims getSize() const noexcept
3250 {
3251 return mImpl->getSize();
3252 }
3253
3263 void setStride(Dims const& stride) noexcept
3264 {
3265 mImpl->setStride(stride);
3266 }
3267
3278 Dims getStride() const noexcept
3279 {
3280 return mImpl->getStride();
3281 }
3282
3288 void setMode(SampleMode mode) noexcept
3289 {
3290 mImpl->setMode(mode);
3291 }
3292
3298 SampleMode getMode() const noexcept
3299 {
3300 return mImpl->getMode();
3301 }
3302
3330 using ILayer::setInput;
3331
3341 void setAxes(Dims const& axes) noexcept
3342 {
3343 mImpl->setAxes(axes);
3344 }
3345
3356 Dims getAxes() const noexcept
3357 {
3358 return mImpl->getAxes();
3359 }
3360
3361protected:
3362 apiv::VSliceLayer* mImpl;
3363 virtual ~ISliceLayer() noexcept = 0;
3364};
3365
3366inline ISliceLayer::~ISliceLayer() noexcept = default;
3367
3380class IShapeLayer : public ILayer
3381{
3382protected:
3383 apiv::VShapeLayer* mImpl;
3384 virtual ~IShapeLayer() noexcept = 0;
3385};
3386
3387inline IShapeLayer::~IShapeLayer() noexcept = default;
3388
//!
//! \enum TopKOperation
//!
//! \brief Enumerates the operations accepted by ITopKLayer::setOperation().
//!
//! Enumerator values are explicit: kMAX is 0 and kMIN is 1.
//!
enum class TopKOperation : int32_t
{
    kMAX = 0,
    kMIN = 1,
};
3399
3405template <>
3407{
3408 static constexpr int32_t kVALUE = 2;
3409};
3410
3422class ITopKLayer : public ILayer
3423{
3424public:
3430 void setOperation(TopKOperation op) noexcept
3431 {
3432 mImpl->setOperation(op);
3433 }
3434
3441 {
3442 return mImpl->getOperation();
3443 }
3444
3454 void setK(int32_t k) noexcept
3455 {
3456 mImpl->setK(k);
3457 }
3458
3468 int32_t getK() const noexcept
3469 {
3470 return mImpl->getK();
3471 }
3472
3478 void setReduceAxes(uint32_t reduceAxes) noexcept
3479 {
3480 mImpl->setReduceAxes(reduceAxes);
3481 }
3482
3488 uint32_t getReduceAxes() const noexcept
3489 {
3490 return mImpl->getReduceAxes();
3491 }
3492
3507 using ILayer::setInput;
3508
3519 bool setIndicesType(DataType type) noexcept
3520 {
3521 return mImpl->setIndicesType(type);
3522 }
3523
3531 DataType getIndicesType() const noexcept
3532 {
3533 return mImpl->getIndicesType();
3534 }
3535
3536protected:
3537 apiv::VTopKLayer* mImpl;
3538 virtual ~ITopKLayer() noexcept = 0;
3539};
3540
3541inline ITopKLayer::~ITopKLayer() noexcept = default;
3542
//!
//! \enum MatrixOperation
//!
//! \brief Enumerates the per-operand operations taken by the matrix-multiply layer's
//! setOperation(int32_t index, MatrixOperation op).
//!
//! Enumerator values are explicit and contiguous, 0 through 2.
//!
enum class MatrixOperation : int32_t
{
    kNONE = 0,

    kTRANSPOSE = 1,

    kVECTOR = 2,
};
3571
3577template <>
3579{
3580 static constexpr int32_t kVALUE = 3;
3581};
3582
3609{
3610public:
3619 void setOperation(int32_t index, MatrixOperation op) noexcept
3620 {
3621 mImpl->setOperation(index, op);
3622 }
3623
3631 MatrixOperation getOperation(int32_t index) const noexcept
3632 {
3633 return mImpl->getOperation(index);
3634 }
3635
3636protected:
3637 apiv::VMatrixMultiplyLayer* mImpl;
3638 virtual ~IMatrixMultiplyLayer() noexcept = 0;
3639};
3640
3641inline IMatrixMultiplyLayer::~IMatrixMultiplyLayer() noexcept = default;
3642
3664class INonZeroLayer : public ILayer
3665{
3666public:
3677 bool setIndicesType(DataType type) noexcept
3678 {
3679 return mImpl->setIndicesType(type);
3680 }
3681
3689 DataType getIndicesType() const noexcept
3690 {
3691 return mImpl->getIndicesType();
3692 }
3693
3694protected:
3695 virtual ~INonZeroLayer() noexcept = 0;
3696 apiv::VNonZeroLayer* mImpl;
3697};
3698
3699inline INonZeroLayer::~INonZeroLayer() noexcept = default;
3700
3716{
3717protected:
3718 apiv::VRaggedSoftMaxLayer* mImpl;
3719 virtual ~IRaggedSoftMaxLayer() noexcept = 0;
3720};
3721
3722inline IRaggedSoftMaxLayer::~IRaggedSoftMaxLayer() noexcept = default;
3723
3768{
3769protected:
3770 apiv::VIdentityLayer* mImpl;
3771 virtual ~IIdentityLayer() noexcept = 0;
3772};
3773
3774inline IIdentityLayer::~IIdentityLayer() noexcept = default;
3775
3782class ICastLayer : public ILayer
3783{
3784public:
3792 void setToType(DataType toType) noexcept
3793 {
3794 mImpl->setToType(toType);
3795 }
3796
3803 DataType getToType() const noexcept
3804 {
3805 return mImpl->getToType();
3806 }
3807
3808protected:
3809 apiv::VCastLayer* mImpl;
3810 virtual ~ICastLayer() noexcept = 0;
3811};
3812
3813inline ICastLayer::~ICastLayer() noexcept = default;
3814
3824{
3825public:
3834 void setWeights(Weights weights) noexcept
3835 {
3836 mImpl->setWeights(weights);
3837 }
3838
3844 Weights getWeights() const noexcept
3845 {
3846 return mImpl->getWeights();
3847 }
3848
3856 void setDimensions(Dims const& dimensions) noexcept
3857 {
3858 mImpl->setDimensions(dimensions);
3859 }
3860
3868 Dims getDimensions() const noexcept
3869 {
3870 return mImpl->getDimensions();
3871 }
3872
3873protected:
3874 apiv::VConstantLayer* mImpl;
3875 virtual ~IConstantLayer() noexcept = 0;
3876};
3877
3878inline IConstantLayer::~IConstantLayer() noexcept = default;
3879
3890{
3891protected:
3892 apiv::VParametricReLULayer* mImpl;
3893 virtual ~IParametricReLULayer() noexcept = 0;
3894};
3895
3896inline IParametricReLULayer::~IParametricReLULayer() noexcept = default;
3897
//!
//! \enum InterpolationMode
//!
//! \brief Enumerates the interpolation modes accepted by IResizeLayer::setResizeMode().
//!
//! Enumerator values are explicit and contiguous, 0 through 2.
//!
enum class InterpolationMode : int32_t
{
    kNEAREST = 0,
    kLINEAR = 1,
    kCUBIC = 2
};
3909
3915template <>
3917{
3918 static constexpr int32_t kVALUE = 3;
3919};
3920
3929{
3942 kALIGN_CORNERS = 0,
3943
3950 kASYMMETRIC = 1,
3951
3958 kHALF_PIXEL = 2,
3959};
3960
3966template <>
3968{
3969 static constexpr int32_t kVALUE = 3;
3970};
3971
//!
//! \enum ResizeSelector
//!
//! \brief Enumerates the selector choices of the resize layer.
//!
//! Enumerator values are explicit: kFORMULA is 0 and kUPPER is 1.
//!
//! NOTE(review): presumably the argument of the resize layer's single-pixel selector setter --
//! confirm against the API reference.
//!
enum class ResizeSelector : int32_t
{
    kFORMULA = 0,

    kUPPER = 1,
};
3987
3993template <>
3995{
3996 static constexpr int32_t kVALUE = 2;
3997};
3998
//!
//! \enum ResizeRoundMode
//!
//! \brief Enumerates the rounding modes of the resize layer.
//!
//! Enumerator values are explicit and contiguous, 0 through 3.
//!
//! NOTE(review): presumably the argument of the resize layer's nearest-rounding setter -- confirm
//! against the API reference.
//!
enum class ResizeRoundMode : int32_t
{
    kHALF_UP = 0,

    kHALF_DOWN = 1,

    kFLOOR = 2,

    kCEIL = 3,
};
4020
4026template <>
4028{
4029 static constexpr int32_t kVALUE = 4;
4030};
4031
4068class IResizeLayer : public ILayer
4069{
4070public:
4089 void setOutputDimensions(Dims const& dimensions) noexcept
4090 {
4091 return mImpl->setOutputDimensions(dimensions);
4092 }
4093
4099 Dims getOutputDimensions() const noexcept
4100 {
4101 return mImpl->getOutputDimensions();
4102 }
4103
4129 void setScales(float const* scales, int32_t nbScales) noexcept
4130 {
4131 mImpl->setScales(scales, nbScales);
4132 }
4133
4148 int32_t getScales(int32_t size, float* scales) const noexcept
4149 {
4150 return mImpl->getScales(size, scales);
4151 }
4152
4160 void setResizeMode(InterpolationMode interpolationMode) noexcept
4161 {
4162 mImpl->setResizeMode(interpolationMode);
4163 }
4164
4171 {
4172 return mImpl->getResizeMode();
4173 }
4174
4194 using ILayer::setInput;
4195
4206 {
4207 mImpl->setCoordinateTransformation(coordTransform);
4208 }
4209
4216 {
4217 return mImpl->getCoordinateTransformation();
4218 }
4219
4231 {
4232 mImpl->setSelectorForSinglePixel(selector);
4233 }
4234
4241 {
4242 return mImpl->getSelectorForSinglePixel();
4243 }
4244
4255 {
4256 mImpl->setNearestRounding(value);
4257 }
4258
4265 {
4266 return mImpl->getNearestRounding();
4267 }
4268
4286 void setCubicCoeff(float A) noexcept
4287 {
4288 mImpl->setCubicCoeff(A);
4289 }
4290
4296 float getCubicCoeff() const noexcept
4297 {
4298 return mImpl->getCubicCoeff();
4299 }
4300
4309 void setExcludeOutside(bool excludeFlag) noexcept
4310 {
4311 mImpl->setExcludeOutside(excludeFlag);
4312 }
4313
4319 bool getExcludeOutside() const noexcept
4320 {
4321 return mImpl->getExcludeOutside();
4322 }
4323
4324protected:
4325 virtual ~IResizeLayer() noexcept = 0;
4326 apiv::VResizeLayer* mImpl;
4327};
4328
4329inline IResizeLayer::~IResizeLayer() noexcept = default;
4330
//!
//! \enum LoopOutput
//!
//! \brief Enumerates the kinds of loop output; this is the type returned by getLoopOutput() of the
//! loop-output layer.
//!
//! Enumerator values are explicit and contiguous, 0 through 2.
//!
enum class LoopOutput : int32_t
{
    kLAST_VALUE = 0,

    kCONCATENATE = 1,

    kREVERSE = 2
};
4347
4353template <>
4355{
4356 static constexpr int32_t kVALUE = 3;
4357};
4358
//!
//! \enum TripLimit
//!
//! \brief Enumerates the kinds of loop trip limit.
//!
//! Enumerator values are explicit: kCOUNT is 0 and kWHILE is 1.
//!
//! NOTE(review): presumably consumed by the layer identified by LayerType::kTRIP_LIMIT; that layer's
//! interface is not part of this section -- confirm.
//!
enum class TripLimit : int32_t
{

    kCOUNT = 0,
    kWHILE = 1
};
4370
4376template <>
4378{
4379 static constexpr int32_t kVALUE = 2;
4380};
4381
// Forward declaration: ILoop is used below only as a pointer type (the return type of getLoop()),
// so the full definition is not needed at this point.
class ILoop;
4383
4398{
4399public:
4403 ILoop* getLoop() const noexcept
4404 {
4405 return mBoundary->getLoop();
4406 }
4407
4408protected:
4409 virtual ~ILoopBoundaryLayer() noexcept = 0;
4410 apiv::VLoopBoundaryLayer* mBoundary;
4411};
4412
4413inline ILoopBoundaryLayer::~ILoopBoundaryLayer() noexcept = default;
4414
4423{
4424public:
4429 {
4430 return mBoundary->getConditional();
4431 }
4432
4433protected:
4434 virtual ~IIfConditionalBoundaryLayer() noexcept = 0;
4435 apiv::VConditionalBoundaryLayer* mBoundary;
4436};
4437
4438inline IIfConditionalBoundaryLayer::~IIfConditionalBoundaryLayer() noexcept = default;
4439
//! Body of IConditionLayer (name taken from its destructor); the class-head line is absent from this listing.
//! The class declares no public members of its own in the visible lines.
4446{
4447public:
4448protected:
     //! Protected pure-virtual destructor; its defaulted definition follows the class body.
4449 virtual ~IConditionLayer() noexcept = 0;
     //! Implementation pointer.
4450 apiv::VConditionLayer* mImpl;
4451};
4452
//! Out-of-class defaulted definition of the pure-virtual destructor declared above.
4453inline IConditionLayer::~IConditionLayer() noexcept = default;
4454
//! Body of IIfConditionalOutputLayer (name taken from its destructor); the class-head line is absent from this listing.
//! The class declares no public members of its own in the visible lines.
4465{
4466public:
4467protected:
     //! Protected pure-virtual destructor; its defaulted definition follows the class body.
4468 virtual ~IIfConditionalOutputLayer() noexcept = 0;
     //! Implementation pointer.
4469 apiv::VConditionalOutputLayer* mImpl;
4470};
4471
//! Out-of-class defaulted definition of the pure-virtual destructor declared above.
4472inline IIfConditionalOutputLayer::~IIfConditionalOutputLayer() noexcept = default;
4473
//! Body of IIfConditionalInputLayer (name taken from its destructor); the class-head line is absent from this listing.
//! The class declares no public members of its own in the visible lines.
4480{
4481public:
4482protected:
     //! Protected pure-virtual destructor; its defaulted definition follows the class body.
4483 virtual ~IIfConditionalInputLayer() noexcept = 0;
     //! Implementation pointer.
4484 apiv::VConditionalInputLayer* mImpl;
4485};
4486
//! Out-of-class defaulted definition of the pure-virtual destructor declared above.
4487inline IIfConditionalInputLayer::~IIfConditionalInputLayer() noexcept = default;
4488
//! Body of IIfConditional (name taken from its destructor); the class-head line is absent from this listing.
//! Every public member is a thin wrapper that forwards to the implementation object mImpl.
4514{
4515public:
     //! Signature line missing from this listing; the body returns mImpl->setCondition(condition).
4526 {
4527 return mImpl->setCondition(condition);
4528 }
4529
     //! \brief Add a conditional output built from the two given subgraph outputs.
     //! \return The layer pointer returned by mImpl->addOutput().
4543 IIfConditionalOutputLayer* addOutput(ITensor& trueSubgraphOutput, ITensor& falseSubgraphOutput) noexcept
4544 {
4545 return mImpl->addOutput(trueSubgraphOutput, falseSubgraphOutput);
4546 }
4547
     //! Signature line missing from this listing; the body returns mImpl->addInput(input).
4556 {
4557 return mImpl->addInput(input);
4558 }
4559
     //! \brief Set the name; forwards \p name to mImpl->setName().
     //! NOTE(review): whether the string is copied is not visible here — confirm in the full header.
4570 void setName(char const* name) noexcept
4571 {
4572 mImpl->setName(name);
4573 }
4574
     //! \brief Return the name as reported by mImpl->getName().
4580 char const* getName() const noexcept
4581 {
4582 return mImpl->getName();
4583 }
4584
4585protected:
     //! Protected pure-virtual destructor; its defaulted definition follows the class body.
4586 virtual ~IIfConditional() noexcept = 0;
     //! Implementation pointer that all wrappers above forward to.
4587 apiv::VIfConditional* mImpl;
4588};
4589
//! Out-of-class defaulted definition of the pure-virtual destructor declared above.
4590inline IIfConditional::~IIfConditional() noexcept = default;
4591
//! Body of IRecurrenceLayer (name taken from its destructor); the class-head line is absent from this listing.
4600{
4601public:
4607 //
     //! Re-exports the setInput overloads inherited from ILayer.
     //! NOTE(review): the per-index input meaning documented in the full header is not visible here.
4620 using ILayer::setInput;
4621
4622protected:
     //! Protected pure-virtual destructor; its defaulted definition follows the class body.
4623 virtual ~IRecurrenceLayer() noexcept = 0;
     //! Implementation pointer.
4624 apiv::VRecurrenceLayer* mImpl;
4625};
4626
//! Out-of-class defaulted definition of the pure-virtual destructor declared above.
4627inline IRecurrenceLayer::~IRecurrenceLayer() noexcept = default;
4628
//! Body of ILoopOutputLayer (name taken from its destructor); the class-head line is absent from this listing.
//! Public members forward to the implementation object mImpl.
4649{
4650public:
     //! \brief Return the LoopOutput kind reported by mImpl->getLoopOutput().
4654 LoopOutput getLoopOutput() const noexcept
4655 {
4656 return mImpl->getLoopOutput();
4657 }
4658
     //! \brief Set the axis; forwards \p axis to mImpl->setAxis().
     //! NOTE(review): for which LoopOutput kinds the axis is used is not visible here — confirm.
4671 void setAxis(int32_t axis) noexcept
4672 {
4673 mImpl->setAxis(axis);
4674 }
4675
     //! \brief Return the axis reported by mImpl->getAxis().
4679 int32_t getAxis() const noexcept
4680 {
4681 return mImpl->getAxis();
4682 }
4683
4689 //
     //! Re-exports the setInput overloads inherited from ILayer.
4704 using ILayer::setInput;
4705
4706protected:
     //! Protected pure-virtual destructor; its defaulted definition follows the class body.
4707 virtual ~ILoopOutputLayer() noexcept = 0;
     //! Implementation pointer that the wrappers above forward to.
4708 apiv::VLoopOutputLayer* mImpl;
4709};
4710
//! Out-of-class defaulted definition of the pure-virtual destructor declared above.
4711inline ILoopOutputLayer::~ILoopOutputLayer() noexcept = default;
4712
//! Body of ITripLimitLayer (name taken from its destructor); the class-head line is absent from this listing.
4725{
4726public:
     //! \brief Return the TripLimit kind reported by mImpl->getTripLimit().
4730 TripLimit getTripLimit() const noexcept
4731 {
4732 return mImpl->getTripLimit();
4733 }
4734
4735protected:
     //! Protected pure-virtual destructor; its defaulted definition follows the class body.
4736 virtual ~ITripLimitLayer() noexcept = 0;
     //! Implementation pointer that getTripLimit() forwards to.
4737 apiv::VTripLimitLayer* mImpl;
4738};
4739
//! Out-of-class defaulted definition of the pure-virtual destructor declared above.
4740inline ITripLimitLayer::~ITripLimitLayer() noexcept = default;
4741
//! Body of IIteratorLayer (name taken from its destructor); the class-head line is absent from this listing.
//! Public members forward to the implementation object mImpl.
4753{
4754public:
     //! \brief Set the axis; forwards \p axis to mImpl->setAxis().
4758 void setAxis(int32_t axis) noexcept
4759 {
4760 mImpl->setAxis(axis);
4761 }
4762
     //! \brief Return the axis reported by mImpl->getAxis().
4766 int32_t getAxis() const noexcept
4767 {
4768 return mImpl->getAxis();
4769 }
4770
     //! \brief Set the reverse flag; forwards \p reverse to mImpl->setReverse().
     //! NOTE(review): presumably selects iteration in reverse order — confirm in the full header.
4780 void setReverse(bool reverse) noexcept
4781 {
4782 mImpl->setReverse(reverse);
4783 }
4784
     //! \brief Return the reverse flag reported by mImpl->getReverse().
4790 bool getReverse() const noexcept
4791 {
4792 return mImpl->getReverse();
4793 }
4794
4795protected:
     //! Protected pure-virtual destructor; its defaulted definition follows the class body.
4796 virtual ~IIteratorLayer() noexcept = 0;
     //! Implementation pointer that the wrappers above forward to.
4797 apiv::VIteratorLayer* mImpl;
4798};
4799
//! Out-of-class defaulted definition of the pure-virtual destructor declared above.
4800inline IIteratorLayer::~IIteratorLayer() noexcept = default;
4801
//! \brief Handle for building a loop; derives from INoCopy and forwards every call to the implementation object mImpl.
4812class ILoop : public INoCopy
4813{
4814public:
     //! \brief Add a recurrence layer seeded with \p initialValue.
     //! \return The layer pointer returned by mImpl->addRecurrence().
4821 IRecurrenceLayer* addRecurrence(ITensor& initialValue) noexcept
4822 {
4823 return mImpl->addRecurrence(initialValue);
4824 }
4825
     //! Signature line missing from this listing; the body returns mImpl->addTripLimit(tensor, limit).
4843 {
4844 return mImpl->addTripLimit(tensor, limit);
4845 }
4846
     //! \brief Add an iterator layer over \p tensor.
     //! \param axis Forwarded to the implementation; defaults to 0.
     //! \param reverse Forwarded to the implementation; defaults to false.
     //! \return The layer pointer returned by mImpl->addIterator().
4855 IIteratorLayer* addIterator(ITensor& tensor, int32_t axis = 0, bool reverse = false) noexcept
4856 {
4857 return mImpl->addIterator(tensor, axis, reverse);
4858 }
4859
     //! \brief Add a loop output layer for \p tensor of the given \p outputKind.
     //! \param axis Forwarded to the implementation; defaults to 0.
     //! \return The layer pointer returned by mImpl->addLoopOutput().
4868 ILoopOutputLayer* addLoopOutput(ITensor& tensor, LoopOutput outputKind, int32_t axis = 0) noexcept
4869 {
4870 return mImpl->addLoopOutput(tensor, outputKind, axis);
4871 }
4872
     //! \brief Set the loop name; forwards \p name to mImpl->setName().
4883 void setName(char const* name) noexcept
4884 {
4885 mImpl->setName(name);
4886 }
4887
     //! \brief Return the loop name reported by mImpl->getName().
4893 char const* getName() const noexcept
4894 {
4895 return mImpl->getName();
4896 }
4897
4898protected:
     //! Protected pure-virtual destructor; its defaulted definition follows the class body.
4899 virtual ~ILoop() noexcept = 0;
     //! Implementation pointer that all wrappers above forward to.
4900 apiv::VLoop* mImpl;
4901};
4902
//! Out-of-class defaulted definition of the pure-virtual destructor declared above.
4903inline ILoop::~ILoop() noexcept = default;
4904
//! \brief Select layer interface; derives from ILayer and adds no public members of its own.
4917class ISelectLayer : public ILayer
4918{
4919protected:
     //! Protected pure-virtual destructor; its defaulted definition follows the class body.
4920 virtual ~ISelectLayer() noexcept = 0;
     //! Implementation pointer.
4921 apiv::VSelectLayer* mImpl;
4922};
4923
//! Out-of-class defaulted definition of the pure-virtual destructor declared above.
4924inline ISelectLayer::~ISelectLayer() noexcept = default;
4925
//! Body of IAssertionLayer (name taken from its destructor); the class-head line is absent from this listing.
4942{
4943public:
     //! \brief Set the assertion message; forwards \p message to mImpl->setMessage().
     //! NOTE(review): whether the string is copied is not visible here — confirm in the full header.
4952 void setMessage(char const* message) noexcept
4953 {
4954 mImpl->setMessage(message);
4955 }
4956
     //! \brief Return the assertion message reported by mImpl->getMessage().
4962 char const* getMessage() const noexcept
4963 {
4964 return mImpl->getMessage();
4965 }
4966
4967protected:
     //! Protected pure-virtual destructor; its defaulted definition follows the class body.
4968 virtual ~IAssertionLayer() noexcept = 0;
4969
     //! Implementation pointer that the wrappers above forward to.
4970 apiv::VAssertionLayer* mImpl;
4971};
4972
//! Out-of-class defaulted definition of the pure-virtual destructor declared above.
4973inline IAssertionLayer::~IAssertionLayer() noexcept = default;
4974
//! \brief Fill operations, with underlying int32_t values 0..2.
//! NOTE(review): enumerator doc comments are absent from this listing; meanings below are inferred from names — confirm.
4982enum class FillOperation : int32_t
4983{
     //! Presumably: linearly spaced values — confirm.
5000 kLINSPACE = 0,
5001
     //! Presumably: uniformly distributed random values — confirm.
5004 kRANDOM_UNIFORM = 1,
5005
     //! Presumably: normally distributed random values — confirm.
5008 kRANDOM_NORMAL = 2
5009};
5010
//! Explicit template specialization whose struct-head line (listing line 5017) is missing here.
//! kVALUE (3) equals the number of FillOperation enumerators above; presumably the enum-max trait — confirm.
5016template <>
5018{
5019 static constexpr int32_t kVALUE = 3;
5020};
5021
//! \brief Fill layer interface; derives from ILayer and forwards every call to the implementation object mImpl.
//! Alpha/beta parameters are exposed both as double and as int64_t accessors.
//! NOTE(review): how alpha and beta are interpreted per FillOperation is not visible in this listing.
5057class IFillLayer : public ILayer
5058{
5059public:
5068 //
     //! \brief Set the output dimensions; forwards \p dimensions to mImpl->setDimensions().
5069 void setDimensions(Dims const& dimensions) noexcept
5070 {
5071 mImpl->setDimensions(dimensions);
5072 }
5073
     //! \brief Return the dimensions reported by mImpl->getDimensions().
5084 Dims getDimensions() const noexcept
5085 {
5086 return mImpl->getDimensions();
5087 }
5088
     //! \brief Set the fill operation; forwards \p op to mImpl->setOperation().
5094 void setOperation(FillOperation op) noexcept
5095 {
5096 mImpl->setOperation(op);
5097 }
5098
     //! Signature line missing from this listing; the body returns mImpl->getOperation().
5105 {
5106 return mImpl->getOperation();
5107 }
5108
5122 //
     //! \brief Set alpha as a double; forwards \p alpha to mImpl->setAlpha().
5123 void setAlpha(double alpha) noexcept
5124 {
5125 mImpl->setAlpha(alpha);
5126 }
5127
     //! \brief Return alpha as a double, as reported by mImpl->getAlpha().
5138 double getAlpha() const noexcept
5139 {
5140 return mImpl->getAlpha();
5141 }
5142
     //! \brief Set beta as a double; forwards \p beta to mImpl->setBeta().
5157 void setBeta(double beta) noexcept
5158 {
5159 mImpl->setBeta(beta);
5160 }
5161
     //! \brief Return beta as a double, as reported by mImpl->getBeta().
5172 double getBeta() const noexcept
5173 {
5174 return mImpl->getBeta();
5175 }
5176
     //! Re-exports the setInput overloads inherited from ILayer.
5217 using ILayer::setInput;
5218
5232 //
     //! \brief Set alpha as a 64-bit integer; forwards \p alpha to mImpl->setAlphaInt64().
5233 void setAlphaInt64(int64_t alpha) noexcept
5234 {
5235 mImpl->setAlphaInt64(alpha);
5236 }
5237
     //! \brief Return alpha as a 64-bit integer, as reported by mImpl->getAlphaInt64().
5248 int64_t getAlphaInt64() const noexcept
5249 {
5250 return mImpl->getAlphaInt64();
5251 }
5252
     //! \brief Set beta as a 64-bit integer; forwards \p beta to mImpl->setBetaInt64().
5267 void setBetaInt64(int64_t beta) noexcept
5268 {
5269 mImpl->setBetaInt64(beta);
5270 }
5271
     //! \brief Return beta as a 64-bit integer, as reported by mImpl->getBetaInt64().
5282 int64_t getBetaInt64() const noexcept
5283 {
5284 return mImpl->getBetaInt64();
5285 }
5286
     //! \brief Return the result of mImpl->isAlphaBetaInt64().
     //! NOTE(review): presumably true when alpha/beta were set through the Int64 setters — confirm.
5290 bool isAlphaBetaInt64() const noexcept
5291 {
5292 return mImpl->isAlphaBetaInt64();
5293 }
5294
     //! \brief Set the output data type; forwards \p toType to mImpl->setToType().
5308 void setToType(DataType toType) noexcept
5309 {
5310 mImpl->setToType(toType);
5311 }
5312
     //! \brief Return the data type reported by mImpl->getToType().
5320 DataType getToType() const noexcept
5321 {
5322 return mImpl->getToType();
5323 }
5324
5325protected:
     //! Protected pure-virtual destructor; its defaulted definition follows the class body.
5326 virtual ~IFillLayer() noexcept = 0;
     //! Implementation pointer that all wrappers above forward to.
5327 apiv::VFillLayer* mImpl;
5328};
5329
//! Out-of-class defaulted definition of the pure-virtual destructor declared above.
5330inline IFillLayer::~IFillLayer() noexcept = default;
5331
//! Body of IQuantizeLayer (name taken from its destructor); the class-head line is absent from this listing.
//! Public members forward to the implementation object mImpl.
5407{
5408public:
     //! \brief Return the axis reported by mImpl->getAxis().
5417 int32_t getAxis() const noexcept
5418 {
5419 return mImpl->getAxis();
5420 }
     //! \brief Set the axis; forwards \p axis to mImpl->setAxis().
5428 void setAxis(int32_t axis) noexcept
5429 {
5430 mImpl->setAxis(axis);
5431 }
5432
     //! \brief Set the block shape; forwards \p blockShape to mImpl->setBlockShape().
     //! \return The bool returned by the implementation (presumably success/failure — confirm).
5441 bool setBlockShape(Dims const& blockShape) noexcept
5442 {
5443 return mImpl->setBlockShape(blockShape);
5444 }
5445
     //! Signature line missing from this listing; the body returns mImpl->getBlockShape().
5453 {
5454 return mImpl->getBlockShape();
5455 }
5456
     //! \brief Set the output data type; forwards \p toType to mImpl->setToType().
5468 void setToType(DataType toType) noexcept
5469 {
5470 mImpl->setToType(toType);
5471 }
5472
     //! \brief Return the data type reported by mImpl->getToType().
5480 DataType getToType() const noexcept
5481 {
5482 return mImpl->getToType();
5483 }
5484
5485protected:
     //! Protected pure-virtual destructor; its defaulted definition follows the class body.
5486 virtual ~IQuantizeLayer() noexcept = 0;
     //! Implementation pointer that all wrappers above forward to.
5487 apiv::VQuantizeLayer* mImpl;
5488};
5489
//! Out-of-class defaulted definition of the pure-virtual destructor declared above.
5490inline IQuantizeLayer::~IQuantizeLayer() noexcept = default;
5491
//! Body of IDequantizeLayer (name taken from its destructor); the class-head line is absent from this listing.
//! Public members forward to the implementation object mImpl; the interface mirrors IQuantizeLayer's visible members.
5561{
5562public:
     //! \brief Return the axis reported by mImpl->getAxis().
5571 int32_t getAxis() const noexcept
5572 {
5573 return mImpl->getAxis();
5574 }
     //! \brief Set the axis; forwards \p axis to mImpl->setAxis().
5582 void setAxis(int32_t axis) noexcept
5583 {
5584 mImpl->setAxis(axis);
5585 }
5586
     //! \brief Set the block shape; forwards \p blockShape to mImpl->setBlockShape().
     //! \return The bool returned by the implementation (presumably success/failure — confirm).
5599 bool setBlockShape(Dims const& blockShape) noexcept
5600 {
5601 return mImpl->setBlockShape(blockShape);
5602 }
5603
     //! Signature line missing from this listing; the body returns mImpl->getBlockShape().
5611 {
5612 return mImpl->getBlockShape();
5613 }
5614
     //! \brief Set the output data type; forwards \p toType to mImpl->setToType().
5626 void setToType(DataType toType) noexcept
5627 {
5628 mImpl->setToType(toType);
5629 }
5630
     //! \brief Return the data type reported by mImpl->getToType().
5638 DataType getToType() const noexcept
5639 {
5640 return mImpl->getToType();
5641 }
5642
5643protected:
     //! Protected pure-virtual destructor; its defaulted definition follows the class body.
5644 virtual ~IDequantizeLayer() noexcept = 0;
     //! Implementation pointer that all wrappers above forward to.
5645 apiv::VDequantizeLayer* mImpl;
5646};
5647
//! Out-of-class defaulted definition of the pure-virtual destructor declared above.
5648inline IDequantizeLayer::~IDequantizeLayer() noexcept = default;
5649
//! Body of IDynamicQuantizeLayer (name taken from its destructor); the class-head line is absent from this listing.
//! Public members forward to the implementation object mImpl. The axis/block-size accessors are marked
//! TRT_DEPRECATED, while the block-shape accessors are not.
5668{
5669public:
     //! Re-exports the setInput overloads inherited from ILayer.
5681 using ILayer::setInput;
5682
     //! \brief Set the quantized output data type; forwards \p toType to mImpl->setToType().
5695 void setToType(DataType toType) noexcept
5696 {
5697 mImpl->setToType(toType);
5698 }
5699
     //! \brief Return the data type reported by mImpl->getToType().
5708 DataType getToType() const noexcept
5709 {
5710 return mImpl->getToType();
5711 }
5712
     //! \brief Set the scale data type; forwards \p scaleType to mImpl->setScaleType().
5721 void setScaleType(DataType scaleType) noexcept
5722 {
5723 mImpl->setScaleType(scaleType);
5724 }
5725
     //! \brief Return the scale data type reported by mImpl->getScaleType().
5734 DataType getScaleType() const noexcept
5735 {
5736 return mImpl->getScaleType();
5737 }
5738
     //! \brief Deprecated: set the axis; forwards \p axis to mImpl->setAxis().
     //! NOTE(review): presumably superseded by setBlockShape() — confirm in the full header.
5747 TRT_DEPRECATED void setAxis(int32_t axis) noexcept
5748 {
5749 mImpl->setAxis(axis);
5750 }
5751
     //! \brief Deprecated: return the axis reported by mImpl->getAxis().
5757 TRT_DEPRECATED int32_t getAxis() const noexcept
5758 {
5759 return mImpl->getAxis();
5760 }
5761
     //! \brief Deprecated: set the block size; forwards \p size to mImpl->setBlockSize().
5770 TRT_DEPRECATED void setBlockSize(int32_t size) noexcept
5771 {
5772 mImpl->setBlockSize(size);
5773 }
5774
     //! \brief Deprecated: return the block size reported by mImpl->getBlockSize().
5780 TRT_DEPRECATED int32_t getBlockSize() const noexcept
5781 {
5782 return mImpl->getBlockSize();
5783 }
5784
     //! \brief Set the block shape; forwards \p blockShape to mImpl->setBlockShape().
     //! Unlike the Quantize/Dequantize layers' setBlockShape in this file, this one returns void.
5793 void setBlockShape(Dims const& blockShape) noexcept
5794 {
5795 mImpl->setBlockShape(blockShape);
5796 }
5797
     //! \brief Return the block shape reported by mImpl->getBlockShape().
5805 Dims getBlockShape() const noexcept
5806 {
5807 return mImpl->getBlockShape();
5808 }
5809
5810protected:
     //! Protected pure-virtual destructor; its defaulted definition follows the class body.
5811 virtual ~IDynamicQuantizeLayer() noexcept = 0;
     //! Implementation pointer that all wrappers above forward to.
5812 apiv::VDynamicQuantizeLayer* mImpl;
5813};
5814
//! Out-of-class defaulted definition of the pure-virtual destructor declared above.
5815inline IDynamicQuantizeLayer::~IDynamicQuantizeLayer() noexcept = default;
5816
//! \brief Einsum layer interface; derives from ILayer and forwards to the implementation object mImpl.
5851class IEinsumLayer : public ILayer
5852{
5853public:
     //! \brief Set the equation string; forwards \p equation to mImpl->setEquation().
     //! \return The bool returned by the implementation (presumably whether the equation was accepted — confirm).
5863 bool setEquation(char const* equation) noexcept
5864 {
5865 return mImpl->setEquation(equation);
5866 }
5867
     //! \brief Return the equation string reported by mImpl->getEquation().
5873 char const* getEquation() const noexcept
5874 {
5875 return mImpl->getEquation();
5876 }
5877
5878protected:
     //! Protected pure-virtual destructor; its defaulted definition follows the class body.
5879 virtual ~IEinsumLayer() noexcept = 0;
     //! Implementation pointer that the wrappers above forward to.
5880 apiv::VEinsumLayer* mImpl;
5881};
5882
//! Out-of-class defaulted definition of the pure-virtual destructor declared above.
5883inline IEinsumLayer::~IEinsumLayer() noexcept = default;
5884
//! \brief Scatter modes, with underlying int32_t values 0..1.
//! NOTE(review): names suggest element-wise scatter (kELEMENT) versus N-dimensional scatter (kND) — confirm.
5892enum class ScatterMode : int32_t
5893{
5894 kELEMENT = 0,
5895 kND = 1,
5896};
5897
//! Explicit template specialization whose struct-head line (listing line 5904) is missing here.
//! kVALUE (2) equals the number of ScatterMode enumerators above; presumably the enum-max trait — confirm.
5903template <>
5905{
5906 static constexpr int32_t kVALUE = 2;
5907};
5908
//! \brief Scatter layer interface; derives from ILayer and forwards to the implementation object mImpl.
5966class IScatterLayer : public ILayer
5967{
5968public:
     //! \brief Set the scatter mode; forwards \p mode to mImpl->setMode().
5974 void setMode(ScatterMode mode) noexcept
5975 {
5976 mImpl->setMode(mode);
5977 }
5978
     //! \brief Return the scatter mode reported by mImpl->getMode().
5984 ScatterMode getMode() const noexcept
5985 {
5986 return mImpl->getMode();
5987 }
5988
     //! \brief Set the axis; forwards \p axis to mImpl->setAxis().
     //! NOTE(review): which ScatterMode values use the axis is not visible here — confirm.
5994 void setAxis(int32_t axis) noexcept
5995 {
5996 mImpl->setAxis(axis);
5997 }
5998
     //! \brief Return the axis reported by mImpl->getAxis().
6002 int32_t getAxis() const noexcept
6003 {
6004 return mImpl->getAxis();
6005 }
6006
6007protected:
     //! Implementation pointer that the wrappers above forward to.
6008 apiv::VScatterLayer* mImpl;
     //! Protected pure-virtual destructor; its defaulted definition follows the class body.
6009 virtual ~IScatterLayer() noexcept = 0;
6010}; // class IScatterLayer
6011
//! Out-of-class defaulted definition of the pure-virtual destructor declared above.
6012inline IScatterLayer::~IScatterLayer() noexcept = default;
6013
//! \brief One-hot layer interface; derives from ILayer and forwards to the implementation object mImpl.
6041class IOneHotLayer : public ILayer
6042{
6043public:
     //! \brief Set the axis; forwards \p axis to mImpl->setAxis().
6049 void setAxis(int32_t axis) noexcept
6050 {
6051 mImpl->setAxis(axis);
6052 }
6053
     //! \brief Return the axis reported by mImpl->getAxis().
6057 int32_t getAxis() const noexcept
6058 {
6059 return mImpl->getAxis();
6060 }
6061
6062protected:
     //! Implementation pointer that the wrappers above forward to.
6063 apiv::VOneHotLayer* mImpl;
     //! Protected pure-virtual destructor; its defaulted definition follows the class body.
6064 virtual ~IOneHotLayer() noexcept = 0;
6065};
6066
//! Out-of-class defaulted definition of the pure-virtual destructor declared above.
6067inline IOneHotLayer::~IOneHotLayer() noexcept = default;
6068
//! Body of IGridSampleLayer (name taken from its destructor); the class-head line is absent from this listing.
//! Public members forward to the implementation object mImpl.
6081{
6082public:
     //! Signature line missing from this listing; the body calls mImpl->setInterpolationMode(mode).
6089 {
6090 mImpl->setInterpolationMode(mode);
6091 }
6092
     //! Signature line missing from this listing; the body returns mImpl->getInterpolationMode().
6101 {
6102 return mImpl->getInterpolationMode();
6103 }
6104
     //! \brief Set the align-corners flag; forwards \p alignCorners to mImpl->setAlignCorners().
6110 void setAlignCorners(bool alignCorners) noexcept
6111 {
6112 mImpl->setAlignCorners(alignCorners);
6113 }
6114
     //! \brief Return the align-corners flag reported by mImpl->getAlignCorners().
6122 bool getAlignCorners() const noexcept
6123 {
6124 return mImpl->getAlignCorners();
6125 }
6126
     //! \brief Set the sample mode; forwards \p mode to mImpl->setSampleMode().
     //! \return The bool returned by the implementation (presumably success/failure — confirm).
6134 bool setSampleMode(SampleMode mode) noexcept
6135 {
6136 return mImpl->setSampleMode(mode);
6137 }
6138
     //! \brief Return the sample mode reported by mImpl->getSampleMode().
6146 SampleMode getSampleMode() const noexcept
6147 {
6148 return mImpl->getSampleMode();
6149 }
6150
6151protected:
     //! Implementation pointer that the wrappers above forward to.
6152 apiv::VGridSampleLayer* mImpl;
     //! Protected pure-virtual destructor; its defaulted definition follows the class body.
6153 virtual ~IGridSampleLayer() noexcept = 0;
6154}; // class IGridSampleLayer
6155
//! Out-of-class defaulted definition of the pure-virtual destructor declared above.
6156inline IGridSampleLayer::~IGridSampleLayer() noexcept = default;
6157
//! \brief Bounding-box representations, with underlying int32_t values 0..1.
//! NOTE(review): enumerator doc comments are absent from this listing; meanings below are inferred from names — confirm.
6165enum class BoundingBoxFormat : int32_t
6166{
     //! Presumably: a box is given by two opposite corners — confirm.
6168 kCORNER_PAIRS = 0,
     //! Presumably: a box is given by its center and its sizes — confirm.
6170 kCENTER_SIZES = 1
6171};
6172
//! Explicit template specialization whose struct-head line (listing line 6179) is missing here.
//! kVALUE (2) equals the number of BoundingBoxFormat enumerators above; presumably the enum-max trait — confirm.
6178template <>
6180{
6181 static constexpr int32_t kVALUE = 2;
6182};
6183
//! \brief NMS layer interface; derives from ILayer and forwards to the implementation object mImpl.
6234class INMSLayer : public ILayer
6235{
6236public:
     //! Signature line missing from this listing; the body calls mImpl->setBoundingBoxFormat(fmt).
6247 {
6248 mImpl->setBoundingBoxFormat(fmt);
6249 }
6250
     //! Signature line missing from this listing; the body returns mImpl->getBoundingBoxFormat().
6259 {
6260 return mImpl->getBoundingBoxFormat();
6261 }
6262
     //! \brief Set the top-K box limit; forwards \p limit to mImpl->setTopKBoxLimit().
     //! NOTE(review): the permitted range and default of \p limit are not visible here — confirm.
6272 void setTopKBoxLimit(int32_t limit) noexcept
6273 {
6274 mImpl->setTopKBoxLimit(limit);
6275 }
6276
     //! \brief Return the top-K box limit reported by mImpl->getTopKBoxLimit().
6282 int32_t getTopKBoxLimit() const noexcept
6283 {
6284 return mImpl->getTopKBoxLimit();
6285 }
6286
     //! Re-exports the setInput overloads inherited from ILayer.
6305 using ILayer::setInput;
6306
     //! \brief Set the data type used for indices; forwards \p type to mImpl->setIndicesType().
     //! \return The bool returned by the implementation (presumably whether \p type was accepted — confirm).
6317 bool setIndicesType(DataType type) noexcept
6318 {
6319 return mImpl->setIndicesType(type);
6320 }
6321
     //! \brief Return the indices data type reported by mImpl->getIndicesType().
6329 DataType getIndicesType() const noexcept
6330 {
6331 return mImpl->getIndicesType();
6332 }
6333
6334protected:
     //! Implementation pointer that the wrappers above forward to.
6335 apiv::VNMSLayer* mImpl;
     //! Protected pure-virtual destructor; its defaulted definition follows the class body.
6336 virtual ~INMSLayer() noexcept = 0;
6337}; // class INMSLayer
6338
//! Out-of-class defaulted definition of the pure-virtual destructor declared above.
6339inline INMSLayer::~INMSLayer() noexcept = default;
6340
//! Body of IReverseSequenceLayer (name taken from its destructor); the class-head line is absent from this listing.
//! Public members forward to the implementation object mImpl.
6354{
6355public:
     //! \brief Set the batch axis; forwards \p batchAxis to mImpl->setBatchAxis().
6364 void setBatchAxis(int32_t batchAxis) noexcept
6365 {
6366 mImpl->setBatchAxis(batchAxis);
6367 }
6368
     //! \brief Return the batch axis reported by mImpl->getBatchAxis().
6374 int32_t getBatchAxis() const noexcept
6375 {
6376 return mImpl->getBatchAxis();
6377 }
6378
     //! \brief Set the sequence axis; forwards \p sequenceAxis to mImpl->setSequenceAxis().
6387 void setSequenceAxis(int32_t sequenceAxis) noexcept
6388 {
6389 mImpl->setSequenceAxis(sequenceAxis);
6390 }
6391
     //! \brief Return the sequence axis reported by mImpl->getSequenceAxis().
6397 int32_t getSequenceAxis() const noexcept
6398 {
6399 return mImpl->getSequenceAxis();
6400 }
6401
6402protected:
     //! Implementation pointer that the wrappers above forward to.
6403 apiv::VReverseSequenceLayer* mImpl;
     //! Protected pure-virtual destructor; its defaulted definition follows the class body.
6404 virtual ~IReverseSequenceLayer() noexcept = 0;
6405}; // class IReverseSequenceLayer
6406
//! Out-of-class defaulted definition of the pure-virtual destructor declared above.
6407inline IReverseSequenceLayer::~IReverseSequenceLayer() noexcept = default;
6408
//! Body of INormalizationLayer (name taken from its destructor); the class-head line is absent from this listing.
//! Public members forward to the implementation object mImpl.
//! NOTE(review): the three void setters below write `return mImpl->...;` (returning a void expression), whereas
//! void setters in sibling classes call the implementation without `return`. This is legal C++; consider aligning the style.
6428{
6429public:
     //! \brief Set epsilon; forwards \p eps to mImpl->setEpsilon().
6437 void setEpsilon(float eps) noexcept
6438 {
6439 return mImpl->setEpsilon(eps);
6440 }
6441
     //! \brief Return epsilon as reported by mImpl->getEpsilon().
6447 float getEpsilon() const noexcept
6448 {
6449 return mImpl->getEpsilon();
6450 }
6451
     //! \brief Set the axes mask; forwards \p axesMask to mImpl->setAxes().
     //! NOTE(review): presumably a bit mask with one bit per axis — confirm in the full header.
6457 void setAxes(uint32_t axesMask) noexcept
6458 {
6459 return mImpl->setAxes(axesMask);
6460 }
6461
     //! \brief Return the axes mask reported by mImpl->getAxes().
6467 uint32_t getAxes() const noexcept
6468 {
6469 return mImpl->getAxes();
6470 }
6471
     //! \brief Set the number of groups; forwards \p nbGroups to mImpl->setNbGroups().
6488 void setNbGroups(int64_t nbGroups) noexcept
6489 {
6490 return mImpl->setNbGroups(nbGroups);
6491 }
6492
     //! \brief Return the number of groups reported by mImpl->getNbGroups().
6498 int64_t getNbGroups() const noexcept
6499 {
6500 return mImpl->getNbGroups();
6501 }
6502
6503
     //! \brief Return the result of mImpl->isV2(); marked TRT_NODISCARD.
     //! NOTE(review): what distinguishes a "V2" normalization layer is not visible in this listing.
6509 TRT_NODISCARD bool isV2() const noexcept
6510 {
6511 return mImpl->isV2();
6512 }
6513
6514protected:
     //! Implementation pointer that the wrappers above forward to.
6515 apiv::VNormalizationLayer* mImpl;
     //! Protected pure-virtual destructor; its defaulted definition follows the class body.
6516 virtual ~INormalizationLayer() noexcept = 0;
6517};
6518
//! Out-of-class defaulted definition of the pure-virtual destructor declared above.
6519inline INormalizationLayer::~INormalizationLayer() noexcept = default;
6520
6521
//! \brief Squeeze layer interface; derives from ILayer and only re-exports the inherited setInput overloads.
6530class ISqueezeLayer : public ILayer
6531{
6532public:
     //! Re-exports the setInput overloads inherited from ILayer.
     //! NOTE(review): the per-index input meaning documented in the full header is not visible here.
6545 using ILayer::setInput;
6546
6547protected:
     //! Implementation pointer.
6548 apiv::VSqueezeLayer* mImpl;
     //! Protected pure-virtual destructor; its defaulted definition follows the class body.
6549 virtual ~ISqueezeLayer() noexcept = 0;
6550};
6551
//! Out-of-class defaulted definition of the pure-virtual destructor declared above.
6552inline ISqueezeLayer::~ISqueezeLayer() noexcept = default;
6553
//! Body of IUnsqueezeLayer (name taken from its destructor); the class-head line is absent from this listing.
6563{
6564public:
     //! Re-exports the setInput overloads inherited from ILayer.
6578 using ILayer::setInput;
6579
6580protected:
     //! Implementation pointer.
6581 apiv::VUnsqueezeLayer* mImpl;
     //! Protected pure-virtual destructor; its defaulted definition follows the class body.
6582 virtual ~IUnsqueezeLayer() noexcept = 0;
6583};
6584
//! Out-of-class defaulted definition of the pure-virtual destructor declared above.
6585inline IUnsqueezeLayer::~IUnsqueezeLayer() noexcept = default;
6586
//! \brief Cumulative operations; only one enumerator (kSUM = 0) is defined in this listing.
6598enum class CumulativeOperation : int32_t
6599{
6600 kSUM = 0,
6601};
6602
//! Explicit template specialization whose struct-head line (listing line 6609) is missing here.
//! kVALUE (1) equals the number of CumulativeOperation enumerators above; presumably the enum-max trait — confirm.
6608template <>
6610{
6611 static constexpr int32_t kVALUE = 1;
6612};
6613
//! Body of ICumulativeLayer (name taken from its destructor); the class-head line is absent from this listing.
//! Public members forward to the implementation object mImpl.
6642{
6643public:
     //! Signature line missing from this listing; the body returns mImpl->setOperation(op).
6654 {
6655 return mImpl->setOperation(op);
6656 }
6657
     //! Signature line missing from this listing; the body returns mImpl->getOperation().
6666 {
6667 return mImpl->getOperation();
6668 }
6669
     //! \brief Set the exclusive flag; forwards \p exclusive to mImpl->setExclusive().
     //! NOTE(review): presumably selects an exclusive (versus inclusive) scan — confirm in the full header.
6677 void setExclusive(bool exclusive) noexcept
6678 {
6679 mImpl->setExclusive(exclusive);
6680 }
6681
     //! \brief Return the exclusive flag reported by mImpl->getExclusive().
6689 bool getExclusive() const noexcept
6690 {
6691 return mImpl->getExclusive();
6692 }
6693
     //! \brief Set the reverse flag; forwards \p reverse to mImpl->setReverse().
6701 void setReverse(bool reverse) noexcept
6702 {
6703 mImpl->setReverse(reverse);
6704 }
6705
     //! \brief Return the reverse flag reported by mImpl->getReverse().
6713 bool getReverse() const noexcept
6714 {
6715 return mImpl->getReverse();
6716 }
6717
6718protected:
     //! Implementation pointer that the wrappers above forward to.
6719 apiv::VCumulativeLayer* mImpl;
     //! Protected pure-virtual destructor; its defaulted definition follows the class body.
6720 virtual ~ICumulativeLayer() noexcept = 0;
6721};
6722
//! Out-of-class defaulted definition of the pure-virtual destructor declared above.
6723inline ICumulativeLayer::~ICumulativeLayer() noexcept = default;
6724
//! \brief Normalization operations for attention, with underlying int32_t values 0..1.
//! Note that the kNONE initializer is split across two listing lines (`kNONE` then `= 0,`).
6730enum class AttentionNormalizationOp : int32_t
6731{
6732 kNONE
6733 = 0,
6734 kSOFTMAX = 1,
6735};
6736
//! Explicit template specialization whose struct-head line (listing line 6743) is missing here.
//! kVALUE (2) equals the number of AttentionNormalizationOp enumerators above; presumably the enum-max trait — confirm.
6742template <>
6744{
6745 static constexpr int32_t kVALUE = 2;
6746};
6747
//! \brief Kinds of causal mask, with underlying int32_t values 0..2.
//! NOTE(review): enumerator doc comments are absent from this listing; names suggest no causal mask (kNONE) and
//! two alignments of the causal triangle (kUPPER_LEFT, kLOWER_RIGHT) — confirm against the full header.
6762enum class CausalMaskKind : int32_t
6763{
6765 kNONE = 0,
6766
6768 kUPPER_LEFT = 1,
6769
6771 kLOWER_RIGHT = 2,
6772};
6773
//! Explicit template specialization whose struct-head line (listing line 6780) is missing here.
//! kVALUE (3) equals the number of CausalMaskKind enumerators above; presumably the enum-max trait — confirm.
6779template <>
6781{
6782 static constexpr int32_t kVALUE = 3;
6783};
6784
//! \brief Tensor forms for attention inputs/outputs, with underlying int32_t values 0..1.
//! NOTE(review): enumerator doc comments are absent from this listing; the names suggest a padded layout versus a
//! packed layout, but the exact dimension order is not visible here — confirm against the full header.
6790enum class AttentionIOForm : int32_t
6791{
6794 kPADDED_BHND = 0,
6797 kPACKED_NHD = 1,
6798};
6799
//! Explicit template specialization whose struct-head line (listing line 6806) is missing here.
//! kVALUE (2) equals the number of AttentionIOForm enumerators above; presumably the enum-max trait — confirm.
6805template <>
6807{
6808 static constexpr int32_t kVALUE = 2;
6809};
6810
//! Body of IAttentionBoundaryLayer (name taken from its destructor); the class-head line is absent from this listing.
6821{
6822public:
     //! \brief Return the IAttention reported by the boundary implementation object.
6826 IAttention* getAttention() const noexcept
6827 {
6828 return mBoundary->getAttention();
6829 }
6830
6831protected:
     //! Protected pure-virtual destructor; its defaulted definition follows the class body.
6832 virtual ~IAttentionBoundaryLayer() noexcept = 0;
     //! Implementation pointer that getAttention() forwards to.
6833 apiv::VAttentionBoundaryLayer* mBoundary;
6834};
6835
//! Out-of-class defaulted definition of the pure-virtual destructor declared above.
6836inline IAttentionBoundaryLayer::~IAttentionBoundaryLayer() noexcept = default;
6837
//! Body of IAttentionInputLayer (name taken from its destructor); the class-head line is absent from this listing.
6849{
6850public:
     //! Re-exports the setInput overloads inherited from ILayer.
     //! NOTE(review): the per-index input meaning documented in the full header is not visible here.
6868 using ILayer::setInput;
6869
6870protected:
     //! Protected pure-virtual destructor; its defaulted definition follows the class body.
6871 virtual ~IAttentionInputLayer() noexcept = 0;
     //! Implementation pointer.
6872 apiv::VAttentionInputLayer* mImpl;
6873};
6874
//! Out-of-class defaulted definition of the pure-virtual destructor declared above.
6875inline IAttentionInputLayer::~IAttentionInputLayer() noexcept = default;
6876
//! Body of IAttentionOutputLayer (name taken from its destructor); the class-head line is absent from this listing.
//! The class declares no public members of its own in the visible lines.
6888{
6889public:
6890protected:
     //! Protected pure-virtual destructor; its defaulted definition follows the class body.
6891 virtual ~IAttentionOutputLayer() noexcept = 0;
     //! Implementation pointer.
6892 apiv::VAttentionOutputLayer* mImpl;
6893};
6894
//! Out-of-class defaulted definition of the pure-virtual destructor declared above.
6895inline IAttentionOutputLayer::~IAttentionOutputLayer() noexcept = default;
6896
//! \brief Handle for an attention construct; derives from INoCopy and forwards every call to the implementation
//! object mImpl. Most setters return the bool produced by the implementation (presumably success/failure — confirm).
//! Several members below have lost their signature line in this listing; for those only the forwarding body is visible.
6957class IAttention : public INoCopy
6958{
6959public:
     //! Signature line missing from this listing; the body returns mImpl->setNormalizationOperation(op).
6968 {
6969 return mImpl->setNormalizationOperation(op);
6970 }
6971
     //! Signature line missing from this listing; the body returns mImpl->getNormalizationOperation().
6980 {
6981 return mImpl->getNormalizationOperation();
6982 }
6983
     //! \brief Set the mask tensor; forwards \p mask to mImpl->setMask().
     //! \return The bool returned by the implementation.
6996 bool setMask(ITensor& mask) noexcept
6997 {
6998 return mImpl->setMask(mask);
6999 }
7000
     //! \brief Return the mask tensor reported by mImpl->getMask().
     //! NOTE(review): unlike the other getters in this class, this one is not const-qualified.
7008 ITensor* getMask() noexcept
7009 {
7010 return mImpl->getMask();
7011 }
7012
     //! \brief Deprecated: set the causal flag; forwards \p isCausal to mImpl->setCausal().
     //! NOTE(review): presumably superseded by setCausalKind() — confirm in the full header.
7026 TRT_DEPRECATED bool setCausal(bool isCausal) noexcept
7027 {
7028 return mImpl->setCausal(isCausal);
7029 }
7030
     //! \brief Deprecated: return the causal flag reported by mImpl->getCausal().
7040 TRT_DEPRECATED bool getCausal() const noexcept
7041 {
7042 return mImpl->getCausal();
7043 }
7044
     //! \brief Set the causal mask kind; forwards \p kind to mImpl->setCausalKind().
     //! \return The bool returned by the implementation.
7060 bool setCausalKind(CausalMaskKind kind) noexcept
7061 {
7062 return mImpl->setCausalKind(kind);
7063 }
7064
     //! Signature line missing from this listing; the body returns mImpl->getCausalKind().
7073 {
7074 return mImpl->getCausalKind();
7075 }
7076
     //! \brief Set the decomposable flag; forwards \p decomposable to mImpl->setDecomposable().
     //! \return The bool returned by the implementation.
7084 bool setDecomposable(bool decomposable) noexcept
7085 {
7086 return mImpl->setDecomposable(decomposable);
7087 }
7088
     //! \brief Return the decomposable flag reported by mImpl->getDecomposable().
7097 bool getDecomposable() const noexcept
7098 {
7099 return mImpl->getDecomposable();
7100 }
7101
     //! \brief Set the input at \p index to \p input; forwards both to mImpl->setInput().
     //! \return The bool returned by the implementation.
     //! NOTE(review): which index corresponds to which attention operand is not visible here — confirm.
7116 bool setInput(int32_t index, ITensor& input) noexcept
7117 {
7118 return mImpl->setInput(index, input);
7119 }
7120
     //! \brief Return the number of inputs reported by mImpl->getNbInputs().
7125 int32_t getNbInputs() const noexcept
7126 {
7127 return mImpl->getNbInputs();
7128 }
7129
     //! \brief Return the input tensor at \p index as reported by mImpl->getInput().
7137 ITensor* getInput(int32_t index) const noexcept
7138 {
7139 return mImpl->getInput(index);
7140 }
7141
     //! \brief Return the number of outputs reported by mImpl->getNbOutputs().
7145 int32_t getNbOutputs() const noexcept
7146 {
7147 return mImpl->getNbOutputs();
7148 }
7149
     //! \brief Return the output tensor at \p index as reported by mImpl->getOutput().
7157 ITensor* getOutput(int32_t index) const noexcept
7158 {
7159 return mImpl->getOutput(index);
7160 }
7161
     //! \brief Set the name; forwards \p name to mImpl->setName().
     //! \return The bool returned by the implementation. ILoop::setName and IIfConditional::setName in this file
     //! return void instead.
7174 bool setName(char const* name) noexcept
7175 {
7176 return mImpl->setName(name);
7177 }
7178
     //! \brief Return the name reported by mImpl->getName().
7186 char const* getName() const noexcept
7187 {
7188 return mImpl->getName();
7189 }
7190
     //! Signature line missing from this listing; the body returns mImpl->setNormalizationQuantizeScale(tensor).
7203 {
7204 return mImpl->setNormalizationQuantizeScale(tensor);
7205 }
7206
     //! Signature line missing from this listing; the body returns mImpl->getNormalizationQuantizeScale().
7214 {
7215 return mImpl->getNormalizationQuantizeScale();
7216 }
7217
     //! Signature line missing from this listing; the body returns mImpl->setNormalizationQuantizeToType(type).
7227 {
7228 return mImpl->setNormalizationQuantizeToType(type);
7229 }
7230
     //! Signature line missing from this listing; the body returns mImpl->getNormalizationQuantizeToType().
7239 {
7240 return mImpl->getNormalizationQuantizeToType();
7241 }
7242
     //! \brief Set the metadata string; forwards \p metadata to mImpl->setMetadata().
     //! \return The bool returned by the implementation.
7258 bool setMetadata(char const* metadata) noexcept
7259 {
7260 return mImpl->setMetadata(metadata);
7261 }
7262
     //! \brief Return the metadata string reported by mImpl->getMetadata().
7271 char const* getMetadata() const noexcept
7272 {
7273 return mImpl->getMetadata();
7274 }
7275
     //! \brief Set the number of ranks; forwards \p nbRanks to mImpl->setNbRanks().
     //! \return The bool returned by the implementation.
     //! NOTE(review): presumably relates to multi-device execution — confirm in the full header.
7287 bool setNbRanks(int32_t nbRanks) noexcept
7288 {
7289 return mImpl->setNbRanks(nbRanks);
7290 }
7291
     //! \brief Return the number of ranks reported by mImpl->getNbRanks().
7299 int32_t getNbRanks() const noexcept
7300 {
7301 return mImpl->getNbRanks();
7302 }
7303
     //! Signature line missing from this listing; the body returns mImpl->setQueryForm(form).
7317 {
7318 return mImpl->setQueryForm(form);
7319 }
7320
     //! Signature line missing from this listing; the body returns mImpl->getQueryForm().
7330 {
7331 return mImpl->getQueryForm();
7332 }
7333
     //! Signature line missing from this listing; the body returns mImpl->setKeyValueForm(form).
7347 {
7348 return mImpl->setKeyValueForm(form);
7349 }
7350
     //! Signature line missing from this listing; the body returns mImpl->getKeyValueForm().
7360 {
7361 return mImpl->getKeyValueForm();
7362 }
7363
     //! \brief Set the query-lengths tensor; forwards the \p lengths pointer to mImpl->setQueryLengths().
     //! \return The bool returned by the implementation; marked TRT_NODISCARD.
     //! NOTE(review): whether nullptr is accepted (e.g. to clear the tensor) is not visible here — confirm.
7383 TRT_NODISCARD bool setQueryLengths(ITensor* lengths) noexcept
7384 {
7385 return mImpl->setQueryLengths(lengths);
7386 }
7387
     //! Signature line missing from this listing; the body returns mImpl->getQueryLengths().
7396 {
7397 return mImpl->getQueryLengths();
7398 }
7399
     //! Signature line missing from this listing; the body returns mImpl->setKeyValueLengths(lengths).
7423 {
7424 return mImpl->setKeyValueLengths(lengths);
7425 }
7426
     //! Signature line missing from this listing; the body returns mImpl->getKeyValueLengths().
7435 {
7436 return mImpl->getKeyValueLengths();
7437 }
7438
7439protected:
     //! Implementation pointer that all wrappers above forward to.
7440 apiv::VAttention* mImpl;
     //! Protected pure-virtual destructor; its defaulted definition follows the class body.
7441 virtual ~IAttention() noexcept = 0;
7442};
7443
//! Out-of-class defaulted definition of the pure-virtual destructor declared above.
7444inline IAttention::~IAttention() noexcept = default;
7445
//! Body of IRotaryEmbeddingLayer (name taken from its destructor); the class-head line is absent from this listing.
//! Public members forward to the implementation object mImpl.
7453{
7454public:
     //! \brief Set the interleaved flag; forwards \p interleaved to mImpl->setInterleaved().
7460 void setInterleaved(bool interleaved) noexcept
7461 {
7462 mImpl->setInterleaved(interleaved);
7463 }
7464
7465
     //! \brief Return the interleaved flag reported by mImpl->getInterleaved(); marked TRT_NODISCARD.
7471 TRT_NODISCARD bool getInterleaved() const noexcept
7472 {
7473 return mImpl->getInterleaved();
7474 }
7475
7476
     //! \brief Set the rotary embedding dimension; forwards \p rotaryEmbeddingDim to mImpl->setRotaryEmbeddingDim().
     //! \return The bool returned by the implementation (presumably whether the value was accepted — confirm).
7482 TRT_NODISCARD bool setRotaryEmbeddingDim(int32_t rotaryEmbeddingDim) noexcept
7483 {
7484 return mImpl->setRotaryEmbeddingDim(rotaryEmbeddingDim);
7485 }
7486
7487
     //! \brief Return the rotary embedding dimension reported by mImpl->getRotaryEmbeddingDim().
7493 TRT_NODISCARD int32_t getRotaryEmbeddingDim() const noexcept
7494 {
7495 return mImpl->getRotaryEmbeddingDim();
7496 }
7497
7498
     //! Re-exports the setInput overloads inherited from ILayer.
7512 using ILayer::setInput;
7513
7514protected:
     //! Implementation pointer that the wrappers above forward to.
7515 apiv::VRotaryEmbeddingLayer* mImpl;
     //! Protected pure-virtual destructor; its defaulted definition follows the class body.
7516 virtual ~IRotaryEmbeddingLayer() noexcept = 0;
7517};
7518
//! Out-of-class defaulted definition of the pure-virtual destructor declared above.
7519inline IRotaryEmbeddingLayer::~IRotaryEmbeddingLayer() noexcept = default;
7520
//! \brief KV-cache modes; only one enumerator (kLINEAR = 0) is defined in this listing.
7526enum class KVCacheMode : int32_t
7527{
7528 kLINEAR = 0,
7529};
7530
//! Explicit template specialization whose struct-head line (listing line 7537) is missing here.
//! kVALUE (1) equals the number of KVCacheMode enumerators above; presumably the enum-max trait — confirm.
7536template <>
7538{
7539 static constexpr int32_t kVALUE = 1;
7540};
7541
//! Body of IKVCacheUpdateLayer (name taken from its destructor); the class-head line is absent from this listing.
//! Public members forward to the implementation object mImpl.
7562{
7563public:
     //! Re-exports the setInput overloads inherited from ILayer.
7578 using ILayer::setInput;
7579
     //! \brief Set the cache mode; forwards \p cacheMode to mImpl->setCacheMode().
     //! \return The bool returned by the implementation (presumably success/failure — confirm).
7587 bool setCacheMode(KVCacheMode cacheMode) noexcept
7588 {
7589 return mImpl->setCacheMode(cacheMode);
7590 }
7591
     //! \brief Return the cache mode reported by mImpl->getCacheMode().
7597 KVCacheMode getCacheMode() const noexcept
7598 {
7599 return mImpl->getCacheMode();
7600 }
7601
     //! Signature line missing from this listing; the body returns mImpl->setUpdateForm(form).
7616 {
7617 return mImpl->setUpdateForm(form);
7618 }
7619
     //! Signature line missing from this listing; the body returns mImpl->getUpdateForm().
7629 {
7630 return mImpl->getUpdateForm();
7631 }
7632
     //! Signature line missing from this listing; the body returns mImpl->setUpdateLengths(lengths).
7651 {
7652 return mImpl->setUpdateLengths(lengths);
7653 }
7654
     //! Signature line missing from this listing; the body returns mImpl->getUpdateLengths().
7663 {
7664 return mImpl->getUpdateLengths();
7665 }
7666
7667protected:
     //! Implementation pointer that the wrappers above forward to.
7668 apiv::VKVCacheUpdateLayer* mImpl;
     //! Protected pure-virtual destructor; its defaulted definition follows the class body.
7669 virtual ~IKVCacheUpdateLayer() noexcept = 0;
7670};
7671
//! Out-of-class defaulted definition of the pure-virtual destructor declared above.
7672inline IKVCacheUpdateLayer::~IKVCacheUpdateLayer() noexcept = default;
7673
//! \brief Activation types for the MoE layer, with underlying int32_t values 0..1.
//! NOTE(review): names suggest no activation (kNONE) versus SiLU (kSILU) — confirm against the full header.
7679enum class MoEActType : int32_t
7680{
7681 kNONE = 0,
7682 kSILU = 1,
7683};
7684
//! Explicit template specialization whose struct-head line (listing line 7691) is missing here.
//! kVALUE (2) equals the number of MoEActType enumerators above; presumably the enum-max trait — confirm.
7690template <>
7692{
7693 static constexpr int32_t kVALUE = 2;
7694};
7695
7696
//! \brief Mixture-of-experts layer interface; derives from ILayer and forwards every call to the implementation
//! object mImpl. All setters visible here return void.
//! NOTE(review): expected tensor shapes and data types for the weight, bias and scale arguments are not visible in
//! this listing — confirm against the full header before relying on them.
7812class IMoELayer : public ILayer
7813{
7814public:
     //! \brief Set the gate, up and down weight tensors together with the activation type.
     //! All four arguments are forwarded unchanged to mImpl->setGatedWeights().
7826 void setGatedWeights(ITensor& fcGateWeights, ITensor& fcUpWeights, ITensor& fcDownWeights, MoEActType activationType) noexcept
7827 {
7828 mImpl->setGatedWeights(fcGateWeights, fcUpWeights, fcDownWeights, activationType);
7829 }
7830
     //! \brief Set the gate, up and down bias tensors; forwards all three to mImpl->setGatedBiases().
7838 void setGatedBiases(ITensor& fcGateBiases, ITensor& fcUpBiases, ITensor& fcDownBiases) noexcept
7839 {
7840 mImpl->setGatedBiases(fcGateBiases, fcUpBiases, fcDownBiases);
7841 }
7842
     //! \brief Set the activation type; forwards \p activationType to mImpl->setActivationType().
7850 void setActivationType(MoEActType activationType) noexcept
7851 {
7852 mImpl->setActivationType(activationType);
7853 }
7854
     //! Signature line missing from this listing; the body returns mImpl->getActivationType().
7863 {
7864 return mImpl->getActivationType();
7865 }
7866
     //! \brief Configure static quantization; forwards \p fcDownActivationScale and \p dataType to
     //! mImpl->setQuantizationStatic().
7888 void setQuantizationStatic(ITensor& fcDownActivationScale, DataType dataType) noexcept
7889 {
7890 mImpl->setQuantizationStatic(fcDownActivationScale, dataType);
7891 }
7892
     //! \brief Configure dynamic double quantization; forwards all four arguments to
     //! mImpl->setQuantizationDynamicDblQ().
7921 void setQuantizationDynamicDblQ(ITensor& fcDownActivationDblQScale, DataType dataType, Dims const& blockShape, DataType dynQOutputScaleType) noexcept
7922 {
7923 mImpl->setQuantizationDynamicDblQ(fcDownActivationDblQScale, dataType, blockShape, dynQOutputScaleType);
7924 }
7925
     //! Signature line missing from this listing; the body calls mImpl->setQuantizationToType(type).
7937 {
7938 mImpl->setQuantizationToType(type);
7939 }
7940
     //! Signature line missing from this listing; the body returns mImpl->getQuantizationToType().
7949 {
7950 return mImpl->getQuantizationToType();
7951 }
7952
     //! \brief Set the quantization block shape; forwards \p blockShape to mImpl->setQuantizationBlockShape().
7964 void setQuantizationBlockShape(Dims const& blockShape) noexcept
7965 {
7966 mImpl->setQuantizationBlockShape(blockShape);
7967 }
7968
     //! Signature line missing from this listing; the body returns mImpl->getQuantizationBlockShape().
7977 {
7978 return mImpl->getQuantizationBlockShape();
7979 }
7980
     //! Signature line missing from this listing; the body calls mImpl->setDynQOutputScaleType(type).
7989 {
7990 mImpl->setDynQOutputScaleType(type);
7991 }
7992
     //! Signature line missing from this listing; the body returns mImpl->getDynQOutputScaleType().
8001 {
8002 return mImpl->getDynQOutputScaleType();
8003 }
8004
     //! \brief Set all three SwiGLU parameters at once; forwards \p limit, \p alpha and \p beta to
     //! mImpl->setSwigluParams().
8021 void setSwigluParams(float limit, float alpha, float beta) noexcept
8022 {
8023 mImpl->setSwigluParams(limit, alpha, beta);
8024 }
8025
     //! \brief Set the SwiGLU limit parameter; forwards \p limit to mImpl->setSwigluParamLimit().
8035 void setSwigluParamLimit(float limit) noexcept
8036 {
8037 mImpl->setSwigluParamLimit(limit);
8038 }
8039
     //! \brief Return the SwiGLU limit parameter reported by mImpl->getSwigluParamLimit().
8047 float getSwigluParamLimit() const noexcept
8048 {
8049 return mImpl->getSwigluParamLimit();
8050 }
8051
     //! \brief Set the SwiGLU alpha parameter; forwards \p alpha to mImpl->setSwigluParamAlpha().
8061 void setSwigluParamAlpha(float alpha) noexcept
8062 {
8063 mImpl->setSwigluParamAlpha(alpha);
8064 }
8065
     //! \brief Return the SwiGLU alpha parameter reported by mImpl->getSwigluParamAlpha().
8073 float getSwigluParamAlpha() const noexcept
8074 {
8075 return mImpl->getSwigluParamAlpha();
8076 }
8077
     //! \brief Set the SwiGLU beta parameter; forwards \p beta to mImpl->setSwigluParamBeta().
8087 void setSwigluParamBeta(float beta) noexcept
8088 {
8089 mImpl->setSwigluParamBeta(beta);
8090 }
8091
     //! \brief Return the SwiGLU beta parameter reported by mImpl->getSwigluParamBeta().
8099 float getSwigluParamBeta() const noexcept
8100 {
8101 return mImpl->getSwigluParamBeta();
8102 }
8103
     //! \brief Set the input at \p index to \p tensor through this layer's own implementation object
     //! (mImpl->setInput), rather than through the inherited ILayer implementation.
8116 void setInput(int32_t index, ITensor& tensor) noexcept
8117 {
8118 mImpl->setInput(index, tensor);
8119 }
8120
     //! Also makes the setInput overloads inherited from ILayer visible alongside the one declared above.
8121 using ILayer::setInput;
8122
8123protected:
     //! Protected pure-virtual destructor; its defaulted definition follows the class body.
8124 virtual ~IMoELayer() noexcept = 0;
     //! Implementation pointer that all wrappers above forward to.
8125 apiv::VMoELayer* mImpl;
8126};
8127
//! Out-of-class defaulted definition of the pure-virtual destructor declared above.
8128inline IMoELayer::~IMoELayer() noexcept = default;
8129
//! Body of IDistCollectiveLayer (name taken from its destructor); the class-head line is absent from this listing.
//! The class declares no public members of its own in the visible lines.
8138{
8139protected:
     //! Protected pure-virtual destructor; its defaulted definition follows the class body.
8140 virtual ~IDistCollectiveLayer() noexcept = 0;
     //! Implementation pointer.
8141 apiv::VDistCollectiveLayer* mImpl;
8142}; // class IDistCollectiveLayer
8143
//! Out-of-class defaulted definition of the pure-virtual destructor declared above.
8144inline IDistCollectiveLayer::~IDistCollectiveLayer() noexcept = default;
8145
8164{
8165public:
8166 virtual ~INetworkDefinition() noexcept = 0;
8167
//! \brief Add a network input tensor; forwards \p name, \p type and \p dimensions to mImpl->addInput().
//! \return The tensor pointer returned by the implementation (presumably nullptr on failure — confirm).
8203 ITensor* addInput(char const* name, DataType type, Dims const& dimensions) noexcept
8204 {
8205 return mImpl->addInput(name, type, dimensions);
8206 }
8207
//! \brief Mark \p tensor as a network output; forwards to mImpl->markOutput().
8217 void markOutput(ITensor& tensor) noexcept
8218 {
8219 mImpl->markOutput(tensor);
8220 }
8221
//! \brief Mark \p tensor as a debug tensor; forwards to mImpl->markDebug().
//! \return The bool returned by the implementation (presumably success/failure — confirm).
8235 bool markDebug(ITensor& tensor) noexcept
8236 {
8237 return mImpl->markDebug(tensor);
8238 }
8239
//! \brief Remove the debug mark from \p tensor; forwards to mImpl->unmarkDebug().
//! \return The bool returned by the implementation (presumably success/failure — confirm).
8251 bool unmarkDebug(ITensor& tensor) noexcept
8252 {
8253 return mImpl->unmarkDebug(tensor);
8254 }
8255
//! \brief Return the result of mImpl->isDebugTensor() for \p tensor.
8261 bool isDebugTensor(ITensor const& tensor) const noexcept
8262 {
8263 return mImpl->isDebugTensor(tensor);
8264 }
8265
8284 {
8285 return mImpl->markUnfusedTensorsAsDebugTensors();
8286 }
8287
8298 {
8299 return mImpl->unmarkUnfusedTensorsAsDebugTensors();
8300 }
8301
8318 {
8319 return mImpl->addActivation(input, type);
8320 }
8321
//! \brief Add an LRN layer; forwards \p input, \p window, \p alpha, \p beta and \p k to mImpl->addLRN().
//! \return The layer pointer returned by the implementation.
8336 ILRNLayer* addLRN(ITensor& input, int64_t window, float alpha, float beta, float k) noexcept
8337 {
8338 return mImpl->addLRN(input, window, alpha, beta, k);
8339 }
8340
//! \brief Add a scale layer; forwards \p input, \p mode, \p shift, \p scale and \p power to mImpl->addScale().
//! Note the weights are passed in the order shift, scale, power.
//! \return The layer pointer returned by the implementation.
8362 IScaleLayer* addScale(ITensor& input, ScaleMode mode, Weights shift, Weights scale, Weights power) noexcept
8363 {
8364 return mImpl->addScale(input, mode, shift, scale, power);
8365 }
8366
8376 {
8377 return mImpl->addSoftMax(input);
8378 }
8379
//! \brief Add a concatenation layer over an array of \p nbInputs tensor pointers; forwards to mImpl->addConcatenation().
//! \return The layer pointer returned by the implementation.
8392 IConcatenationLayer* addConcatenation(ITensor* const* inputs, int32_t nbInputs) noexcept
8393 {
8394 return mImpl->addConcatenation(inputs, nbInputs);
8395 }
8396
8420 {
8421 return mImpl->addElementWise(input1, input2, op);
8422 }
8423
//! \brief Add a unary layer applying \p operation to \p input; forwards to mImpl->addUnary().
//! \return The layer pointer returned by the implementation.
8441 IUnaryLayer* addUnary(ITensor& input, UnaryOperation operation) noexcept
8442 {
8443 return mImpl->addUnary(input, operation);
8444 }
8445
8456 {
8457 return mImpl->addShuffle(input);
8458 }
8459
//! \brief Add a one-hot layer; forwards \p indices, \p values, \p depth and \p axis to mImpl->addOneHot().
//! \return The layer pointer returned by the implementation.
8472 IOneHotLayer* addOneHot(ITensor& indices, ITensor& values, ITensor& depth, int32_t axis) noexcept
8473 {
8474 return mImpl->addOneHot(indices, values, depth, axis);
8475 }
8476
//! \brief Return the number of layers reported by mImpl->getNbLayers().
8484 int32_t getNbLayers() const noexcept
8485 {
8486 return mImpl->getNbLayers();
8487 }
8488
//! \brief Return the layer at \p index as reported by mImpl->getLayer().
//! NOTE(review): behavior for an out-of-range \p index is not visible here — confirm.
8498 ILayer* getLayer(int32_t index) const noexcept
8499 {
8500 return mImpl->getLayer(index);
8501 }
8502
//! \brief Return the number of network inputs reported by mImpl->getNbInputs().
8510 int32_t getNbInputs() const noexcept
8511 {
8512 return mImpl->getNbInputs();
8513 }
8514
//! \brief Return the network input tensor at \p index as reported by mImpl->getInput().
//! NOTE(review): behavior for an out-of-range \p index is not visible here — confirm.
8526 ITensor* getInput(int32_t index) const noexcept
8527 {
8528 return mImpl->getInput(index);
8529 }
8530
//! \brief Return the number of network outputs reported by mImpl->getNbOutputs().
8540 int32_t getNbOutputs() const noexcept
8541 {
8542 return mImpl->getNbOutputs();
8543 }
8544
//! \brief Return the network output tensor at \p index as reported by mImpl->getOutput().
//! NOTE(review): behavior for an out-of-range \p index is not visible here — confirm.
8556 ITensor* getOutput(int32_t index) const noexcept
8557 {
8558 return mImpl->getOutput(index);
8559 }
8560
8583 ITensor& input, ReduceOperation operation, uint32_t reduceAxes, bool keepDimensions) noexcept
8584 {
8585 return mImpl->addReduce(input, operation, reduceAxes, keepDimensions);
8586 }
8587
//! \brief Deprecated four-argument overload: add a TopK layer; forwards \p input, \p op, \p k and \p reduceAxes
//! to mImpl->addTopK(). The five-argument overload additionally takes an indices data type.
//! \return The layer pointer returned by the implementation.
8618 TRT_DEPRECATED ITopKLayer* addTopK(ITensor& input, TopKOperation op, int32_t k, uint32_t reduceAxes) noexcept
8619 {
8620 return mImpl->addTopK(input, op, k, reduceAxes);
8621 }
8622
//! \brief Add a TopK layer with an explicit \p indicesType. Although named addTopK, this overload forwards to the
//! implementation's addTopKV2() entry point with all five arguments.
//! \return The layer pointer returned by the implementation.
8651 ITopKLayer* addTopK(ITensor& input, TopKOperation op, int32_t k, uint32_t reduceAxes, DataType indicesType) noexcept
8652 {
8653 return mImpl->addTopKV2(input, op, k, reduceAxes, indicesType);
8654 }
8655
//! \brief Add a gather layer selected by \p axis; forwards \p data, \p indices and \p axis to mImpl->addGather().
//! \return The layer pointer returned by the implementation.
8667 IGatherLayer* addGather(ITensor& data, ITensor& indices, int32_t axis) noexcept
8668 {
8669 return mImpl->addGather(data, indices, axis);
8670 }
8671
//! \brief Add a gather layer selected by \p mode (instead of an axis); forwards \p data, \p indices and \p mode
//! to mImpl->addGatherV2().
//! \return The layer pointer returned by the implementation.
8683 IGatherLayer* addGatherV2(ITensor& data, ITensor& indices, GatherMode mode) noexcept
8684 {
8685 return mImpl->addGatherV2(data, indices, mode);
8686 }
8687
8703 {
8704 return mImpl->addRaggedSoftMax(input, bounds);
8705 }
8706
8724 ITensor& input0, MatrixOperation op0, ITensor& input1, MatrixOperation op1) noexcept
8725 {
8726 return mImpl->addMatrixMultiply(input0, op0, input1, op1);
8727 }
8728
8743 {
8744 return mImpl->addNonZero(input);
8745 }
8746
//! \brief Add a non-zero layer with an explicit \p indicesType. Although named addNonZero, this overload forwards
//! to the implementation's addNonZeroV2() entry point.
//! \return The layer pointer returned by the implementation.
8758 INonZeroLayer* addNonZero(ITensor& input, DataType indicesType) noexcept
8759 {
8760 return mImpl->addNonZeroV2(input, indicesType);
8761 }
8762
//! \brief Add a constant layer; forwards \p dimensions and \p weights to mImpl->addConstant().
//! \return The layer pointer returned by the implementation.
//! NOTE(review): the required lifetime of the memory referenced by \p weights is not visible here — confirm.
8782 IConstantLayer* addConstant(Dims const& dimensions, Weights weights) noexcept
8783 {
8784 return mImpl->addConstant(dimensions, weights);
8785 }
8786
8797 {
8798 return mImpl->addIdentity(input);
8799 }
8800
//! \brief Add a cast layer converting \p input to \p toType; forwards to mImpl->addCast().
//! \return The layer pointer returned by the implementation.
8811 ICastLayer* addCast(ITensor& input, DataType toType) noexcept
8812 {
8813 return mImpl->addCast(input, toType);
8814 }
8815
//! \brief Remove \p tensor from the network; forwards to mImpl->removeTensor().
//! NOTE(review): preconditions (e.g. that the tensor must be unused) are not visible here — confirm.
8826 void removeTensor(ITensor& tensor) noexcept
8827 {
8828 mImpl->removeTensor(tensor);
8829 }
8830
//! \brief Remove the output mark from \p tensor; forwards to mImpl->unmarkOutput().
8838 void unmarkOutput(ITensor& tensor) noexcept
8839 {
8840 mImpl->unmarkOutput(tensor);
8841 }
8842
//! \brief Deprecated: add a plugin layer using the IPluginV2 interface; forwards \p inputs, \p nbInputs and
//! \p plugin to mImpl->addPluginV2().
//! \return The layer pointer returned by the implementation.
8859 TRT_DEPRECATED IPluginV2Layer* addPluginV2(ITensor* const* inputs, int32_t nbInputs, IPluginV2& plugin) noexcept
8860 {
8861 return mImpl->addPluginV2(inputs, nbInputs, plugin);
8862 }
8863
//! \brief Add a plugin layer using the IPluginV3 interface. Takes two tensor-pointer arrays, \p inputs (with
//! \p nbInputs entries) and \p shapeInputs (with \p nbShapeInputs entries), and forwards everything to
//! mImpl->addPluginV3().
//! \return The layer pointer returned by the implementation.
8877 IPluginV3Layer* addPluginV3(ITensor* const* inputs, int32_t nbInputs, ITensor* const* shapeInputs,
8878 int32_t nbShapeInputs, IPluginV3& plugin) noexcept
8879 {
8880 return mImpl->addPluginV3(inputs, nbInputs, shapeInputs, nbShapeInputs, plugin);
8881 }
8882
//! \brief Add a slice layer; forwards \p input, \p start, \p size and \p stride to mImpl->addSlice().
//! \return The layer pointer returned by the implementation.
8897 ISliceLayer* addSlice(ITensor& input, Dims const& start, Dims const& size, Dims const& stride) noexcept
8898 {
8899 return mImpl->addSlice(input, start, size, stride);
8900 }
8901
//! \brief Set the network name; forwards \p name to mImpl->setName().
//! NOTE(review): whether the string is copied is not visible here — confirm in the full header.
8921 void setName(char const* name) noexcept
8922 {
8923 mImpl->setName(name);
8924 }
8925
//! \brief Return the network name reported by mImpl->getName().
8935 char const* getName() const noexcept
8936 {
8937 return mImpl->getName();
8938 }
8939
//! \brief Add a shape layer for \p input; forwards to mImpl->addShape().
//! \return The layer pointer returned by the implementation.
8951 IShapeLayer* addShape(ITensor& input) noexcept
8952 {
8953 return mImpl->addShape(input);
8954 }
8955
8966 {
8967 return mImpl->hasImplicitBatchDimension();
8968 }
8969
8976 {
8977 return mImpl->getFlags();
8978 }
8979
//! \brief Return the result of mImpl->getFlag() for the given creation flag
//! (presumably whether that flag is set on this network — confirm).
8987 bool getFlag(NetworkDefinitionCreationFlag networkDefinitionCreationFlag) const noexcept
8988 {
8989 return mImpl->getFlag(networkDefinitionCreationFlag);
8990 }
8991
//! \brief Mark \p tensor as an output for shapes; forwards to mImpl->markOutputForShapes().
//! \return The bool returned by the implementation (presumably success/failure — confirm).
9004 bool markOutputForShapes(ITensor& tensor) noexcept
9005 {
9006 return mImpl->markOutputForShapes(tensor);
9007 }
9008
//! \brief Remove the output-for-shapes mark from \p tensor; forwards to mImpl->unmarkOutputForShapes().
//! \return The bool returned by the implementation (presumably success/failure — confirm).
9016 bool unmarkOutputForShapes(ITensor& tensor) noexcept
9017 {
9018 return mImpl->unmarkOutputForShapes(tensor);
9019 }
9020
9035 {
9036 return mImpl->addParametricReLU(input, slope);
9037 }
9038
9057 ITensor& input, int64_t nbOutputMaps, Dims const& kernelSize, Weights kernelWeights, Weights biasWeights) noexcept
9058 {
9059 return mImpl->addConvolutionNd(input, nbOutputMaps, kernelSize, kernelWeights, biasWeights);
9060 }
9061
//! \brief Add a pooling layer; forwards \p input, \p type and \p windowSize to mImpl->addPoolingNd().
//! \return The layer pointer returned by the implementation.
9076 IPoolingLayer* addPoolingNd(ITensor& input, PoolingType type, Dims const& windowSize) noexcept
9077 {
9078 return mImpl->addPoolingNd(input, type, windowSize);
9079 }
9080
9095 //
9099 ITensor& input, int64_t nbOutputMaps, Dims kernelSize, Weights kernelWeights, Weights biasWeights) noexcept
9100 {
9101 return mImpl->addDeconvolutionNd(input, nbOutputMaps, kernelSize, kernelWeights, biasWeights);
9102 }
9103
9136 ITensor& input, ScaleMode mode, Weights shift, Weights scale, Weights power, int32_t channelAxis) noexcept
9137 {
9138 return mImpl->addScaleNd(input, mode, shift, scale, power, channelAxis);
9139 }
9140
9153 {
9154 return mImpl->addResize(input);
9155 }
9156
//! \brief Add a loop to the network.
//! \return The ILoop pointer returned by mImpl->addLoop().
9166 ILoop* addLoop() noexcept
9167 {
9168 return mImpl->addLoop();
9169 }
9170
9182 {
9183 return mImpl->addIfConditional();
9184 }
9185
//! \brief Add a select layer; forwards \p condition, \p thenInput and \p elseInput to mImpl->addSelect().
//! \return The layer pointer returned by the implementation.
9220 ISelectLayer* addSelect(ITensor& condition, ITensor& thenInput, ITensor& elseInput) noexcept
9221 {
9222 return mImpl->addSelect(condition, thenInput, elseInput);
9223 }
9224
//! \brief Add an assertion layer on \p condition with the given \p message; forwards to mImpl->addAssertion().
//! \return The layer pointer returned by the implementation.
9237 IAssertionLayer* addAssertion(ITensor& condition, char const* message) noexcept
9238 {
9239 return mImpl->addAssertion(condition, message);
9240 }
9241
//! \brief Inline forwarding wrapper for creating a fill layer.
//! \note Although the public name is addFill(), the call is routed to the implementation's
//!       addFillV2() entry point.
//! \return The IFillLayer pointer returned by the implementation object.
9263 IFillLayer* addFill(Dims const& dimensions, FillOperation op, DataType outputType) noexcept
9264 {
9265 return mImpl->addFillV2(dimensions, op, outputType);
9266 }
9267
//! \brief Inline forwarding wrapper around mImpl->addPaddingNd().
//! \param input, prePadding, postPadding Forwarded unchanged, in this order.
//! \return The IPaddingLayer pointer returned by the implementation object.
9279 IPaddingLayer* addPaddingNd(ITensor& input, Dims const& prePadding, Dims const& postPadding) noexcept
9280 {
9281 return mImpl->addPaddingNd(input, prePadding, postPadding);
9282 }
9283
//! \brief Inline forwarding wrapper: passes \p weights (by value) and \p name to mImpl->setWeightsName().
//! \return The bool status returned by the implementation object.
9303 bool setWeightsName(Weights weights, char const* name) noexcept
9304 {
9305 return mImpl->setWeightsName(weights, name);
9306 }
9307
9319 //
//! \brief Hand the \p recorder pointer to the implementation object via mImpl->setErrorRecorder().
//!        Nothing is returned to the caller.
9322 void setErrorRecorder(IErrorRecorder* recorder) noexcept
9323 {
9324 mImpl->setErrorRecorder(recorder);
9325 }
9326
9338 {
9339 return mImpl->getErrorRecorder();
9340 }
9341
//! \brief Inline forwarding wrapper for creating a dequantize layer.
//! \note The call is routed to the implementation's addDequantizeV2() entry point.
//! \return The IDequantizeLayer pointer returned by the implementation object.
9360 IDequantizeLayer* addDequantize(ITensor& input, ITensor& scale, DataType outputType) noexcept
9361 {
9362 return mImpl->addDequantizeV2(input, scale, outputType);
9363 }
9364
//! \brief Inline forwarding wrapper around mImpl->addScatter().
//! \param data, indices, updates, mode Forwarded unchanged, in this order.
//! \return The IScatterLayer pointer returned by the implementation object.
9380 IScatterLayer* addScatter(ITensor& data, ITensor& indices, ITensor& updates, ScatterMode mode) noexcept
9381 {
9382 return mImpl->addScatter(data, indices, updates, mode);
9383 }
9384
//! \brief Inline forwarding wrapper for creating a quantize layer.
//! \note The call is routed to the implementation's addQuantizeV2() entry point.
//! \return The IQuantizeLayer pointer returned by the implementation object.
9404 IQuantizeLayer* addQuantize(ITensor& input, ITensor& scale, DataType outputType) noexcept
9405 {
9406 return mImpl->addQuantizeV2(input, scale, outputType);
9407 }
9408
9432 ITensor& input, int32_t axis, int32_t blockSize, DataType outputType, DataType scaleType) noexcept
9433 {
9434 return mImpl->addDynamicQuantize(input, axis, blockSize, outputType, scaleType);
9435 }
9436
9456 ITensor& input, Dims const& blockShape, DataType outputType, DataType scaleType) noexcept
9457 {
9458 return mImpl->addDynamicQuantizeV2(input, blockShape, outputType, scaleType);
9459 }
9460
//! \brief Inline forwarding wrapper around mImpl->addEinsum().
//! \param inputs Pointer to an array of ITensor pointers.
//! \param nbInputs Count forwarded alongside \p inputs (presumably the array length - confirm).
//! \param equation C string forwarded unchanged.
//! \return The IEinsumLayer pointer returned by the implementation object.
9471 IEinsumLayer* addEinsum(ITensor* const* inputs, int32_t nbInputs, char const* equation) noexcept
9472 {
9473 return mImpl->addEinsum(inputs, nbInputs, equation);
9474 }
9475
9490 {
9491 return mImpl->addGridSample(input, grid);
9492 }
9493
//! \brief Three-tensor overload of addNMS(); forwards to mImpl->addNMS().
//! \note Marked TRT_DEPRECATED. An overload taking an additional DataType indicesType exists in this class.
//! \return The INMSLayer pointer returned by the implementation object.
9511 TRT_DEPRECATED INMSLayer* addNMS(ITensor& boxes, ITensor& scores, ITensor& maxOutputBoxesPerClass) noexcept
9512 {
9513 return mImpl->addNMS(boxes, scores, maxOutputBoxesPerClass);
9514 }
9515
//! \brief Overload of addNMS() that additionally takes \p indicesType.
//! \note The call is routed to the implementation's addNMSV2() entry point.
//! \return The INMSLayer pointer returned by the implementation object.
9531 INMSLayer* addNMS(ITensor& boxes, ITensor& scores, ITensor& maxOutputBoxesPerClass, DataType indicesType) noexcept
9532 {
9533 return mImpl->addNMSV2(boxes, scores, maxOutputBoxesPerClass, indicesType);
9534 }
9535
9549 {
9550 return mImpl->addReverseSequence(input, sequenceLens);
9551 }
9552
//! \brief Inline forwarding wrapper around mImpl->addNormalization().
//! \note Marked TRT_DEPRECATED; this class also declares addNormalizationV2() with the same parameter list.
//! \return The INormalizationLayer pointer returned by the implementation object.
9580 TRT_DEPRECATED INormalizationLayer* addNormalization(ITensor& input, ITensor& scale, ITensor& bias, uint32_t axesMask) noexcept
9581 {
9582 return mImpl->addNormalization(input, scale, bias, axesMask);
9583 }
9584
//! \brief Inline forwarding wrapper around mImpl->addCumulative().
//! \param input, axis Tensors forwarded unchanged (note that \p axis is itself a tensor).
//! \param operation, exclusive, reverse Forwarded unchanged, in this order.
//! \return The ICumulativeLayer pointer returned by the implementation object.
9602 ICumulativeLayer* addCumulative(ITensor& input, ITensor& axis, CumulativeOperation operation, bool exclusive, bool reverse) noexcept
9603 {
9604 return mImpl->addCumulative(input, axis, operation, exclusive, reverse);
9605 }
9606
9633 ITensor& query, ITensor& key, ITensor& value, AttentionNormalizationOp normOp, bool causal) noexcept
9634 {
9635 return mImpl->addAttention(query, key, value, normOp, causal);
9636 }
9637
9663 CausalMaskKind causalKind) noexcept
9664 {
9665 return mImpl->addAttentionV2(query, key, value, normOp, causalKind);
9666 }
9667
//! \brief Inline forwarding wrapper around mImpl->addRotaryEmbedding().
//! \param input, cosCache, sinCache Tensors forwarded unchanged.
//! \param interleaved, rotaryEmbeddingDim Scalar options forwarded unchanged.
//! \return The IRotaryEmbeddingLayer pointer returned by the implementation object.
9687 IRotaryEmbeddingLayer* addRotaryEmbedding(ITensor& input, ITensor& cosCache, ITensor& sinCache, bool interleaved, int32_t rotaryEmbeddingDim) noexcept
9688 {
9689 return mImpl->addRotaryEmbedding(input, cosCache, sinCache, interleaved, rotaryEmbeddingDim);
9690 }
9691
9722 ITensor& cache, ITensor& update, ITensor& writeIndices, KVCacheMode cacheMode) noexcept
9723 {
9724 return mImpl->addKVCacheUpdate(cache, update, writeIndices, cacheMode);
9725 }
9726
//! \brief Inline forwarding wrapper around mImpl->addMoE().
//! \param hiddenStates, selectedExpertsForTokens, scoresForSelectedExperts Tensors forwarded in this order.
//! \return The IMoELayer pointer returned by the implementation object.
9743 IMoELayer* addMoE(ITensor& hiddenStates, ITensor& selectedExpertsForTokens, ITensor& scoresForSelectedExperts) noexcept
9744 {
9745 return mImpl->addMoE(hiddenStates, selectedExpertsForTokens, scoresForSelectedExperts);
9746 }
9747
9776 ReduceOperation reduceOp, int64_t root, int64_t* groups, int64_t groupSize) noexcept
9777 {
9778 return mImpl->addDistCollective(input, distCollectiveOp, reduceOp, root, groups, groupSize);
9779 }
9780
//! \brief Return the IBuilder reference reported by mImpl->getBuilder().
//! \note Unlike the neighbouring inline wrappers, this member is declared virtual.
9787 virtual IBuilder& getBuilder() const noexcept
9788 {
9789 return mImpl->getBuilder();
9790 }
9791
//! \brief Inline forwarding wrapper: passes the weights \p name to mImpl->markWeightsRefittable().
//! \return The bool status returned by the implementation object.
9800 bool markWeightsRefittable(char const* name) noexcept
9801 {
9802 return mImpl->markWeightsRefittable(name);
9803 }
9804
//! \brief Counterpart of markWeightsRefittable(): passes \p name to mImpl->unmarkWeightsRefittable().
//! \return The bool status returned by the implementation object.
9812 bool unmarkWeightsRefittable(char const* name) noexcept
9813 {
9814 return mImpl->unmarkWeightsRefittable(name);
9815 }
9816
//! \brief Const query: passes \p name to mImpl->areWeightsMarkedRefittable().
//! \return The bool reported by the implementation object.
9825 bool areWeightsMarkedRefittable(char const* name) const noexcept
9826 {
9827 return mImpl->areWeightsMarkedRefittable(name);
9828 }
9829
//! \brief Inline forwarding wrapper around mImpl->addSqueeze().
//! \param input, axes Tensors forwarded unchanged (note that \p axes is itself a tensor).
//! \return The ISqueezeLayer pointer returned by the implementation object.
9844 ISqueezeLayer* addSqueeze(ITensor& input, ITensor& axes) noexcept
9845 {
9846 return mImpl->addSqueeze(input, axes);
9847 }
9848
9866 {
9867 return mImpl->addUnsqueeze(input, axes);
9868 }
9869
//! \brief Inline forwarding wrapper around mImpl->addNormalizationV2().
//! \note Declared TRT_NODISCARD, so callers are expected to use the returned pointer.
//! \return The INormalizationLayer pointer returned by the implementation object.
9891 TRT_NODISCARD INormalizationLayer* addNormalizationV2(ITensor& input, ITensor& scale, ITensor& bias, uint32_t axesMask) noexcept
9892 {
9893 return mImpl->addNormalizationV2(input, scale, bias, axesMask);
9894 }
9895
9896protected:
9897 apiv::VNetworkDefinition* mImpl;
9898};
9899
//! Out-of-class, inline, defaulted definition of the INetworkDefinition destructor.
//! NOTE(review): presumably declared pure virtual inside the class, as the sibling interfaces in this
//! header do - the declaration is outside this part of the listing.
9900inline INetworkDefinition::~INetworkDefinition() noexcept = default;
9901
//! \enum RuntimePlatform
//! \brief Identifies the target platform for runtime execution; it is the argument type of
//!        IBuilderConfig::setRuntimePlatform().
//! Only the two enumerators below are visible in this listing.
9919enum class RuntimePlatform : int32_t
9920{
9923 kSAME_AS_BUILD = 0,
9924
9927 kWINDOWS_AMD64 = 1,
9928
9929
9930};
9931
9937template <>
9939{
9940 static constexpr int32_t kVALUE = 2;
9941};
9942
//! Integer type that carries the build mode flags passed to IBuilderConfig::setFlags() and returned by
//! IBuilderConfig::getFlags().
//! NOTE(review): presumably a bitmask with one bit per BuilderFlag value - confirm.
9949using BuilderFlags = uint32_t;
9950
//! \enum BuilderFlag
//! \brief Individual build mode flags; used with IBuilderConfig::setFlag(), clearFlag() and getFlag().
//! NOTE(review): the enumerator values visible here are not contiguous (2, 3, 4, 6, 7, 18, ...). The gaps
//! line up with lines that are missing from this listing, so do not assume the values shown are the
//! complete set - verify against the real header.
9958enum class BuilderFlag : int32_t
9959{
9960
9962 kDEBUG = 2,
9963
9965 kGPU_FALLBACK = 3,
9966
9968 kREFIT = 4,
9969
9972
9976 kTF32 = 6,
9977
9979 kSPARSE_WEIGHTS = 7,
9980
9993
9994
10000
10007
10013
10014
10018
10019
10025
10027 kSTRIP_PLAN = 18,
10028
10035 kREFIT_IDENTICAL = 19,
10036
10062 kWEIGHT_STREAMING = 20,
10063
10064
10069 kREFIT_INDIVIDUAL = 22,
10070
10079 kSTRICT_NANS = 23,
10080
10082 kMONITOR_MEMORY = 24,
10083
10084
10087
10099
10100
10101};
10102
10108template <>
10110{
10111 static constexpr int32_t kVALUE = 28;
10112};
10113
//! Versioned namespace holding two plain aggregates.
//! NOTE(review): the lines naming the two aggregates are missing from this listing. Given the
//! ITimingCache::query()/update() signatures they are presumably TimingCacheKey and TimingCacheValue -
//! confirm against the real header.
10114namespace v_1_0
10115{
// First aggregate: an opaque 16-byte blob.
10129{
10130 uint8_t data[16];
10131};
10132
// Second aggregate: a 64-bit tactic hash plus the sentinel value UINT64_MAX.
10141{
10143 uint64_t tacticHash;
10147 static constexpr uint64_t kINVALID_TACTIC_HASH = UINT64_MAX;
10148};
10149} // namespace v_1_0
10150
//! \class ITimingCache
//! \brief Non-copyable (derives from INoCopy) interface for a timing cache. Every member is an inline
//!        wrapper that forwards to the apiv::VTimingCache implementation held in mImpl.
10164class ITimingCache : public INoCopy
10165{
10166public:
//! Pure virtual destructor; its inline defaulted definition follows the class.
10167 virtual ~ITimingCache() noexcept = 0;
10168
//! \brief Forward to mImpl->serialize().
//! \return The IHostMemory pointer returned by the implementation object.
10178 nvinfer1::IHostMemory* serialize() const noexcept
10179 {
10180 return mImpl->serialize();
10181 }
10182
//! \brief Forward \p inputCache and \p ignoreMismatch to mImpl->combine().
//! \return The bool status returned by the implementation object.
10202 bool combine(ITimingCache const& inputCache, bool ignoreMismatch) noexcept
10203 {
10204 return mImpl->combine(inputCache, ignoreMismatch);
10205 }
10206
//! \brief Forward to mImpl->reset().
//! \return The bool status returned by the implementation object.
10212 bool reset() noexcept
10213 {
10214 return mImpl->reset();
10215 }
10216
//! \brief Forward the caller-provided \p keyBuffer and its \p capacity to mImpl->queryKeys().
//! \return The int64_t value returned by the implementation object
//!         (presumably a key count - confirm against the real header).
10231 int64_t queryKeys(TimingCacheKey* keyBuffer, int64_t capacity) const noexcept
10232 {
10233 return mImpl->queryKeys(keyBuffer, capacity);
10234 }
10235
//! \brief Look up \p key through mImpl->query().
//! \return The TimingCacheValue returned by value from the implementation object.
10248 TimingCacheValue query(TimingCacheKey const& key) const noexcept
10249 {
10250 return mImpl->query(key);
10251 }
10252
//! \brief Forward the \p key / \p value pair to mImpl->update().
//! \return The bool status returned by the implementation object.
10270 bool update(TimingCacheKey const& key, TimingCacheValue const& value) noexcept
10271 {
10272 return mImpl->update(key, value);
10273 }
10274
10275protected:
//! Implementation object that every public member forwards to.
10276 apiv::VTimingCache* mImpl;
10277};
10278
//! The destructor is declared pure virtual (= 0) inside ITimingCache, so it still needs a definition;
//! this supplies an inline, defaulted one in the header.
10279inline ITimingCache::~ITimingCache() noexcept = default;
10280
//! \enum MemoryPoolType
//! \brief Selects the memory pool addressed by IBuilderConfig::setMemoryPoolLimit() and
//!        getMemoryPoolLimit().
//! NOTE(review): the value 1 does not appear among the enumerators visible here; the corresponding line
//! appears to be missing from this listing - verify against the real header.
10288enum class MemoryPoolType : int32_t
10289{
10295 kWORKSPACE = 0,
10296
10304
10310 kDLA_LOCAL_DRAM = 2,
10311
10317 kDLA_GLOBAL_DRAM = 3,
10318
10326 kTACTIC_DRAM = 4,
10327
10341};
10342
10348template <>
10350{
10351 static constexpr int32_t kVALUE = 6;
10352};
10353
//! \enum PreviewFeature
//! \brief Identifies a preview feature that IBuilderConfig::setPreviewFeature() enables or disables and
//!        getPreviewFeature() reports on.
//! NOTE(review): no enumerator lines are visible in this listing - consult the real header for the values.
10362enum class PreviewFeature : int32_t
10363{
10368
10375};
10376
10382template <>
10384{
10385 static constexpr int32_t kVALUE = 2;
10386};
10387
//! \enum HardwareCompatibilityLevel
//! \brief Argument type of IBuilderConfig::setHardwareCompatibilityLevel().
//! Only the two enumerators below are visible in this listing.
10396enum class HardwareCompatibilityLevel : int32_t
10397{
10400 kNONE = 0,
10401
10413 kAMPERE_PLUS = 1,
10414
10424};
10425
10431template <>
10433{
10434 static constexpr int32_t kVALUE = 3;
10435};
10436
10437
//! \enum TilingOptimizationLevel
//! \brief The Tiling optimization level; four consecutive values from kNONE (0) to kFULL (3).
10446enum class TilingOptimizationLevel : int32_t
10447{
10449 kNONE = 0,
10450
10452 kFAST = 1,
10453
10456 kMODERATE = 2,
10457
10459 kFULL = 3
10460
10461};
10462
10468template <>
10470{
10471 static constexpr int32_t kVALUE = 4;
10472};
10473
//! Versioned namespace holding the IProgressMonitor callback interface.
//! NOTE(review): the line that opens the class (name and base class) is missing from this listing; the
//! name is taken from the constructor and from the closing comment below.
10474namespace v_1_0
10475{
10477{
10478public:
10479 IProgressMonitor() = default;
10480 virtual ~IProgressMonitor() noexcept = default;
10481
//! \brief Identify this interface.
//! \return InterfaceInfo built from the name "IProgressMonitor" and the numbers 1 and 0
//!         (presumably major and minor version - confirm against InterfaceInfo's declaration).
10485 InterfaceInfo getInterfaceInfo() const noexcept override
10486 {
10487 return InterfaceInfo{"IProgressMonitor", 1, 0};
10488 }
10489
//! Pure virtual callback taking a phase name, a parent phase name and a step count.
//! NOTE(review): presumably signals that a phase with \p nbSteps steps begins - confirm.
10509 virtual void phaseStart(char const* phaseName, char const* parentPhase, int32_t nbSteps) noexcept = 0;
10510
//! Pure virtual callback taking a phase name and a step index; returns bool.
//! NOTE(review): the meaning of the return value is not visible here - confirm before relying on it.
10523 virtual bool stepComplete(char const* phaseName, int32_t step) noexcept = 0;
10524
//! Pure virtual callback taking a phase name.
//! NOTE(review): presumably signals that the named phase ended - confirm.
10536 virtual void phaseFinish(char const* phaseName) noexcept = 0;
10537
10538}; // class IProgressMonitor
10539} // namespace v_1_0
10540
10561
10570{
10571public:
10572 virtual ~IBuilderConfig() noexcept = 0;
10573
//! \brief Forward \p avgTiming to mImpl->setAvgTimingIterations(); the value can be read back with
//!        getAvgTimingIterations(), which queries the number of averaging iterations.
//! \note Unlike the neighbouring inline wrappers, this member is declared virtual.
10582 virtual void setAvgTimingIterations(int32_t avgTiming) noexcept
10583 {
10584 mImpl->setAvgTimingIterations(avgTiming);
10585 }
10586
//! \brief Query the number of averaging iterations.
//! \return The value reported by mImpl->getAvgTimingIterations().
10594 int32_t getAvgTimingIterations() const noexcept
10595 {
10596 return mImpl->getAvgTimingIterations();
10597 }
10598
//! \brief Configure the builder to target the specified EngineCapability flow.
//! \param capability Forwarded unchanged to mImpl->setEngineCapability().
10607 void setEngineCapability(EngineCapability capability) noexcept
10608 {
10609 mImpl->setEngineCapability(capability);
10610 }
10611
10620 {
10621 return mImpl->getEngineCapability();
10622 }
10623
//! \brief Set the build mode flags to turn on builder options for this network.
//! \param builderFlags The complete flag word, forwarded to mImpl->setFlags().
10636 void setFlags(BuilderFlags builderFlags) noexcept
10637 {
10638 mImpl->setFlags(builderFlags);
10639 }
10640
//! \brief Get the build mode flags for this builder config. Defaults to 0.
//! \return The flag word reported by mImpl->getFlags().
10648 BuilderFlags getFlags() const noexcept
10649 {
10650 return mImpl->getFlags();
10651 }
10652
//! \brief Clear a single build mode flag.
//! \param builderFlag The flag to clear, forwarded to mImpl->clearFlag().
10660 void clearFlag(BuilderFlag builderFlag) noexcept
10661 {
10662 mImpl->clearFlag(builderFlag);
10663 }
10664
//! \brief Set a single build mode flag.
//! \param builderFlag The flag to set, forwarded to mImpl->setFlag().
10672 void setFlag(BuilderFlag builderFlag) noexcept
10673 {
10674 mImpl->setFlag(builderFlag);
10675 }
10676
//! \brief Returns true if the build mode flag is set.
//! \param builderFlag The flag to test, forwarded to mImpl->getFlag().
10684 bool getFlag(BuilderFlag builderFlag) const noexcept
10685 {
10686 return mImpl->getFlag(builderFlag);
10687 }
10688
//! \brief Set the device that this layer must execute on.
//! \param layer The layer whose device is being set.
//! \param deviceType The device type, forwarded together with \p layer to mImpl->setDeviceType().
10701 void setDeviceType(ILayer const* layer, DeviceType deviceType) noexcept
10702 {
10703 mImpl->setDeviceType(layer, deviceType);
10704 }
10705
//! \brief Get the device that this layer executes on.
//! \return The DeviceType reported by mImpl->getDeviceType() for \p layer.
10711 DeviceType getDeviceType(ILayer const* layer) const noexcept
10712 {
10713 return mImpl->getDeviceType(layer);
10714 }
10715
//! \brief Report whether the DeviceType has been explicitly set for this layer.
//! \return The bool reported by mImpl->isDeviceTypeSet() for \p layer.
10723 bool isDeviceTypeSet(ILayer const* layer) const noexcept
10724 {
10725 return mImpl->isDeviceTypeSet(layer);
10726 }
10727
//! \brief Reset the DeviceType for this layer.
//! \param layer The layer to reset, forwarded to mImpl->resetDeviceType().
10733 void resetDeviceType(ILayer const* layer) noexcept
10734 {
10735 mImpl->resetDeviceType(layer);
10736 }
10737
//! \brief Checks if a layer can run on DLA.
//! \return The bool reported by mImpl->canRunOnDLA() for \p layer.
10743 bool canRunOnDLA(ILayer const* layer) const noexcept
10744 {
10745 return mImpl->canRunOnDLA(layer);
10746 }
10747
//! \brief Sets the DLA core used by the network. Defaults to -1.
//! \param dlaCore The core index, forwarded to mImpl->setDLACore().
10759 void setDLACore(int32_t dlaCore) noexcept
10760 {
10761 mImpl->setDLACore(dlaCore);
10762 }
10763
//! \brief Get the DLA core that the engine executes on.
//! \return The value reported by mImpl->getDLACore().
10769 int32_t getDLACore() const noexcept
10770 {
10771 return mImpl->getDLACore();
10772 }
10773
//! \brief Sets the default DeviceType to be used by the builder.
//! \param deviceType The default device type, forwarded to mImpl->setDefaultDeviceType().
10780 void setDefaultDeviceType(DeviceType deviceType) noexcept
10781 {
10782 mImpl->setDefaultDeviceType(deviceType);
10783 }
10784
10791 {
10792 return mImpl->getDefaultDeviceType();
10793 }
10794
//! \brief Resets the builder configuration to defaults by calling mImpl->reset().
10800 void reset() noexcept
10801 {
10802 mImpl->reset();
10803 }
10804
//! \brief Set the CUDA stream that is used to profile this network.
//! \param stream The CUDA stream, forwarded to mImpl->setProfileStream().
//!
//! This function returns void, so the implementation call is written as a plain statement, matching
//! the other void setters of this class such as setFlags() and setDLACore().
10812 void setProfileStream(const cudaStream_t stream) noexcept
10813 {
10814 mImpl->setProfileStream(stream);
10815 }
10816
//! \brief Get the CUDA stream that is used to profile this network.
//! \return The stream handle reported by mImpl->getProfileStream().
10824 cudaStream_t getProfileStream() const noexcept
10825 {
10826 return mImpl->getProfileStream();
10827 }
10828
//! \brief Add an optimization profile.
//! \param profile The profile, forwarded to mImpl->addOptimizationProfile().
//! \return The int32_t returned by the implementation object
//!         (presumably the new profile's index - confirm against the real header).
10841 int32_t addOptimizationProfile(IOptimizationProfile const* profile) noexcept
10842 {
10843 return mImpl->addOptimizationProfile(profile);
10844 }
10845
//! \brief Get number of optimization profiles.
//! \return The count reported by mImpl->getNbOptimizationProfiles().
10854 int32_t getNbOptimizationProfiles() const noexcept
10855 {
10856 return mImpl->getNbOptimizationProfiles();
10857 }
10858
10867 {
10868 mImpl->setProfilingVerbosity(verbosity);
10869 }
10870
10880 {
10881 return mImpl->getProfilingVerbosity();
10882 }
10883
//! \brief Set tactic sources.
//! \param tacticSources The value forwarded to mImpl->setTacticSources().
//! \return The bool status returned by the implementation object.
10901 bool setTacticSources(TacticSources tacticSources) noexcept
10902 {
10903 return mImpl->setTacticSources(tacticSources);
10904 }
10905
10917 {
10918 return mImpl->getTacticSources();
10919 }
10920
//! \brief Create timing cache.
//! \param blob Pointer to caller-provided bytes, forwarded unchanged.
//! \param size Size forwarded with \p blob (presumably its length in bytes - confirm).
//! \return The ITimingCache pointer returned by mImpl->createTimingCache().
10936 nvinfer1::ITimingCache* createTimingCache(void const* blob, std::size_t size) const noexcept
10937 {
10938 return mImpl->createTimingCache(blob, size);
10939 }
10940
//! \brief Attach a timing cache to IBuilderConfig.
//! \param cache, ignoreMismatch Forwarded unchanged to mImpl->setTimingCache().
//! \return The bool status returned by the implementation object.
10959 bool setTimingCache(ITimingCache const& cache, bool ignoreMismatch) noexcept
10960 {
10961 return mImpl->setTimingCache(cache, ignoreMismatch);
10962 }
10963
10970 {
10971 return mImpl->getTimingCache();
10972 }
10973
//! \brief Set the memory size for the memory pool.
//! \param pool The pool being limited.
//! \param poolSize The size forwarded with \p pool to mImpl->setMemoryPoolLimit().
11001 void setMemoryPoolLimit(MemoryPoolType pool, std::size_t poolSize) noexcept
11002 {
11003 mImpl->setMemoryPoolLimit(pool, poolSize);
11004 }
11005
//! \brief Get the memory size limit of the memory pool.
//! \return The size reported by mImpl->getMemoryPoolLimit() for \p pool.
11020 std::size_t getMemoryPoolLimit(MemoryPoolType pool) const noexcept
11021 {
11022 return mImpl->getMemoryPoolLimit(pool);
11023 }
11024
//! \brief Enable or disable a specific preview feature.
//! \param feature The preview feature to change.
//! \param enable Forwarded with \p feature to mImpl->setPreviewFeature().
11038 void setPreviewFeature(PreviewFeature feature, bool enable) noexcept
11039 {
11040 mImpl->setPreviewFeature(feature, enable);
11041 }
11042
//! \brief Get status of preview feature.
//! \return The bool reported by mImpl->getPreviewFeature() for \p feature.
11052 bool getPreviewFeature(PreviewFeature feature) const noexcept
11053 {
11054 return mImpl->getPreviewFeature(feature);
11055 }
11056
//! \brief Set builder optimization level.
//! \param level The level, forwarded to mImpl->setBuilderOptimizationLevel().
11085 void setBuilderOptimizationLevel(int32_t level) noexcept
11086 {
11087 mImpl->setBuilderOptimizationLevel(level);
11088 }
11089
11098 {
11099 return mImpl->getBuilderOptimizationLevel();
11100 }
11101
//! \brief Set the hardware compatibility level.
//! \param hardwareCompatibilityLevel Forwarded to mImpl->setHardwareCompatibilityLevel().
11114 void setHardwareCompatibilityLevel(HardwareCompatibilityLevel hardwareCompatibilityLevel) noexcept
11115 {
11116 mImpl->setHardwareCompatibilityLevel(hardwareCompatibilityLevel);
11117 }
11118
11128 {
11129 return mImpl->getHardwareCompatibilityLevel();
11130 }
11131
//! \brief Set the plugin libraries to be serialized with version-compatible engines.
//! \param paths Array of C strings holding the plugin library paths.
//! \param nbPaths Count forwarded alongside \p paths (presumably the array length - confirm).
11140 void setPluginsToSerialize(char const* const* paths, int32_t nbPaths) noexcept
11141 {
11142 mImpl->setPluginsToSerialize(paths, nbPaths);
11143 }
11144
//! \brief Get the plugin library path to be serialized with version-compatible engines.
//! \param index Selects which path to return; forwarded to mImpl->getPluginToSerialize().
//! \return The C string returned by the implementation object.
11153 char const* getPluginToSerialize(int32_t index) const noexcept
11154 {
11155 return mImpl->getPluginToSerialize(index);
11156 }
11157
//! \brief Get the number of plugin library paths to be serialized with version-compatible engines.
//! \return The count reported by mImpl->getNbPluginsToSerialize().
11163 int32_t getNbPluginsToSerialize() const noexcept
11164 {
11165 return mImpl->getNbPluginsToSerialize();
11166 }
11167
//! \brief Set the maximum number of auxiliary streams that TRT is allowed to use.
//! \param nbStreams The limit, forwarded to mImpl->setMaxAuxStreams().
//! \return The bool status returned by the implementation object.
11194 bool setMaxAuxStreams(int32_t nbStreams) noexcept
11195 {
11196 return mImpl->setMaxAuxStreams(nbStreams);
11197 }
11198
//! \brief Get the maximum number of auxiliary streams that TRT is allowed to use.
//! \return The value reported by mImpl->getMaxAuxStreams().
11204 int32_t getMaxAuxStreams() const noexcept
11205 {
11206 return mImpl->getMaxAuxStreams();
11207 }
11208
//! \brief Sets the progress monitor for building a network.
//! \param monitor The monitor pointer, forwarded to mImpl->setProgressMonitor().
//!
//! This function returns void, so the implementation call is written as a plain statement, matching
//! the other void setters of this class such as setFlags() and setDLACore().
11220 void setProgressMonitor(IProgressMonitor* monitor) noexcept
11221 {
11222 mImpl->setProgressMonitor(monitor);
11223 }
11224
11231 {
11232 return mImpl->getProgressMonitor();
11233 }
11234
//! \brief Set the target platform for runtime execution.
//! \param runtimePlatform The platform, forwarded to mImpl->setRuntimePlatform().
11246 void setRuntimePlatform(RuntimePlatform runtimePlatform) noexcept
11247 {
11248 mImpl->setRuntimePlatform(runtimePlatform);
11249 }
11250
11259 {
11260 return mImpl->getRuntimePlatform();
11261 }
11262
//! \brief Set the maximum number of tactics to time when there is a choice of tactics.
//! \param maxNbTactics The limit, forwarded to mImpl->setMaxNbTactics().
11270 void setMaxNbTactics(int32_t maxNbTactics) noexcept
11271 {
11272 mImpl->setMaxNbTactics(maxNbTactics);
11273 }
11274
//! \brief Query the maximum number of tactics timed when there is a choice.
//! \return The value reported by mImpl->getMaxNbTactics().
11282 int32_t getMaxNbTactics() const noexcept
11283 {
11284 return mImpl->getMaxNbTactics();
11285 }
11286
11299 {
11300 return mImpl->setTilingOptimizationLevel(level);
11301 }
11302
11311 {
11312 return mImpl->getTilingOptimizationLevel();
11313 }
11314
//! \brief Set the L2 cache usage limit for Tiling optimization.
//! \param size The limit, forwarded to mImpl->setL2LimitForTiling().
//! \return The bool status returned by the implementation object.
11326 bool setL2LimitForTiling(int64_t size) noexcept
11327 {
11328 return mImpl->setL2LimitForTiling(size);
11329 }
11330
//! \brief Get the L2 cache usage limit for tiling optimization.
//! \return The value reported by mImpl->getL2LimitForTiling().
11338 int64_t getL2LimitForTiling() const noexcept
11339 {
11340 return mImpl->getL2LimitForTiling();
11341 }
11342
//! \brief Set a config string for remote auto tuning.
//! \param config The C string forwarded to mImpl->setRemoteAutoTuningConfig().
//! \return The bool status returned by the implementation object.
11352 bool setRemoteAutoTuningConfig(char const* config) noexcept
11353 {
11354 return mImpl->setRemoteAutoTuningConfig(config);
11355 }
11356
//! \brief Get a config string for remote auto tuning.
//! \return The C string reported by mImpl->getRemoteAutoTuningConfig().
11362 char const* getRemoteAutoTuningConfig() const noexcept
11363 {
11364 return mImpl->getRemoteAutoTuningConfig();
11365 }
11366
11367protected:
11368 apiv::VBuilderConfig* mImpl;
11369};
11370
//! The destructor is declared pure virtual (= 0) inside IBuilderConfig, so it still needs a definition;
//! this supplies an inline, defaulted one in the header.
11371inline IBuilderConfig::~IBuilderConfig() noexcept = default;
11372
11381
11391{
11404
11409};
11410
11416template <>
11418{
11419 static constexpr int32_t kVALUE = 3;
11420};
11421
//! \class IBuilder
//! \brief Builds an engine from a network definition. Non-copyable (derives from INoCopy); every member
//!        is an inline wrapper that forwards to the apiv::VBuilder implementation held in mImpl.
//! NOTE(review): several members below appear without their declaration line because this listing
//! dropped it; those fragments are only annotated, not reconstructed.
11429class IBuilder : public INoCopy
11430{
11431public:
//! Pure virtual destructor; its inline defaulted definition follows the class.
11432 virtual ~IBuilder() noexcept = 0;
11433
//! \return The value reported by mImpl->getMaxDLABatchSize().
11441 int32_t getMaxDLABatchSize() const noexcept
11442 {
11443 return mImpl->getMaxDLABatchSize();
11444 }
11445
//! \brief Return the number of DLA engines available to this builder.
11449 int32_t getNbDLACores() const noexcept
11450 {
11451 return mImpl->getNbDLACores();
11452 }
11453
//! \brief Hand the \p allocator pointer to the implementation via mImpl->setGpuAllocator().
11467 void setGpuAllocator(IGpuAllocator* allocator) noexcept
11468 {
11469 mImpl->setGpuAllocator(allocator);
11470 }
11471
// NOTE(review): declaration line missing from this listing; the body forwards to
// mImpl->createBuilderConfig().
11482 {
11483 return mImpl->createBuilderConfig();
11484 }
11485
// NOTE(review): declaration line missing from this listing; the body forwards `flags` to
// mImpl->createNetworkV2().
11508 {
11509 return mImpl->createNetworkV2(flags);
11510 }
11511
// Create a new optimization profile. NOTE(review): declaration line missing from this listing.
11523 {
11524 return mImpl->createOptimizationProfile();
11525 }
11526
//! \brief Hand the \p recorder pointer to the implementation via mImpl->setErrorRecorder().
11541 void setErrorRecorder(IErrorRecorder* recorder) noexcept
11542 {
11543 mImpl->setErrorRecorder(recorder);
11544 }
11545
// Get the ErrorRecorder assigned to this interface. NOTE(review): declaration line missing from this
// listing.
11557 {
11558 return mImpl->getErrorRecorder();
11559 }
11560
//! \brief Forward to mImpl->reset(); takes no arguments and returns nothing.
11564 void reset() noexcept
11565 {
11566 mImpl->reset();
11567 }
11568
// NOTE(review): declaration line missing from this listing; the body forwards `network` and `config`
// to mImpl->buildSerializedNetwork().
11584 {
11585 return mImpl->buildSerializedNetwork(network, config);
11586 }
11587
// NOTE(review): first declaration line (return type and name) missing from this listing; the body
// forwards to mImpl->buildSerializedNetworkToStream().
11605 INetworkDefinition& network, IBuilderConfig& config, IStreamWriter& writer) noexcept
11606 {
11607 return mImpl->buildSerializedNetworkToStream(network, config, writer);
11608 }
11609
// NOTE(review): first declaration line (return type and name) missing from this listing; the body
// forwards to mImpl->buildSerializedNetworkWithKernelText(). kernelText is taken by reference to a
// pointer, so it can act as an output parameter.
11629 INetworkDefinition& network, IBuilderConfig& config, IHostMemory*& kernelText) noexcept
11630 {
11631 return mImpl->buildSerializedNetworkWithKernelText(network, config, kernelText);
11632 }
11633
// NOTE(review): declaration line missing from this listing; the body forwards `network` and `config`
// to mImpl->buildEngineWithConfig().
11650 {
11651 return mImpl->buildEngineWithConfig(network, config);
11652 }
11653
//! \brief Checks that a network is within the scope of the IBuilderConfig settings.
//! \return The bool reported by mImpl->isNetworkSupported().
11675 bool isNetworkSupported(INetworkDefinition const& network, IBuilderConfig const& config) const noexcept
11676 {
11677 return mImpl->isNetworkSupported(network, config);
11678 }
11679
//! \brief Get the logger with which the builder was created.
11685 ILogger* getLogger() const noexcept
11686 {
11687 return mImpl->getLogger();
11688 }
11689
//! \brief Forward \p maxThreads to mImpl->setMaxThreads(); getMaxThreads() reads the value back.
//! \return The bool status returned by the implementation object.
11701 bool setMaxThreads(int32_t maxThreads) noexcept
11702 {
11703 return mImpl->setMaxThreads(maxThreads);
11704 }
11705
//! \brief Get the maximum number of threads that can be used by the builder.
11715 int32_t getMaxThreads() const noexcept
11716 {
11717 return mImpl->getMaxThreads();
11718 }
11719
// Get the local plugin registry that can be used by the builder. NOTE(review): declaration line
// missing from this listing.
11726 {
11727 return mImpl->getPluginRegistry();
11728 }
11729
11730protected:
//! Implementation object that every public member forwards to.
11731 apiv::VBuilder* mImpl;
11732};
11733
//! The destructor is declared pure virtual (= 0) inside IBuilder, so it still needs a definition;
//! this supplies an inline, defaulted one in the header.
11734inline IBuilder::~IBuilder() noexcept = default;
11735
11736} // namespace nvinfer1
11737
//! C-linkage entry point exported through TENSORRTAPI. It takes the logger as an untyped pointer plus an
//! integer version and returns an untyped pointer; createInferBuilder() wraps it with proper types.
11742extern "C" TENSORRTAPI void* createInferBuilder_INTERNAL(void* logger, int32_t version) noexcept;
11743
11744namespace nvinfer1
11745{
11746namespace
11747{
11748
//! \brief Create an IBuilder through the C-linkage factory createInferBuilder_INTERNAL().
//!
//! Passes the address of \p logger together with NV_TENSORRT_VERSION, i.e. the version of the headers
//! the calling code was compiled against, and casts the untyped result to IBuilder*.
//! The function is defined inside an anonymous namespace in this header.
//!
//! \param logger The logger whose address is handed to the factory.
//! \return The factory's return value, cast to IBuilder*.
11756inline IBuilder* createInferBuilder(ILogger& logger) noexcept
11757{
11758 return static_cast<IBuilder*>(createInferBuilder_INTERNAL(&logger, NV_TENSORRT_VERSION));
11759}
11760
11761} // namespace
11762
11775 nvinfer1::EngineCapability capability) noexcept;
11776
//! C-linkage function exported through TENSORRTAPI; takes an AsciiChar path string and returns a bool.
//! NOTE(review): the meaning of the return value is not visible in this listing - presumably a success
//! flag; confirm against the real header.
11786extern "C" TENSORRTAPI bool setInternalLibraryPath(AsciiChar const* path) noexcept;
11787
11788namespace safe
11789{
11791class IPluginRegistry;
11792} // namespace safe
11793
//! C-linkage function marked TRT_DEPRECATED_API; takes an EngineCapability and returns a pointer to the
//! forward-declared nvinfer1::safe::IPluginRegistry.
11801extern "C" TRT_DEPRECATED_API nvinfer1::safe::IPluginRegistry* getBuilderSafePluginRegistry(
11802 nvinfer1::EngineCapability capability) noexcept;
11803
11804} // namespace nvinfer1
11805
11806#endif // NV_INFER_H
#define TRT_DEPRECATED_API
Definition: NvInferRuntimeBase.h:44
#define TENSORRTAPI
Definition: NvInferRuntimeBase.h:70
#define NV_TENSORRT_VERSION
Definition: NvInferRuntimeBase.h:102
#define TRT_NODISCARD
A stand-in for [[nodiscard]] and [[nodiscard(REASON)]] that works with older compilers.
Definition: NvInferRuntimeBase.h:57
#define TRT_DEPRECATED
Definition: NvInferRuntimeBase.h:42
#define TRT_DEPRECATED_ENUM
Definition: NvInferRuntimeBase.h:43
Definition: NvInferRuntimeBase.h:222
static constexpr int32_t MAX_DIMS
The maximum rank (number of dimensions) supported for a tensor.
Definition: NvInferRuntimeBase.h:225
An Activation layer in a network definition.
Definition: NvInfer.h:1179
void setBeta(float beta) noexcept
Set the beta parameter (must be finite).
Definition: NvInfer.h:1227
void setActivationType(ActivationType type) noexcept
Set the type of activation to be performed.
Definition: NvInfer.h:1188
ActivationType getActivationType() const noexcept
Get the type of activation to be performed.
Definition: NvInfer.h:1198
float getAlpha() const noexcept
Get the alpha parameter.
Definition: NvInfer.h:1236
float getBeta() const noexcept
Get the beta parameter.
Definition: NvInfer.h:1245
void setAlpha(float alpha) noexcept
Set the alpha parameter (must be finite).
Definition: NvInfer.h:1213
virtual ~IActivationLayer() noexcept=0
An assertion layer in a network.
Definition: NvInfer.h:4942
void setMessage(char const *message) noexcept
Set the message to print if the assertion fails.
Definition: NvInfer.h:4952
char const * getMessage() const noexcept
Return the assertion message.
Definition: NvInfer.h:4962
virtual ~IAssertionLayer() noexcept=0
This is a base class for Attention boundary layers.
Definition: NvInfer.h:6821
IAttention * getAttention() const noexcept
Get a pointer to the IAttention associated with this boundary layer.
Definition: NvInfer.h:6826
virtual ~IAttentionBoundaryLayer() noexcept=0
Helper for constructing an attention that consumes query, key and value tensors.
Definition: NvInfer.h:6958
ITensor * getMask() noexcept
Get the optional mask in attention.
Definition: NvInfer.h:7008
bool setMetadata(char const *metadata) noexcept
Set the metadata for IAttention.
Definition: NvInfer.h:7258
TRT_NODISCARD bool setQueryLengths(ITensor *lengths) noexcept
Set the query lengths tensor.
Definition: NvInfer.h:7383
bool setDecomposable(bool decomposable) noexcept
Set whether the attention can be decomposed to use multiple kernels if no fused kernel support is found.
Definition: NvInfer.h:7084
bool setName(char const *name) noexcept
Set the name of the attention.
Definition: NvInfer.h:7174
bool getDecomposable() const noexcept
Get whether the attention can be decomposed to use multiple kernels if no fused kernel support is found.
Definition: NvInfer.h:7097
ITensor * getInput(int32_t index) const noexcept
Get the IAttention input corresponding to the given index.
Definition: NvInfer.h:7137
CausalMaskKind getCausalKind() const noexcept
Get the causal mask alignment orientation for the attention.
Definition: NvInfer.h:7072
ITensor * getOutput(int32_t index) const noexcept
Get the IAttention output corresponding to the given index. IAttention has only one output.
Definition: NvInfer.h:7157
int32_t getNbOutputs() const noexcept
Get the number of outputs of a layer. IAttention has one output.
Definition: NvInfer.h:7145
bool setNbRanks(int32_t nbRanks) noexcept
Set the number of ranks for multi-device attention execution.
Definition: NvInfer.h:7287
int32_t getNbInputs() const noexcept
Get the number of inputs of IAttention. IAttention has three inputs.
Definition: NvInfer.h:7125
TRT_NODISCARD bool setKeyValueLengths(ITensor *lengths) noexcept
Set the key-value lengths tensor.
Definition: NvInfer.h:7422
TRT_NODISCARD ITensor * getKeyValueLengths() const noexcept
Get the key-value lengths tensor.
Definition: NvInfer.h:7434
bool setNormalizationOperation(AttentionNormalizationOp op) noexcept
Set the normalization operation for the attention.
Definition: NvInfer.h:6967
TRT_NODISCARD AttentionIOForm getKeyValueForm() const noexcept
Get the key-value form.
Definition: NvInfer.h:7359
char const * getName() const noexcept
Return the name of the attention.
Definition: NvInfer.h:7186
bool setNormalizationQuantizeToType(DataType type) noexcept
Set the datatype the attention normalization is quantized to.
Definition: NvInfer.h:7226
int32_t getNbRanks() const noexcept
Get the number of ranks for multi-device execution.
Definition: NvInfer.h:7299
TRT_DEPRECATED bool getCausal() const noexcept
Get whether the attention will run a causal inference.
Definition: NvInfer.h:7040
AttentionNormalizationOp getNormalizationOperation() const noexcept
Get the normalization operation for the attention.
Definition: NvInfer.h:6979
bool setNormalizationQuantizeScale(ITensor &tensor) noexcept
Set the quantization scale for the attention normalization output.
Definition: NvInfer.h:7202
bool setCausalKind(CausalMaskKind kind) noexcept
Set the causal mask alignment orientation for the attention.
Definition: NvInfer.h:7060
TRT_NODISCARD AttentionIOForm getQueryForm() const noexcept
Get the query form.
Definition: NvInfer.h:7329
char const * getMetadata() const noexcept
Get the metadata of IAttention.
Definition: NvInfer.h:7271
DataType getNormalizationQuantizeToType() const noexcept
Get the datatype the attention normalization is quantized to.
Definition: NvInfer.h:7238
TRT_NODISCARD bool setQueryForm(AttentionIOForm form) noexcept
Set the query form.
Definition: NvInfer.h:7316
virtual ~IAttention() noexcept=0
ITensor * getNormalizationQuantizeScale() const noexcept
Get the quantization scale for the attention normalization output.
Definition: NvInfer.h:7213
bool setInput(int32_t index, ITensor &input) noexcept
Append or replace an input of this layer with a specific tensor.
Definition: NvInfer.h:7116
TRT_NODISCARD ITensor * getQueryLengths() const noexcept
Get the query lengths tensor.
Definition: NvInfer.h:7395
bool setMask(ITensor &mask) noexcept
Set whether a mask will be used for the normalization operation.
Definition: NvInfer.h:6996
TRT_NODISCARD bool setKeyValueForm(AttentionIOForm form) noexcept
Set the key-value form.
Definition: NvInfer.h:7346
TRT_DEPRECATED bool setCausal(bool isCausal) noexcept
Set whether the attention will run a causal inference. Cannot be used together with setMask().
Definition: NvInfer.h:7026
apiv::VAttention * mImpl
Definition: NvInfer.h:7440
This layer represents an input to an attention subgraph.
Definition: NvInfer.h:6849
virtual ~IAttentionInputLayer() noexcept=0
This layer represents an output of an IAttention.
Definition: NvInfer.h:6888
virtual ~IAttentionOutputLayer() noexcept=0
Holds properties for configuring a builder to produce an engine.
Definition: NvInfer.h:10570
void setMemoryPoolLimit(MemoryPoolType pool, std::size_t poolSize) noexcept
Set the memory size for the memory pool.
Definition: NvInfer.h:11001
nvinfer1::ITimingCache * createTimingCache(void const *blob, std::size_t size) const noexcept
Create timing cache.
Definition: NvInfer.h:10936
bool setMaxAuxStreams(int32_t nbStreams) noexcept
Set the maximum number of auxiliary streams that TRT is allowed to use.
Definition: NvInfer.h:11194
void setPreviewFeature(PreviewFeature feature, bool enable) noexcept
Enable or disable a specific preview feature.
Definition: NvInfer.h:11038
bool getPreviewFeature(PreviewFeature feature) const noexcept
Get status of preview feature.
Definition: NvInfer.h:11052
int32_t getBuilderOptimizationLevel() noexcept
Get builder optimization level.
Definition: NvInfer.h:11097
bool setTacticSources(TacticSources tacticSources) noexcept
Set tactic sources.
Definition: NvInfer.h:10901
void setPluginsToSerialize(char const *const *paths, int32_t nbPaths) noexcept
Set the plugin libraries to be serialized with version-compatible engines.
Definition: NvInfer.h:11140
bool setTilingOptimizationLevel(TilingOptimizationLevel level) noexcept
Set the Tiling optimization level.
Definition: NvInfer.h:11298
bool setL2LimitForTiling(int64_t size) noexcept
Set the L2 cache usage limit for Tiling optimization.
Definition: NvInfer.h:11326
std::size_t getMemoryPoolLimit(MemoryPoolType pool) const noexcept
Get the memory size limit of the memory pool.
Definition: NvInfer.h:11020
int32_t getDLACore() const noexcept
Get the DLA core that the engine executes on.
Definition: NvInfer.h:10769
int32_t getNbPluginsToSerialize() const noexcept
Get the number of plugin library paths to be serialized with version-compatible engines.
Definition: NvInfer.h:11163
void setDeviceType(ILayer const *layer, DeviceType deviceType) noexcept
Set the device that this layer must execute on.
Definition: NvInfer.h:10701
void setEngineCapability(EngineCapability capability) noexcept
Configure the builder to target specified EngineCapability flow.
Definition: NvInfer.h:10607
virtual ~IBuilderConfig() noexcept=0
int32_t getMaxAuxStreams() const noexcept
Get the maximum number of auxiliary streams that TRT is allowed to use.
Definition: NvInfer.h:11204
bool getFlag(BuilderFlag builderFlag) const noexcept
Returns true if the build mode flag is set.
Definition: NvInfer.h:10684
void setMaxNbTactics(int32_t maxNbTactics) noexcept
Set the maximum number of tactics to time when there is a choice of tactics.
Definition: NvInfer.h:11270
int64_t getL2LimitForTiling() const noexcept
Get the L2 cache usage limit for tiling optimization.
Definition: NvInfer.h:11338
bool setRemoteAutoTuningConfig(char const *config) noexcept
Set a config string for remote auto tuning.
Definition: NvInfer.h:11352
void setProgressMonitor(IProgressMonitor *monitor) noexcept
Sets the progress monitor for building a network.
Definition: NvInfer.h:11220
void setProfilingVerbosity(ProfilingVerbosity verbosity) noexcept
Set verbosity level of layer information exposed in NVTX annotations and IEngineInspector.
Definition: NvInfer.h:10866
int32_t getNbOptimizationProfiles() const noexcept
Get number of optimization profiles.
Definition: NvInfer.h:10854
nvinfer1::ITimingCache const * getTimingCache() const noexcept
Get the pointer to the timing cache from current IBuilderConfig.
Definition: NvInfer.h:10969
void reset() noexcept
Resets the builder configuration to defaults.
Definition: NvInfer.h:10800
bool setTimingCache(ITimingCache const &cache, bool ignoreMismatch) noexcept
Attach a timing cache to IBuilderConfig.
Definition: NvInfer.h:10959
char const * getPluginToSerialize(int32_t index) const noexcept
Get the plugin library path to be serialized with version-compatible engines.
Definition: NvInfer.h:11153
EngineCapability getEngineCapability() const noexcept
Query EngineCapability flow configured for the builder.
Definition: NvInfer.h:10619
RuntimePlatform getRuntimePlatform() const noexcept
Get the target platform for runtime execution.
Definition: NvInfer.h:11258
DeviceType getDefaultDeviceType() const noexcept
Get the default DeviceType which was set by setDefaultDeviceType.
Definition: NvInfer.h:10790
void setRuntimePlatform(RuntimePlatform runtimePlatform) noexcept
Set the target platform for runtime execution.
Definition: NvInfer.h:11246
int32_t getMaxNbTactics() const noexcept
Query the maximum number of tactics timed when there is a choice.
Definition: NvInfer.h:11282
BuilderFlags getFlags() const noexcept
Get the build mode flags for this builder config. Defaults to 0.
Definition: NvInfer.h:10648
void setFlags(BuilderFlags builderFlags) noexcept
Set the build mode flags to turn on builder options for this network.
Definition: NvInfer.h:10636
TacticSources getTacticSources() const noexcept
Get tactic sources.
Definition: NvInfer.h:10916
void resetDeviceType(ILayer const *layer) noexcept
Reset the DeviceType for this layer.
Definition: NvInfer.h:10733
void setDLACore(int32_t dlaCore) noexcept
Sets the DLA core used by the network. Defaults to -1.
Definition: NvInfer.h:10759
HardwareCompatibilityLevel getHardwareCompatibilityLevel() const noexcept
Get the hardware compatibility level.
Definition: NvInfer.h:11127
char const * getRemoteAutoTuningConfig() const noexcept
Get a config string for remote auto tuning.
Definition: NvInfer.h:11362
void clearFlag(BuilderFlag builderFlag) noexcept
Clear a single build mode flag.
Definition: NvInfer.h:10660
int32_t addOptimizationProfile(IOptimizationProfile const *profile) noexcept
Add an optimization profile.
Definition: NvInfer.h:10841
IProgressMonitor * getProgressMonitor() const noexcept
Definition: NvInfer.h:11230
apiv::VBuilderConfig * mImpl
Definition: NvInfer.h:11368
int32_t getAvgTimingIterations() const noexcept
Query the number of averaging iterations.
Definition: NvInfer.h:10594
void setDefaultDeviceType(DeviceType deviceType) noexcept
Sets the default DeviceType to be used by the builder. It ensures that all the layers that can run on...
Definition: NvInfer.h:10780
void setFlag(BuilderFlag builderFlag) noexcept
Set a single build mode flag.
Definition: NvInfer.h:10672
DeviceType getDeviceType(ILayer const *layer) const noexcept
Get the device that this layer executes on.
Definition: NvInfer.h:10711
bool canRunOnDLA(ILayer const *layer) const noexcept
Checks if a layer can run on DLA.
Definition: NvInfer.h:10743
cudaStream_t getProfileStream() const noexcept
Get the CUDA stream that is used to profile this network.
Definition: NvInfer.h:10824
void setHardwareCompatibilityLevel(HardwareCompatibilityLevel hardwareCompatibilityLevel) noexcept
Set the hardware compatibility level.
Definition: NvInfer.h:11114
TilingOptimizationLevel getTilingOptimizationLevel() const noexcept
Get the Tiling optimization level.
Definition: NvInfer.h:11310
ProfilingVerbosity getProfilingVerbosity() const noexcept
Get verbosity level of layer information exposed in NVTX annotations and IEngineInspector.
Definition: NvInfer.h:10879
bool isDeviceTypeSet(ILayer const *layer) const noexcept
Check whether the DeviceType has been explicitly set for this layer.
Definition: NvInfer.h:10723
void setBuilderOptimizationLevel(int32_t level) noexcept
Set builder optimization level.
Definition: NvInfer.h:11085
void setProfileStream(const cudaStream_t stream) noexcept
Set the CUDA stream that is used to profile this network.
Definition: NvInfer.h:10812
Builds an engine from a network definition.
Definition: NvInfer.h:11430
int32_t getNbDLACores() const noexcept
Return the number of DLA engines available to this builder.
Definition: NvInfer.h:11449
virtual ~IBuilder() noexcept=0
IErrorRecorder * getErrorRecorder() const noexcept
Get the ErrorRecorder assigned to this interface.
Definition: NvInfer.h:11556
apiv::VBuilder * mImpl
Definition: NvInfer.h:11731
ILogger * getLogger() const noexcept
Get the logger with which the builder was created.
Definition: NvInfer.h:11685
bool isNetworkSupported(INetworkDefinition const &network, IBuilderConfig const &config) const noexcept
Checks that a network is within the scope of the IBuilderConfig settings.
Definition: NvInfer.h:11675
int32_t getMaxThreads() const noexcept
Get the maximum number of threads that can be used by the builder.
Definition: NvInfer.h:11715
IPluginRegistry & getPluginRegistry() noexcept
Get the local plugin registry that can be used by the builder.
Definition: NvInfer.h:11725
nvinfer1::IOptimizationProfile * createOptimizationProfile() noexcept
Create a new optimization profile.
Definition: NvInfer.h:11522
void setGpuAllocator(IGpuAllocator *allocator) noexcept
Set the GPU allocator.
Definition: NvInfer.h:11467
nvinfer1::INetworkDefinition * createNetworkV2(NetworkDefinitionCreationFlags flags) noexcept
Create a network definition object.
Definition: NvInfer.h:11507
nvinfer1::IBuilderConfig * createBuilderConfig() noexcept
Create a builder configuration object.
Definition: NvInfer.h:11481
void reset() noexcept
Resets the builder state to default values.
Definition: NvInfer.h:11564
bool setMaxThreads(int32_t maxThreads) noexcept
Set the maximum number of threads.
Definition: NvInfer.h:11701
void setErrorRecorder(IErrorRecorder *recorder) noexcept
Set the ErrorRecorder for this interface.
Definition: NvInfer.h:11541
nvinfer1::IHostMemory * buildSerializedNetwork(INetworkDefinition &network, IBuilderConfig &config) noexcept
Builds and serializes a network for the given INetworkDefinition and IBuilderConfig.
Definition: NvInfer.h:11583
bool buildSerializedNetworkToStream(INetworkDefinition &network, IBuilderConfig &config, IStreamWriter &writer) noexcept
Builds and serializes a network into stream for the given INetworkDefinition and IBuilderConfig.
Definition: NvInfer.h:11604
nvinfer1::ICudaEngine * buildEngineWithConfig(INetworkDefinition &network, IBuilderConfig &config) noexcept
Builds a network for the given INetworkDefinition and IBuilderConfig.
Definition: NvInfer.h:11649
nvinfer1::IHostMemory * buildSerializedNetwork(INetworkDefinition &network, IBuilderConfig &config, IHostMemory *&kernelText) noexcept
Extended form of buildSerializedNetwork that optionally permits getting the kernelText.
Definition: NvInfer.h:11628
A cast layer in a network.
Definition: NvInfer.h:3783
apiv::VCastLayer * mImpl
Definition: NvInfer.h:3809
DataType getToType() const noexcept
Return cast layer output type.
Definition: NvInfer.h:3803
void setToType(DataType toType) noexcept
Set cast layer output type.
Definition: NvInfer.h:3792
virtual ~ICastLayer() noexcept=0
A concatenation layer in a network definition.
Definition: NvInfer.h:1896
void setAxis(int32_t axis) noexcept
Set the axis along which concatenation occurs.
Definition: NvInfer.h:1909
int32_t getAxis() const noexcept
Get the axis along which concatenation occurs.
Definition: NvInfer.h:1919
virtual ~IConcatenationLayer() noexcept=0
This layer represents a condition input to an IIfConditional.
Definition: NvInfer.h:4446
virtual ~IConditionLayer() noexcept=0
Layer that represents a constant value.
Definition: NvInfer.h:3824
void setWeights(Weights weights) noexcept
Set the weights for the layer.
Definition: NvInfer.h:3834
Weights getWeights() const noexcept
Get the weights for the layer.
Definition: NvInfer.h:3844
virtual ~IConstantLayer() noexcept=0
void setDimensions(Dims const &dimensions) noexcept
Set the dimensions for the layer.
Definition: NvInfer.h:3856
apiv::VConstantLayer * mImpl
Definition: NvInfer.h:3874
Dims getDimensions() const noexcept
Get the dimensions for the layer.
Definition: NvInfer.h:3868
A convolution layer in a network definition.
Definition: NvInfer.h:857
Dims getPrePadding() const noexcept
Get the pre-padding.
Definition: NvInfer.h:982
Weights getBiasWeights() const noexcept
Get the bias weights for the convolution.
Definition: NvInfer.h:955
void setPaddingMode(PaddingMode paddingMode) noexcept
Set the padding mode.
Definition: NvInfer.h:1023
void setDilationNd(Dims const &dilation) noexcept
Set the multi-dimension dilation of the convolution.
Definition: NvInfer.h:1127
virtual ~IConvolutionLayer() noexcept=0
Dims getPaddingNd() const noexcept
Get the multi-dimension padding of the convolution.
Definition: NvInfer.h:1113
Dims getStrideNd() const noexcept
Get the multi-dimension stride of the convolution.
Definition: NvInfer.h:1083
Weights getKernelWeights() const noexcept
Get the kernel weights of the convolution.
Definition: NvInfer.h:930
void setStrideNd(Dims const &stride) noexcept
Set the multi-dimension stride of the convolution.
Definition: NvInfer.h:1073
Dims getDilationNd() const noexcept
Get the multi-dimension dilation of the convolution.
Definition: NvInfer.h:1137
int64_t getNbOutputMaps() const noexcept
Get the number of output maps for the convolution.
Definition: NvInfer.h:876
void setKernelWeights(Weights weights) noexcept
Set the kernel weights for the convolution.
Definition: NvInfer.h:920
Dims getPostPadding() const noexcept
Get the post-padding.
Definition: NvInfer.h:1009
int64_t getNbGroups() const noexcept
Get the number of groups of the convolution.
Definition: NvInfer.h:906
PaddingMode getPaddingMode() const noexcept
Get the padding mode.
Definition: NvInfer.h:1035
void setNbGroups(int64_t nbGroups) noexcept
Set the number of groups for a convolution.
Definition: NvInfer.h:896
void setNbOutputMaps(int64_t nbOutputMaps) noexcept
Set the number of output maps for the convolution.
Definition: NvInfer.h:866
void setBiasWeights(Weights weights) noexcept
Set the bias weights for the convolution.
Definition: NvInfer.h:945
Dims getKernelSizeNd() const noexcept
Get the multi-dimension kernel size of the convolution.
Definition: NvInfer.h:1058
void setPaddingNd(Dims const &padding) noexcept
Set the multi-dimension padding of the convolution.
Definition: NvInfer.h:1101
void setPrePadding(Dims const &padding) noexcept
Set the multi-dimension pre-padding of the convolution.
Definition: NvInfer.h:972
void setPostPadding(Dims const &padding) noexcept
Set the multi-dimension post-padding of the convolution.
Definition: NvInfer.h:999
void setKernelSizeNd(Dims const &kernelSize) noexcept
Set the multi-dimension kernel size of the convolution.
Definition: NvInfer.h:1048
An engine for executing inference on a built network, with functionally unsafe features.
Definition: NvInferRuntime.h:3013
Layer that represents a cumulative operation across a tensor.
Definition: NvInfer.h:6642
bool setOperation(CumulativeOperation op) noexcept
Set the cumulative operation for the layer.
Definition: NvInfer.h:6653
void setReverse(bool reverse) noexcept
Specify whether the cumulative operation should be applied backward.
Definition: NvInfer.h:6701
apiv::VCumulativeLayer * mImpl
Definition: NvInfer.h:6719
virtual ~ICumulativeLayer() noexcept=0
bool getExclusive() const noexcept
Get whether it is exclusive accumulation or inclusive accumulation.
Definition: NvInfer.h:6689
bool getReverse() const noexcept
Get the boolean that specifies whether the cumulative operation should be applied backward.
Definition: NvInfer.h:6713
void setExclusive(bool exclusive) noexcept
Set whether it is an exclusive accumulation or inclusive accumulation.
Definition: NvInfer.h:6677
CumulativeOperation getOperation() const noexcept
Get the cumulative operation for the layer.
Definition: NvInfer.h:6665
A deconvolution layer in a network definition.
Definition: NvInfer.h:1939
void setBiasWeights(Weights weights) noexcept
Set the bias weights for the deconvolution.
Definition: NvInfer.h:2027
int64_t getNbGroups() const noexcept
Get the number of groups for a deconvolution.
Definition: NvInfer.h:1988
Weights getKernelWeights() const noexcept
Get the kernel weights for the deconvolution.
Definition: NvInfer.h:2012
void setPrePadding(Dims const &padding) noexcept
Set the multi-dimension pre-padding of the deconvolution.
Definition: NvInfer.h:2054
Dims getStrideNd() const noexcept
Get the multi-dimension stride of the deconvolution.
Definition: NvInfer.h:2169
Dims getDilationNd() const noexcept
Get the multi-dimension dilation of the deconvolution.
Definition: NvInfer.h:2235
virtual ~IDeconvolutionLayer() noexcept=0
Weights getBiasWeights() const noexcept
Get the bias weights for the deconvolution.
Definition: NvInfer.h:2037
void setKernelWeights(Weights weights) noexcept
Set the kernel weights for the deconvolution.
Definition: NvInfer.h:2002
int64_t getNbOutputMaps() const noexcept
Get the number of output feature maps for the deconvolution.
Definition: NvInfer.h:1958
void setStrideNd(Dims const &stride) noexcept
Set the multi-dimension stride of the deconvolution.
Definition: NvInfer.h:2159
Dims getPostPadding() const noexcept
Get the post-padding.
Definition: NvInfer.h:2091
Dims getKernelSizeNd() const noexcept
Get the multi-dimension kernel size of the deconvolution.
Definition: NvInfer.h:2142
void setPostPadding(Dims const &padding) noexcept
Set the multi-dimension post-padding of the deconvolution.
Definition: NvInfer.h:2081
void setKernelSizeNd(Dims const &kernelSize) noexcept
Set the multi-dimension kernel size of the deconvolution.
Definition: NvInfer.h:2132
void setPaddingNd(Dims const &padding) noexcept
Set the multi-dimension padding of the deconvolution.
Definition: NvInfer.h:2187
void setNbOutputMaps(int64_t nbOutputMaps) noexcept
Set the number of output feature maps for the deconvolution.
Definition: NvInfer.h:1948
Dims getPaddingNd() const noexcept
Get the multi-dimension padding of the deconvolution.
Definition: NvInfer.h:2199
void setDilationNd(Dims const &dilation) noexcept
Set the multi-dimension dilation of the deconvolution.
Definition: NvInfer.h:2225
void setPaddingMode(PaddingMode paddingMode) noexcept
Set the padding mode.
Definition: NvInfer.h:2105
void setNbGroups(int64_t nbGroups) noexcept
Set the number of groups for a deconvolution.
Definition: NvInfer.h:1978
Dims getPrePadding() const noexcept
Get the pre-padding.
Definition: NvInfer.h:2064
PaddingMode getPaddingMode() const noexcept
Get the padding mode.
Definition: NvInfer.h:2117
A Dequantize layer in a network definition.
Definition: NvInfer.h:5561
TRT_NODISCARD Dims getBlockShape() const noexcept
Get the shape of the quantization block.
Definition: NvInfer.h:5610
void setToType(DataType toType) noexcept
Set the Dequantize layer output type.
Definition: NvInfer.h:5626
int32_t getAxis() const noexcept
Get the quantization axis.
Definition: NvInfer.h:5571
bool setBlockShape(Dims const &blockShape) noexcept
Set the shape of the quantization block.
Definition: NvInfer.h:5599
virtual ~IDequantizeLayer() noexcept=0
DataType getToType() const noexcept
Return the Dequantize layer output type.
Definition: NvInfer.h:5638
void setAxis(int32_t axis) noexcept
Set the quantization axis.
Definition: NvInfer.h:5582
Definition: NvInfer.h:8138
virtual ~IDistCollectiveLayer() noexcept=0
A network layer to perform dynamic quantization.
Definition: NvInfer.h:5668
virtual ~IDynamicQuantizeLayer() noexcept=0
DataType getScaleType() const noexcept
Return the scale factors data type.
Definition: NvInfer.h:5734
TRT_DEPRECATED void setAxis(int32_t axis) noexcept
Set the axis along which block quantization occurs.
Definition: NvInfer.h:5747
TRT_DEPRECATED void setBlockSize(int32_t size) noexcept
Set the size of the quantization block.
Definition: NvInfer.h:5770
Dims getBlockShape() const noexcept
Get the shape of the quantization block.
Definition: NvInfer.h:5805
void setScaleType(DataType scaleType) noexcept
Set the data type of the scale factors used to quantize the data.
Definition: NvInfer.h:5721
DataType getToType() const noexcept
Return DynamicQuantizeLayer's quantized output type.
Definition: NvInfer.h:5708
TRT_DEPRECATED int32_t getAxis() const noexcept
Get the axis along which blocking occurs.
Definition: NvInfer.h:5757
void setToType(DataType toType) noexcept
Set DynamicQuantizeLayer's quantized output type.
Definition: NvInfer.h:5695
void setBlockShape(Dims const &blockShape) noexcept
Set the shape of the quantization block.
Definition: NvInfer.h:5793
TRT_DEPRECATED int32_t getBlockSize() const noexcept
Get the size of the quantization block.
Definition: NvInfer.h:5780
An Einsum layer in a network.
Definition: NvInfer.h:5852
bool setEquation(char const *equation) noexcept
Set the equation. The equation is a comma-separated list of subscript labels, where each label refers...
Definition: NvInfer.h:5863
virtual ~IEinsumLayer() noexcept=0
char const * getEquation() const noexcept
Return the equation.
Definition: NvInfer.h:5873
An elementwise layer in a network definition.
Definition: NvInfer.h:2308
apiv::VElementWiseLayer * mImpl
Definition: NvInfer.h:2337
ElementWiseOperation getOperation() const noexcept
Get the binary operation for the layer.
Definition: NvInfer.h:2331
void setOperation(ElementWiseOperation op) noexcept
Set the binary operation for the layer.
Definition: NvInfer.h:2319
virtual ~IElementWiseLayer() noexcept=0
Generate a tensor according to a specified mode.
Definition: NvInfer.h:5058
bool isAlphaBetaInt64() const noexcept
Return true if alpha/beta have type int64, false if they have type double.
Definition: NvInfer.h:5290
FillOperation getOperation() const noexcept
Get the fill operation for the layer.
Definition: NvInfer.h:5104
void setOperation(FillOperation op) noexcept
Set the fill operation for the layer.
Definition: NvInfer.h:5094
DataType getToType() const noexcept
Get the fill layer output type.
Definition: NvInfer.h:5320
void setAlphaInt64(int64_t alpha) noexcept
Set the alpha parameter with int64 datatype.
Definition: NvInfer.h:5233
void setBetaInt64(int64_t beta) noexcept
Set the beta parameter with int64 datatype.
Definition: NvInfer.h:5267
virtual ~IFillLayer() noexcept=0
void setBeta(double beta) noexcept
Set the beta parameter.
Definition: NvInfer.h:5157
int64_t getAlphaInt64() const noexcept
Get the value of alpha parameter with int64 datatype.
Definition: NvInfer.h:5248
int64_t getBetaInt64() const noexcept
Get the value of beta parameter with int64 datatype.
Definition: NvInfer.h:5282
double getAlpha() const noexcept
Get the value of alpha parameter.
Definition: NvInfer.h:5138
void setDimensions(Dims const &dimensions) noexcept
Set the output tensor's dimensions.
Definition: NvInfer.h:5069
void setAlpha(double alpha) noexcept
Set the alpha parameter.
Definition: NvInfer.h:5123
void setToType(DataType toType) noexcept
Set the fill layer output type.
Definition: NvInfer.h:5308
Dims getDimensions() const noexcept
Get the output tensor's dimensions.
Definition: NvInfer.h:5084
double getBeta() const noexcept
Get the value of beta parameter.
Definition: NvInfer.h:5172
A Gather layer in a network definition. Supports several kinds of gathering.
Definition: NvInfer.h:2443
void setGatherAxis(int32_t axis) noexcept
Set the axis used by GatherMode::kELEMENTS and GatherMode::kDEFAULT. The axis must be less than the nu...
Definition: NvInfer.h:2454
void setNbElementWiseDims(int32_t elementWiseDims) noexcept
Set the number of leading dimensions of indices tensor to be handled elementwise.
Definition: NvInfer.h:2489
apiv::VGatherLayer * mImpl
Definition: NvInfer.h:2525
virtual ~IGatherLayer() noexcept=0
int32_t getNbElementWiseDims() const noexcept
Get the number of leading dimensions of indices tensor to be handled elementwise.
Definition: NvInfer.h:2499
void setMode(GatherMode mode) noexcept
Set the gather mode.
Definition: NvInfer.h:2509
int32_t getGatherAxis() const noexcept
Get the axis to gather on.
Definition: NvInfer.h:2466
GatherMode getMode() const noexcept
Get the gather mode.
Definition: NvInfer.h:2519
A GridSample layer in a network definition.
Definition: NvInfer.h:6081
void setInterpolationMode(InterpolationMode mode) noexcept
Set the grid sample interpolation mode.
Definition: NvInfer.h:6088
bool setSampleMode(SampleMode mode) noexcept
Set the sample mode.
Definition: NvInfer.h:6134
void setAlignCorners(bool alignCorners) noexcept
Set the align corners mode.
Definition: NvInfer.h:6110
apiv::VGridSampleLayer * mImpl
Definition: NvInfer.h:6152
SampleMode getSampleMode() const noexcept
Get the sample mode.
Definition: NvInfer.h:6146
virtual ~IGridSampleLayer() noexcept=0
InterpolationMode getInterpolationMode() const noexcept
Get the grid sample interpolation mode.
Definition: NvInfer.h:6100
bool getAlignCorners() const noexcept
Get the align corners mode.
Definition: NvInfer.h:6122
Class to handle library allocated memory that is accessible to the user.
Definition: NvInferRuntime.h:139
A layer that represents the identity function.
Definition: NvInfer.h:3768
virtual ~IIdentityLayer() noexcept=0
apiv::VIdentityLayer * mImpl
Definition: NvInfer.h:3770
This is a base class for Conditional boundary layers.
Definition: NvInfer.h:4423
IIfConditional * getConditional() const noexcept
Get a pointer to the IIfConditional associated with this boundary layer.
Definition: NvInfer.h:4428
virtual ~IIfConditionalBoundaryLayer() noexcept=0
Helper for constructing conditionally-executed subgraphs.
Definition: NvInfer.h:4514
IIfConditionalInputLayer * addInput(ITensor &input) noexcept
Add an If-conditional input.
Definition: NvInfer.h:4555
char const * getName() const noexcept
Return the name of the conditional.
Definition: NvInfer.h:4580
IConditionLayer * setCondition(ITensor &condition) noexcept
Set the condition tensor for this If-Conditional construct.
Definition: NvInfer.h:4525
virtual ~IIfConditional() noexcept=0
IIfConditionalOutputLayer * addOutput(ITensor &trueSubgraphOutput, ITensor &falseSubgraphOutput) noexcept
Add an If-conditional output.
Definition: NvInfer.h:4543
void setName(char const *name) noexcept
Set the name of the conditional.
Definition: NvInfer.h:4570
This layer represents an input to an IIfConditional.
Definition: NvInfer.h:4480
virtual ~IIfConditionalInputLayer() noexcept=0
This layer represents an output of an IIfConditional.
Definition: NvInfer.h:4465
virtual ~IIfConditionalOutputLayer() noexcept=0
A layer to do iterations.
Definition: NvInfer.h:4753
void setReverse(bool reverse) noexcept
Set iteration order to be reverse.
Definition: NvInfer.h:4780
virtual ~IIteratorLayer() noexcept=0
bool getReverse() const noexcept
Check if the iteration order is reverse.
Definition: NvInfer.h:4790
int32_t getAxis() const noexcept
Get axis being iterated over.
Definition: NvInfer.h:4766
void setAxis(int32_t axis) noexcept
Set axis to iterate over.
Definition: NvInfer.h:4758
Layer that represents a KVCacheUpdate operation.
Definition: NvInfer.h:7562
bool setCacheMode(KVCacheMode cacheMode) noexcept
Set the mode of the KVCacheUpdate layer.
Definition: NvInfer.h:7587
TRT_NODISCARD ITensor * getUpdateLengths() const noexcept
Get the update lengths tensor.
Definition: NvInfer.h:7662
virtual ~IKVCacheUpdateLayer() noexcept=0
TRT_NODISCARD AttentionIOForm getUpdateForm() const noexcept
Get the update form.
Definition: NvInfer.h:7628
TRT_NODISCARD bool setUpdateLengths(ITensor *lengths) noexcept
Set the update lengths tensor.
Definition: NvInfer.h:7650
TRT_NODISCARD bool setUpdateForm(AttentionIOForm form) noexcept
Set the update form.
Definition: NvInfer.h:7615
KVCacheMode getCacheMode() const noexcept
Get the mode of the KVCacheUpdate layer.
Definition: NvInfer.h:7597
apiv::VKVCacheUpdateLayer * mImpl
Definition: NvInfer.h:7668
A LRN layer in a network definition.
Definition: NvInfer.h:1545
int64_t getWindowSize() const noexcept
Get the LRN window size.
Definition: NvInfer.h:1566
virtual ~ILRNLayer() noexcept=0
float getAlpha() const noexcept
Get the LRN alpha value.
Definition: NvInfer.h:1588
void setWindowSize(int64_t windowSize) noexcept
Set the LRN window size.
Definition: NvInfer.h:1556
void setK(float k) noexcept
Set the LRN K value.
Definition: NvInfer.h:1622
void setAlpha(float alpha) noexcept
Set the LRN alpha value.
Definition: NvInfer.h:1578
void setBeta(float beta) noexcept
Set the LRN beta value.
Definition: NvInfer.h:1600
float getBeta() const noexcept
Get the LRN beta value.
Definition: NvInfer.h:1610
float getK() const noexcept
Get the LRN K value.
Definition: NvInfer.h:1632
Base class for all layer classes in a network definition.
Definition: NvInfer.h:490
virtual ~ILayer() noexcept=0
void setMetadata(char const *metadata) noexcept
Set the metadata for this layer.
Definition: NvInfer.h:611
void setName(char const *name) noexcept
Set the name of a layer.
Definition: NvInfer.h:511
int32_t getNbInputs() const noexcept
Get the number of inputs of a layer.
Definition: NvInfer.h:529
int32_t getNbRanks() const noexcept
Get the number of ranks for multi-device execution.
Definition: NvInfer.h:657
char const * getMetadata() const noexcept
Get the metadata of the layer.
Definition: NvInfer.h:624
DataType getOutputType(int32_t index) const noexcept
Get the output type of this layer.
Definition: NvInfer.h:592
char const * getName() const noexcept
Return the name of a layer.
Definition: NvInfer.h:521
int32_t getNbOutputs() const noexcept
Get the number of outputs of a layer.
Definition: NvInfer.h:550
ITensor * getOutput(int32_t index) const noexcept
Get the layer output corresponding to the given index.
Definition: NvInfer.h:560
void setInput(int32_t index, ITensor &tensor) noexcept
Replace an input of this layer with a specific tensor.
Definition: NvInfer.h:577
ITensor * getInput(int32_t index) const noexcept
Get the layer input corresponding to the given index.
Definition: NvInfer.h:542
bool setNbRanks(int32_t nbRanks) noexcept
Set the number of ranks for multi-device execution.
Definition: NvInfer.h:645
LayerType getType() const noexcept
Return the type of a layer.
Definition: NvInfer.h:497
This is a base class for Loop boundary layers.
Definition: NvInfer.h:4398
virtual ~ILoopBoundaryLayer() noexcept=0
ILoop * getLoop() const noexcept
Get a pointer to ILoop associated with this boundary layer.
Definition: NvInfer.h:4403
Helper for creating a recurrent subgraph.
Definition: NvInfer.h:4813
void setName(char const *name) noexcept
Set the name of the loop.
Definition: NvInfer.h:4883
ITripLimitLayer * addTripLimit(ITensor &tensor, TripLimit limit) noexcept
Add a trip-count limiter, based on the given tensor.
Definition: NvInfer.h:4842
IIteratorLayer * addIterator(ITensor &tensor, int32_t axis=0, bool reverse=false) noexcept
Return layer that subscripts tensor by loop iteration.
Definition: NvInfer.h:4855
ILoopOutputLayer * addLoopOutput(ITensor &tensor, LoopOutput outputKind, int32_t axis=0) noexcept
Make an output for this loop, based on the given tensor.
Definition: NvInfer.h:4868
virtual ~ILoop() noexcept=0
char const * getName() const noexcept
Return the name of the loop.
Definition: NvInfer.h:4893
IRecurrenceLayer * addRecurrence(ITensor &initialValue) noexcept
Create a recurrence layer for this loop with initialValue as its first input.
Definition: NvInfer.h:4821
An ILoopOutputLayer is the sole way to get output from a loop.
Definition: NvInfer.h:4649
int32_t getAxis() const noexcept
Get axis being concatenated over.
Definition: NvInfer.h:4679
LoopOutput getLoopOutput() const noexcept
Get the kind of this loop output.
Definition: NvInfer.h:4654
virtual ~ILoopOutputLayer() noexcept=0
void setAxis(int32_t axis) noexcept
Set where to insert the concatenation axis. Ignored if getLoopOutput() is kLAST_VALUE.
Definition: NvInfer.h:4671
Layer that represents a Matrix Multiplication.
Definition: NvInfer.h:3609
apiv::VMatrixMultiplyLayer * mImpl
Definition: NvInfer.h:3637
virtual ~IMatrixMultiplyLayer() noexcept=0
MatrixOperation getOperation(int32_t index) const noexcept
Get the operation for an input tensor.
Definition: NvInfer.h:3631
void setOperation(int32_t index, MatrixOperation op) noexcept
Set the operation for an input tensor.
Definition: NvInfer.h:3619
A MoE layer in a network definition. Mixture of Experts (MoE) is a collection of experts with each ex...
Definition: NvInfer.h:7813
void setSwigluParamLimit(float limit) noexcept
Set the SwiGLU parameter limit.
Definition: NvInfer.h:8035
void setDynQOutputScaleType(DataType type) noexcept
Set the dynamic quantization output scale type.
Definition: NvInfer.h:7988
MoEActType getActivationType() const noexcept
Get the activation type for the MoE layer.
Definition: NvInfer.h:7862
void setQuantizationToType(DataType type) noexcept
Set the data type the mul output is quantized to.
Definition: NvInfer.h:7936
void setQuantizationDynamicDblQ(ITensor &fcDownActivationDblQScale, DataType dataType, Dims const &blockShape, DataType dynQOutputScaleType) noexcept
Configure dynamic quantization (with double quantization) after the mul op.
Definition: NvInfer.h:7921
void setQuantizationStatic(ITensor &fcDownActivationScale, DataType dataType) noexcept
Configure static quantization after the mul op.
Definition: NvInfer.h:7888
float getSwigluParamLimit() const noexcept
Get the SwiGLU parameter limit.
Definition: NvInfer.h:8047
DataType getQuantizationToType() const noexcept
Get the data type the mul in MoE layer is quantized to.
Definition: NvInfer.h:7948
DataType getDynQOutputScaleType() const noexcept
Get the dynamic quantization output scale type.
Definition: NvInfer.h:8000
virtual ~IMoELayer() noexcept=0
void setActivationType(MoEActType activationType) noexcept
Set the activation type for the MoE layer.
Definition: NvInfer.h:7850
Dims getQuantizationBlockShape() const noexcept
Get the block shape for the quantization of the Mul output.
Definition: NvInfer.h:7976
void setGatedWeights(ITensor &fcGateWeights, ITensor &fcUpWeights, ITensor &fcDownWeights, MoEActType activationType) noexcept
Set the weights of the experts when each expert is a GLU (gated linear unit). In each GLU,...
Definition: NvInfer.h:7826
float getSwigluParamBeta() const noexcept
Get the SwiGLU parameter beta.
Definition: NvInfer.h:8099
void setSwigluParamBeta(float beta) noexcept
Set the SwiGLU parameter beta.
Definition: NvInfer.h:8087
void setGatedBiases(ITensor &fcGateBiases, ITensor &fcUpBiases, ITensor &fcDownBiases) noexcept
Set the biases of the experts when each expert is a GLU (gated linear unit). In each GLU,...
Definition: NvInfer.h:7838
void setSwigluParams(float limit, float alpha, float beta) noexcept
Set the SwiGLU parameters.
Definition: NvInfer.h:8021
void setQuantizationBlockShape(Dims const &blockShape) noexcept
Set the block shape for the quantization of the Mul output.
Definition: NvInfer.h:7964
void setInput(int32_t index, ITensor &tensor) noexcept
Set the input of the MoE layer.
Definition: NvInfer.h:8116
float getSwigluParamAlpha() const noexcept
Get the SwiGLU parameter alpha.
Definition: NvInfer.h:8073
void setSwigluParamAlpha(float alpha) noexcept
Set the SwiGLU parameter alpha.
Definition: NvInfer.h:8061
A non-maximum suppression layer in a network definition.
Definition: NvInfer.h:6235
void setTopKBoxLimit(int32_t limit) noexcept
Set the TopK box limit parameter for the layer.
Definition: NvInfer.h:6272
void setBoundingBoxFormat(BoundingBoxFormat fmt) noexcept
Set the bounding box format parameter for the layer.
Definition: NvInfer.h:6246
BoundingBoxFormat getBoundingBoxFormat() const noexcept
Get the bounding box format parameter for the layer.
Definition: NvInfer.h:6258
bool setIndicesType(DataType type) noexcept
Set the indices type for the layer.
Definition: NvInfer.h:6317
apiv::VNMSLayer * mImpl
Definition: NvInfer.h:6335
int32_t getTopKBoxLimit() const noexcept
Get the TopK box limit parameter for the layer.
Definition: NvInfer.h:6282
DataType getIndicesType() const noexcept
Return the NMS layer indices type.
Definition: NvInfer.h:6329
virtual ~INMSLayer() noexcept=0
A network definition for input to the builder.
Definition: NvInfer.h:8164
IConcatenationLayer * addConcatenation(ITensor *const *inputs, int32_t nbInputs) noexcept
Add a concatenation layer to the network.
Definition: NvInfer.h:8392
IShuffleLayer * addShuffle(ITensor &input) noexcept
Add a shuffle layer to the network.
Definition: NvInfer.h:8455
void setName(char const *name) noexcept
Sets the name of the network.
Definition: NvInfer.h:8921
ITopKLayer * addTopK(ITensor &input, TopKOperation op, int32_t k, uint32_t reduceAxes, DataType indicesType) noexcept
Add a TopK layer to the network.
Definition: NvInfer.h:8651
bool markDebug(ITensor &tensor) noexcept
Mark a tensor as a debug tensor.
Definition: NvInfer.h:8235
ILRNLayer * addLRN(ITensor &input, int64_t window, float alpha, float beta, float k) noexcept
Add a LRN layer to the network.
Definition: NvInfer.h:8336
ICumulativeLayer * addCumulative(ITensor &input, ITensor &axis, CumulativeOperation operation, bool exclusive, bool reverse) noexcept
Add a cumulative layer to the network.
Definition: NvInfer.h:9602
IAssertionLayer * addAssertion(ITensor &condition, char const *message) noexcept
Add an assertion layer to the network.
Definition: NvInfer.h:9237
TRT_DEPRECATED INonZeroLayer * addNonZero(ITensor &input) noexcept
Add a nonzero layer to the network.
Definition: NvInfer.h:8742
IConvolutionLayer * addConvolutionNd(ITensor &input, int64_t nbOutputMaps, Dims const &kernelSize, Weights kernelWeights, Weights biasWeights) noexcept
Add a multi-dimension convolution layer to the network.
Definition: NvInfer.h:9056
ICastLayer * addCast(ITensor &input, DataType toType) noexcept
Add a cast layer.
Definition: NvInfer.h:8811
IScaleLayer * addScaleNd(ITensor &input, ScaleMode mode, Weights shift, Weights scale, Weights power, int32_t channelAxis) noexcept
Add a multi-dimension scale layer to the network.
Definition: NvInfer.h:9135
char const * getName() const noexcept
Returns the name associated with the network.
Definition: NvInfer.h:8935
IParametricReLULayer * addParametricReLU(ITensor &input, ITensor &slope) noexcept
Add a parametric ReLU layer to the network.
Definition: NvInfer.h:9034
ITensor * getOutput(int32_t index) const noexcept
Get the output tensor specified by the given index.
Definition: NvInfer.h:8556
ITensor * getInput(int32_t index) const noexcept
Get the input tensor specified by the given index.
Definition: NvInfer.h:8526
TRT_DEPRECATED ITopKLayer * addTopK(ITensor &input, TopKOperation op, int32_t k, uint32_t reduceAxes) noexcept
Add a TopK layer to the network.
Definition: NvInfer.h:8618
IDequantizeLayer * addDequantize(ITensor &input, ITensor &scale, DataType outputType) noexcept
Add a dequantization layer to the network.
Definition: NvInfer.h:9360
bool unmarkOutputForShapes(ITensor &tensor) noexcept
Undo markOutputForShapes.
Definition: NvInfer.h:9016
IFillLayer * addFill(Dims const &dimensions, FillOperation op, DataType outputType) noexcept
Add a fill layer to the network.
Definition: NvInfer.h:9263
ILoop * addLoop() noexcept
Add a loop to the network.
Definition: NvInfer.h:9166
bool markUnfusedTensorsAsDebugTensors() noexcept
Mark unfused tensors as debug tensors.
Definition: NvInfer.h:8283
TRT_NODISCARD INormalizationLayer * addNormalizationV2(ITensor &input, ITensor &scale, ITensor &bias, uint32_t axesMask) noexcept
Add a normalization layer to the network.
Definition: NvInfer.h:9891
IActivationLayer * addActivation(ITensor &input, ActivationType type) noexcept
Add an activation layer to the network.
Definition: NvInfer.h:8317
ISliceLayer * addSlice(ITensor &input, Dims const &start, Dims const &size, Dims const &stride) noexcept
Add a slice layer to the network.
Definition: NvInfer.h:8897
virtual IBuilder & getBuilder() const noexcept
Return the builder from which this INetworkDefinition was created.
Definition: NvInfer.h:9787
ILayer * getLayer(int32_t index) const noexcept
Get the layer specified by the given index.
Definition: NvInfer.h:8498
bool isDebugTensor(ITensor const &tensor) const noexcept
Check if a tensor is marked as debug tensor.
Definition: NvInfer.h:8261
bool getFlag(NetworkDefinitionCreationFlag networkDefinitionCreationFlag) const noexcept
Returns true if the network definition creation flag is set.
Definition: NvInfer.h:8987
IIfConditional * addIfConditional() noexcept
Add an if-then-else to the network.
Definition: NvInfer.h:9181
IErrorRecorder * getErrorRecorder() const noexcept
Get the ErrorRecorder assigned to this interface.
Definition: NvInfer.h:9337
ISqueezeLayer * addSqueeze(ITensor &input, ITensor &axes) noexcept
Add a squeeze layer to the network.
Definition: NvInfer.h:9844
TRT_DEPRECATED INMSLayer * addNMS(ITensor &boxes, ITensor &scores, ITensor &maxOutputBoxesPerClass) noexcept
Add a non-maximum suppression layer to the network.
Definition: NvInfer.h:9511
IAttention * addAttentionV2(ITensor &query, ITensor &key, ITensor &value, AttentionNormalizationOp normOp, CausalMaskKind causalKind) noexcept
Add an attention to the network with explicit causal mask kind.
Definition: NvInfer.h:9662
IReverseSequenceLayer * addReverseSequence(ITensor &input, ITensor &sequenceLens) noexcept
Add a ReverseSequence layer to the network.
Definition: NvInfer.h:9548
TRT_DEPRECATED IDynamicQuantizeLayer * addDynamicQuantize(ITensor &input, int32_t axis, int32_t blockSize, DataType outputType, DataType scaleType) noexcept
Add a dynamic quantization layer to the network.
Definition: NvInfer.h:9431
int32_t getNbInputs() const noexcept
Get the number of inputs in the network.
Definition: NvInfer.h:8510
NetworkDefinitionCreationFlags getFlags() const noexcept
Get the network definition creation flags for this network definition object. Defaults to 0.
Definition: NvInfer.h:8975
IQuantizeLayer * addQuantize(ITensor &input, ITensor &scale, DataType outputType) noexcept
Add a quantization layer to the network.
Definition: NvInfer.h:9404
IDynamicQuantizeLayer * addDynamicQuantizeV2(ITensor &input, Dims const &blockShape, DataType outputType, DataType scaleType) noexcept
Add a dynamic quantization layer to the network.
Definition: NvInfer.h:9455
IReduceLayer * addReduce(ITensor &input, ReduceOperation operation, uint32_t reduceAxes, bool keepDimensions) noexcept
Add a reduce layer to the network.
Definition: NvInfer.h:8582
IUnaryLayer * addUnary(ITensor &input, UnaryOperation operation) noexcept
Add a unary layer to the network.
Definition: NvInfer.h:8441
IGridSampleLayer * addGridSample(ITensor &input, ITensor &grid) noexcept
Add a GridSample layer to the network.
Definition: NvInfer.h:9489
void removeTensor(ITensor &tensor) noexcept
Remove a tensor from the network definition.
Definition: NvInfer.h:8826
bool areWeightsMarkedRefittable(char const *name) const noexcept
Whether the weight has been marked as refittable.
Definition: NvInfer.h:9825
ISelectLayer * addSelect(ITensor &condition, ITensor &thenInput, ITensor &elseInput) noexcept
Add a select layer to the network.
Definition: NvInfer.h:9220
IScatterLayer * addScatter(ITensor &data, ITensor &indices, ITensor &updates, ScatterMode mode) noexcept
Add a Scatter layer to the network with specified mode and axis=0.
Definition: NvInfer.h:9380
TRT_DEPRECATED INormalizationLayer * addNormalization(ITensor &input, ITensor &scale, ITensor &bias, uint32_t axesMask) noexcept
Add a normalization layer to the network.
Definition: NvInfer.h:9580
int32_t getNbLayers() const noexcept
Get the number of layers in the network.
Definition: NvInfer.h:8484
TRT_DEPRECATED bool hasImplicitBatchDimension() const noexcept
Query whether the network was created with an implicit batch dimension.
Definition: NvInfer.h:8965
apiv::VNetworkDefinition * mImpl
Definition: NvInfer.h:9897
IKVCacheUpdateLayer * addKVCacheUpdate(ITensor &cache, ITensor &update, ITensor &writeIndices, KVCacheMode cacheMode) noexcept
Add a KVCacheUpdate layer to the network.
Definition: NvInfer.h:9721
bool markOutputForShapes(ITensor &tensor) noexcept
Enable tensor's value to be computed by IExecutionContext::getShapeBinding.
Definition: NvInfer.h:9004
IOneHotLayer * addOneHot(ITensor &indices, ITensor &values, ITensor &depth, int32_t axis) noexcept
Add a OneHot layer to the network.
Definition: NvInfer.h:8472
IScaleLayer * addScale(ITensor &input, ScaleMode mode, Weights shift, Weights scale, Weights power) noexcept
Add a Scale layer to the network.
Definition: NvInfer.h:8362
IPluginV3Layer * addPluginV3(ITensor *const *inputs, int32_t nbInputs, ITensor *const *shapeInputs, int32_t nbShapeInputs, IPluginV3 &plugin) noexcept
Add a plugin layer implementing the IPluginV3 interface to the network.
Definition: NvInfer.h:8877
void unmarkOutput(ITensor &tensor) noexcept
Unmark a tensor as a network output.
Definition: NvInfer.h:8838
IIdentityLayer * addIdentity(ITensor &input) noexcept
Add an identity layer.
Definition: NvInfer.h:8796
IGatherLayer * addGatherV2(ITensor &data, ITensor &indices, GatherMode mode) noexcept
Add gather with specified mode, axis=0 and nbElementWiseDims=0.
Definition: NvInfer.h:8683
INonZeroLayer * addNonZero(ITensor &input, DataType indicesType) noexcept
Add a nonzero layer to the network.
Definition: NvInfer.h:8758
IElementWiseLayer * addElementWise(ITensor &input1, ITensor &input2, ElementWiseOperation op) noexcept
Add an elementwise layer to the network.
Definition: NvInfer.h:8419
IConstantLayer * addConstant(Dims const &dimensions, Weights weights) noexcept
Add a constant layer to the network.
Definition: NvInfer.h:8782
void setErrorRecorder(IErrorRecorder *recorder) noexcept
Set the ErrorRecorder for this interface.
Definition: NvInfer.h:9322
IPoolingLayer * addPoolingNd(ITensor &input, PoolingType type, Dims const &windowSize) noexcept
Add a multi-dimension pooling layer to the network.
Definition: NvInfer.h:9076
INMSLayer * addNMS(ITensor &boxes, ITensor &scores, ITensor &maxOutputBoxesPerClass, DataType indicesType) noexcept
Add a non-maximum suppression layer to the network.
Definition: NvInfer.h:9531
IRaggedSoftMaxLayer * addRaggedSoftMax(ITensor &input, ITensor &bounds) noexcept
Add a RaggedSoftMax layer to the network.
Definition: NvInfer.h:8702
IShapeLayer * addShape(ITensor &input) noexcept
Add a shape layer to the network.
Definition: NvInfer.h:8951
IGatherLayer * addGather(ITensor &data, ITensor &indices, int32_t axis) noexcept
Add gather with mode GatherMode::kDEFAULT and specified axis and nbElementWiseDims=0.
Definition: NvInfer.h:8667
bool unmarkWeightsRefittable(char const *name) noexcept
Unmark weights as refittable when the builder flag kREFIT_INDIVIDUAL is set.
Definition: NvInfer.h:9812
bool markWeightsRefittable(char const *name) noexcept
Mark weights as refittable when the builder flag kREFIT_INDIVIDUAL is set.
Definition: NvInfer.h:9800
IRotaryEmbeddingLayer * addRotaryEmbedding(ITensor &input, ITensor &cosCache, ITensor &sinCache, bool interleaved, int32_t rotaryEmbeddingDim) noexcept
Add a Rotary Position Embedding (RoPE) layer to the network.
Definition: NvInfer.h:9687
IDeconvolutionLayer * addDeconvolutionNd(ITensor &input, int64_t nbOutputMaps, Dims kernelSize, Weights kernelWeights, Weights biasWeights) noexcept
Add a multi-dimension deconvolution layer to the network.
Definition: NvInfer.h:9098
IResizeLayer * addResize(ITensor &input) noexcept
Add a resize layer to the network.
Definition: NvInfer.h:9152
IUnsqueezeLayer * addUnsqueeze(ITensor &input, ITensor &axes) noexcept
Add an unsqueeze layer to the network.
Definition: NvInfer.h:9865
IMatrixMultiplyLayer * addMatrixMultiply(ITensor &input0, MatrixOperation op0, ITensor &input1, MatrixOperation op1) noexcept
Add a MatrixMultiply layer to the network.
Definition: NvInfer.h:8723
ISoftMaxLayer * addSoftMax(ITensor &input) noexcept
Add a SoftMax layer to the network.
Definition: NvInfer.h:8375
bool unmarkDebug(ITensor &tensor) noexcept
Unmark a tensor as a debug tensor.
Definition: NvInfer.h:8251
TRT_DEPRECATED IAttention * addAttention(ITensor &query, ITensor &key, ITensor &value, AttentionNormalizationOp normOp, bool causal) noexcept
Add an attention to the network.
Definition: NvInfer.h:9632
virtual ~INetworkDefinition() noexcept=0
IEinsumLayer * addEinsum(ITensor *const *inputs, int32_t nbInputs, char const *equation) noexcept
Add an Einsum layer to the network.
Definition: NvInfer.h:9471
void markOutput(ITensor &tensor) noexcept
Mark a tensor as a network output.
Definition: NvInfer.h:8217
TRT_DEPRECATED IPluginV2Layer * addPluginV2(ITensor *const *inputs, int32_t nbInputs, IPluginV2 &plugin) noexcept
Add a plugin layer to the network using the IPluginV2 interface.
Definition: NvInfer.h:8859
IPaddingLayer * addPaddingNd(ITensor &input, Dims const &prePadding, Dims const &postPadding) noexcept
Add a padding layer to the network. Only 2D padding is currently supported.
Definition: NvInfer.h:9279
int32_t getNbOutputs() const noexcept
Get the number of outputs in the network.
Definition: NvInfer.h:8540
bool setWeightsName(Weights weights, char const *name) noexcept
Associate a name with all current uses of the given weights.
Definition: NvInfer.h:9303
TRT_NODISCARD IDistCollectiveLayer * addDistCollective(ITensor &input, CollectiveOperation distCollectiveOp, ReduceOperation reduceOp, int64_t root, int64_t *groups, int64_t groupSize) noexcept
Add a DistCollective layer to the network.
Definition: NvInfer.h:9775
IMoELayer * addMoE(ITensor &hiddenStates, ITensor &selectedExpertsForTokens, ITensor &scoresForSelectedExperts) noexcept
Add a MoE (Mixture of Experts) layer to the network.
Definition: NvInfer.h:9743
bool unmarkUnfusedTensorsAsDebugTensors() noexcept
Undo the marking of unfused tensors as debug tensors.
Definition: NvInfer.h:8297
Forward declaration of IEngineInspector for use by other interfaces.
Definition: NvInferRuntime.h:51
Definition: NvInfer.h:3665
DataType getIndicesType() const noexcept
Return the NonZero layer indices type.
Definition: NvInfer.h:3689
virtual ~INonZeroLayer() noexcept=0
bool setIndicesType(DataType type) noexcept
Set the indices type for the layer.
Definition: NvInfer.h:3677
A normalization layer in a network definition.
Definition: NvInfer.h:6428
float getEpsilon() const noexcept
Get the epsilon value used for the normalization calculation.
Definition: NvInfer.h:6447
uint32_t getAxes() const noexcept
Get the axes value used for the normalization calculation.
Definition: NvInfer.h:6467
void setEpsilon(float eps) noexcept
Set the epsilon value used for the normalization calculation.
Definition: NvInfer.h:6437
virtual ~INormalizationLayer() noexcept=0
TRT_NODISCARD bool isV2() const noexcept
Returns true if this layer was created through addNormalizationV2().
Definition: NvInfer.h:6509
apiv::VNormalizationLayer * mImpl
Definition: NvInfer.h:6515
int64_t getNbGroups() const noexcept
Get the number of groups used to split the channels for the normalization calculation.
Definition: NvInfer.h:6498
void setAxes(uint32_t axesMask) noexcept
Set the reduction axes for the normalization calculation.
Definition: NvInfer.h:6457
void setNbGroups(int64_t nbGroups) noexcept
Set the number of groups used to split the channels in the normalization calculation.
Definition: NvInfer.h:6488
A OneHot layer in a network definition.
Definition: NvInfer.h:6042
apiv::VOneHotLayer * mImpl
Definition: NvInfer.h:6063
void setAxis(int32_t axis) noexcept
Set the axis parameter.
Definition: NvInfer.h:6049
virtual ~IOneHotLayer() noexcept=0
int32_t getAxis() const noexcept
Get the value of the axis parameter.
Definition: NvInfer.h:6057
Optimization profile for dynamic input dimensions and shape tensors.
Definition: NvInferRuntime.h:2575
Layer that represents a padding operation.
Definition: NvInfer.h:2850
Dims getPostPaddingNd() const noexcept
Get the padding that is applied at the end of the tensor.
Definition: NvInfer.h:2899
void setPrePaddingNd(Dims const &padding) noexcept
Set the padding that is applied at the start of the tensor.
Definition: NvInfer.h:2861
virtual ~IPaddingLayer() noexcept=0
void setPostPaddingNd(Dims const &padding) noexcept
Set the padding that is applied at the end of the tensor.
Definition: NvInfer.h:2887
Dims getPrePaddingNd() const noexcept
Get the padding that is applied at the start of the tensor.
Definition: NvInfer.h:2873
apiv::VPaddingLayer * mImpl
Definition: NvInfer.h:2905
Layer that represents a parametric ReLU operation.
Definition: NvInfer.h:3890
apiv::VParametricReLULayer * mImpl
Definition: NvInfer.h:3892
virtual ~IParametricReLULayer() noexcept=0
Single registration point for all plugins in an application. It is used to find plugin implementation...
Definition: NvInferRuntimeCommon.h:56
Plugin class for user-implemented layers.
Definition: NvInferRuntimePlugin.h:139
Layer type for pluginV2.
Definition: NvInfer.h:2543
apiv::VPluginV2Layer * mImpl
Definition: NvInfer.h:2556
IPluginV2 & getPlugin() noexcept
Get the plugin for the layer.
Definition: NvInfer.h:2550
virtual ~IPluginV2Layer() noexcept=0
Layer type for V3 plugins.
Definition: NvInfer.h:2572
virtual ~IPluginV3Layer() noexcept=0
IPluginV3 & getPlugin() noexcept
Get the plugin for the layer.
Definition: NvInfer.h:2579
apiv::VPluginV3Layer * mImpl
Definition: NvInfer.h:2585
A Pooling layer in a network definition.
Definition: NvInfer.h:1292
PoolingType getPoolingType() const noexcept
Get the type of pooling to be performed.
Definition: NvInfer.h:1311
PaddingMode getPaddingMode() const noexcept
Get the padding mode.
Definition: NvInfer.h:1444
Dims getPostPadding() const noexcept
Get the padding.
Definition: NvInfer.h:1420
bool getAverageCountExcludesPadding() const noexcept
Get whether average pooling uses as a denominator the overlap area between the window and the unpadde...
Definition: NvInfer.h:1364
Dims getPrePadding() const noexcept
Get the pre-padding.
Definition: NvInfer.h:1392
void setPoolingType(PoolingType type) noexcept
Set the type of pooling to be performed.
Definition: NvInfer.h:1301
void setWindowSizeNd(Dims const &windowSize) noexcept
Set the multi-dimension window size for pooling.
Definition: NvInfer.h:1457
void setPaddingMode(PaddingMode paddingMode) noexcept
Set the padding mode.
Definition: NvInfer.h:1433
Dims getWindowSizeNd() const noexcept
Get the multi-dimension window size for pooling.
Definition: NvInfer.h:1467
void setAverageCountExcludesPadding(bool exclusive) noexcept
Set whether average pooling uses as a denominator the overlap area between the window and the unpadde...
Definition: NvInfer.h:1353
void setPaddingNd(Dims const &padding) noexcept
Set the multi-dimension padding for pooling.
Definition: NvInfer.h:1511
float getBlendFactor() const noexcept
Get the blending factor for the max_average_blend mode: max_average_blendPool = (1-blendFactor)*maxPo...
Definition: NvInfer.h:1339
void setStrideNd(Dims const &stride) noexcept
Set the multi-dimension stride for pooling.
Definition: NvInfer.h:1482
Dims getStrideNd() const noexcept
Get the multi-dimension stride for pooling.
Definition: NvInfer.h:1492
Dims getPaddingNd() const noexcept
Get the multi-dimension padding for pooling.
Definition: NvInfer.h:1523
virtual ~IPoolingLayer() noexcept=0
void setPostPadding(Dims const &padding) noexcept
Set the multi-dimension post-padding for pooling.
Definition: NvInfer.h:1410
void setPrePadding(Dims const &padding) noexcept
Set the multi-dimension pre-padding for pooling.
Definition: NvInfer.h:1382
void setBlendFactor(float blendFactor) noexcept
Set the blending factor for the max_average_blend mode: max_average_blendPool = (1-blendFactor)*maxPo...
Definition: NvInfer.h:1326
A Quantize layer in a network definition.
Definition: NvInfer.h:5407
void setToType(DataType toType) noexcept
Set the Quantize layer output type.
Definition: NvInfer.h:5468
bool setBlockShape(Dims const &blockShape) noexcept
Set the shape of the quantization block.
Definition: NvInfer.h:5441
void setAxis(int32_t axis) noexcept
Set the quantization axis.
Definition: NvInfer.h:5428
virtual ~IQuantizeLayer() noexcept=0
TRT_NODISCARD Dims getBlockShape() const noexcept
Get the shape of the quantization block.
Definition: NvInfer.h:5452
int32_t getAxis() const noexcept
Get the quantization axis.
Definition: NvInfer.h:5417
DataType getToType() const noexcept
Return the Quantize layer output type.
Definition: NvInfer.h:5480
A RaggedSoftmax layer in a network definition.
Definition: NvInfer.h:3716
apiv::VRaggedSoftMaxLayer * mImpl
Definition: NvInfer.h:3718
virtual ~IRaggedSoftMaxLayer() noexcept=0
A recurrence layer in a network definition.
Definition: NvInfer.h:4600
virtual ~IRecurrenceLayer() noexcept=0
Layer that represents a reduction across a non-bool tensor.
Definition: NvInfer.h:2768
void setKeepDimensions(bool keepDimensions) noexcept
Set the boolean that specifies whether or not to keep the reduced dimensions for the layer.
Definition: NvInfer.h:2815
virtual ~IReduceLayer() noexcept=0
void setOperation(ReduceOperation op) noexcept
Set the reduce operation for the layer.
Definition: NvInfer.h:2775
ReduceOperation getOperation() const noexcept
Get the reduce operation for the layer.
Definition: NvInfer.h:2785
uint32_t getReduceAxes() const noexcept
Get the axes over which to reduce for the layer.
Definition: NvInfer.h:2805
void setReduceAxes(uint32_t reduceAxes) noexcept
Set the axes over which to reduce.
Definition: NvInfer.h:2795
apiv::VReduceLayer * mImpl
Definition: NvInfer.h:2831
bool getKeepDimensions() const noexcept
Get the boolean that specifies whether or not to keep the reduced dimensions for the layer.
Definition: NvInfer.h:2825
A resize layer in a network definition.
Definition: NvInfer.h:4069
void setSelectorForSinglePixel(ResizeSelector selector) noexcept
Set coordinate selector function when resized to single pixel.
Definition: NvInfer.h:4230
void setNearestRounding(ResizeRoundMode value) noexcept
Set rounding mode for nearest neighbor resize.
Definition: NvInfer.h:4254
int32_t getScales(int32_t size, float *scales) const noexcept
Copies resize scales to scales[0, ..., nbScales-1], where nbScales is the number of scales that were ...
Definition: NvInfer.h:4148
void setOutputDimensions(Dims const &dimensions) noexcept
Set the output dimensions.
Definition: NvInfer.h:4089
void setCubicCoeff(float A) noexcept
Set the coefficient 'A' used in cubic interpolation.
Definition: NvInfer.h:4286
void setScales(float const *scales, int32_t nbScales) noexcept
Set the resize scales.
Definition: NvInfer.h:4129
virtual ~IResizeLayer() noexcept=0
float getCubicCoeff() const noexcept
Get the coefficient 'A' used in cubic interpolation.
Definition: NvInfer.h:4296
ResizeSelector getSelectorForSinglePixel() const noexcept
Get the coordinate selector function when resized to single pixel.
Definition: NvInfer.h:4240
InterpolationMode getResizeMode() const noexcept
Get resize mode for an input tensor.
Definition: NvInfer.h:4170
void setCoordinateTransformation(ResizeCoordinateTransformation coordTransform) noexcept
Set coordinate transformation function.
Definition: NvInfer.h:4205
void setExcludeOutside(bool excludeFlag) noexcept
Set the state for excluding outside pixels.
Definition: NvInfer.h:4309
void setResizeMode(InterpolationMode interpolationMode) noexcept
Set resize mode for an input tensor.
Definition: NvInfer.h:4160
Dims getOutputDimensions() const noexcept
Get the output dimensions.
Definition: NvInfer.h:4099
ResizeRoundMode getNearestRounding() const noexcept
Get rounding mode for nearest neighbor resize.
Definition: NvInfer.h:4264
bool getExcludeOutside() const noexcept
Get the state for excluding outside pixels.
Definition: NvInfer.h:4319
ResizeCoordinateTransformation getCoordinateTransformation() const noexcept
Get coordinate transformation function.
Definition: NvInfer.h:4215
A ReverseSequence layer in a network definition.
Definition: NvInfer.h:6354
void setSequenceAxis(int32_t sequenceAxis) noexcept
Set the sequence axis. Default is 0.
Definition: NvInfer.h:6387
int32_t getBatchAxis() const noexcept
Return the batch axis. Return 1 if no batch axis was set.
Definition: NvInfer.h:6374
apiv::VReverseSequenceLayer * mImpl
Definition: NvInfer.h:6403
int32_t getSequenceAxis() const noexcept
Return the sequence axis. Return 0 if no sequence axis was set.
Definition: NvInfer.h:6397
void setBatchAxis(int32_t batchAxis) noexcept
Set the batch axis. Default is 1.
Definition: NvInfer.h:6364
virtual ~IReverseSequenceLayer() noexcept=0
Layer that implements Rotary Position Embedding (RoPE) (https://arxiv.org/abs/2104....
Definition: NvInfer.h:7453
TRT_NODISCARD int32_t getRotaryEmbeddingDim() const noexcept
Get the number of hidden dimensions participating in RoPE. The default value is 0,...
Definition: NvInfer.h:7493
void setInterleaved(bool interleaved) noexcept
Set whether the input is in interleaved format, i.e., whether the 2-d vectors rotated are taken from ...
Definition: NvInfer.h:7460
virtual ~IRotaryEmbeddingLayer() noexcept=0
TRT_NODISCARD bool setRotaryEmbeddingDim(int32_t rotaryEmbeddingDim) noexcept
Set the number of hidden dimensions participating in RoPE. The default value is 0,...
Definition: NvInfer.h:7482
apiv::VRotaryEmbeddingLayer * mImpl
Definition: NvInfer.h:7515
TRT_NODISCARD bool getInterleaved() const noexcept
Get whether the input is in interleaved format. The default value is false.
Definition: NvInfer.h:7471
A Scale layer in a network definition.
Definition: NvInfer.h:1693
Weights getScale() const noexcept
Get the scale value.
Definition: NvInfer.h:1750
Weights getPower() const noexcept
Get the power value.
Definition: NvInfer.h:1770
void setScale(Weights scale) noexcept
Set the scale value.
Definition: NvInfer.h:1740
void setPower(Weights power) noexcept
Set the power value.
Definition: NvInfer.h:1760
ScaleMode getMode() const noexcept
Get the scale mode.
Definition: NvInfer.h:1710
void setShift(Weights shift) noexcept
Set the shift value.
Definition: NvInfer.h:1720
void setChannelAxis(int32_t channelAxis) noexcept
Set the channel axis.
Definition: NvInfer.h:1806
virtual ~IScaleLayer() noexcept=0
Weights getShift() const noexcept
Get the shift value.
Definition: NvInfer.h:1730
void setMode(ScaleMode mode) noexcept
Set the scale mode.
Definition: NvInfer.h:1700
int32_t getChannelAxis() const noexcept
Get the channel axis.
Definition: NvInfer.h:1785
A scatter layer in a network definition. Supports several kinds of scattering.
Definition: NvInfer.h:5967
void setMode(ScatterMode mode) noexcept
Set the scatter mode.
Definition: NvInfer.h:5974
apiv::VScatterLayer * mImpl
Definition: NvInfer.h:6008
void setAxis(int32_t axis) noexcept
Set the axis used by ScatterMode::kELEMENTS.
Definition: NvInfer.h:5994
int32_t getAxis() const noexcept
Get the axis.
Definition: NvInfer.h:6002
ScatterMode getMode() const noexcept
Get the scatter mode.
Definition: NvInfer.h:5984
virtual ~IScatterLayer() noexcept=0
Select elements from two data tensors based on a condition tensor.
Definition: NvInfer.h:4918
virtual ~ISelectLayer() noexcept=0
Layer type for getting shape of a tensor.
Definition: NvInfer.h:3381
virtual ~IShapeLayer() noexcept=0
apiv::VShapeLayer * mImpl
Definition: NvInfer.h:3383
Layer type for shuffling data.
Definition: NvInfer.h:2940
apiv::VShuffleLayer * mImpl
Definition: NvInfer.h:3098
virtual ~IShuffleLayer() noexcept=0
void setFirstTranspose(Permutation permutation) noexcept
Set the permutation applied by the first transpose operation.
Definition: NvInfer.h:2951
void setSecondTranspose(Permutation permutation) noexcept
Set the permutation applied by the second transpose operation.
Definition: NvInfer.h:3051
Dims getReshapeDimensions() const noexcept
Get the reshaped dimensions.
Definition: NvInfer.h:3004
void setReshapeDimensions(Dims const &dimensions) noexcept
Set the reshaped dimensions.
Definition: NvInfer.h:2991
Permutation getFirstTranspose() const noexcept
Get the permutation applied by the first transpose operation.
Definition: NvInfer.h:2963
Permutation getSecondTranspose() const noexcept
Get the permutation applied by the second transpose operation.
Definition: NvInfer.h:3063
bool getZeroIsPlaceholder() const noexcept
Get meaning of 0 in reshape dimensions.
Definition: NvInfer.h:3092
void setZeroIsPlaceholder(bool zeroIsPlaceholder) noexcept
Set meaning of 0 in reshape dimensions.
Definition: NvInfer.h:3079
Slices an input tensor into an output tensor based on the offset and strides.
Definition: NvInfer.h:3194
void setStride(Dims const &stride) noexcept
Set the stride for computing the output slice data.
Definition: NvInfer.h:3263
apiv::VSliceLayer * mImpl
Definition: NvInfer.h:3362
virtual ~ISliceLayer() noexcept=0
void setSize(Dims const &size) noexcept
Set the dimensions of the output slice.
Definition: NvInfer.h:3234
void setAxes(Dims const &axes) noexcept
Set the axes for this ISliceLayer.
Definition: NvInfer.h:3341
void setStart(Dims const &start) noexcept
Set the start offset that the slice layer uses to create the output slice.
Definition: NvInfer.h:3205
Dims getStart() const noexcept
Get the start offset for the slice layer.
Definition: NvInfer.h:3220
void setMode(SampleMode mode) noexcept
Set the slice mode.
Definition: NvInfer.h:3288
Dims getSize() const noexcept
Get dimensions of the output slice.
Definition: NvInfer.h:3249
SampleMode getMode() const noexcept
Get the slice mode.
Definition: NvInfer.h:3298
Dims getStride() const noexcept
Get the stride for the output slice.
Definition: NvInfer.h:3278
Dims getAxes() const noexcept
Get the axes for this ISliceLayer.
Definition: NvInfer.h:3356
A Softmax layer in a network definition.
Definition: NvInfer.h:1839
void setAxes(uint32_t axes) noexcept
Set the axis along which softmax is computed. Currently, only one axis can be set.
Definition: NvInfer.h:1861
virtual ~ISoftMaxLayer() noexcept=0
uint32_t getAxes() const noexcept
Get the axis along which softmax occurs.
Definition: NvInfer.h:1871
Layer that represents a squeeze operation, removing unit dimensions of the first input tensor on a se...
Definition: NvInfer.h:6531
apiv::VSqueezeLayer * mImpl
Definition: NvInfer.h:6548
virtual ~ISqueezeLayer() noexcept=0
A tensor in a network definition.
Definition: NvInfer.h:186
void setAllowedFormats(TensorFormats formats) noexcept
Set allowed formats for an input or output tensor. By default all formats are allowed....
Definition: NvInfer.h:364
TensorLocation getLocation() const noexcept
Get the storage location of a tensor.
Definition: NvInfer.h:322
void setDimensions(Dims const &dimensions) noexcept
Set the dimensions of a tensor.
Definition: NvInfer.h:234
void setName(char const *name) noexcept
Set the tensor name.
Definition: NvInfer.h:203
bool isExecutionTensor() const noexcept
Whether the tensor is an execution tensor.
Definition: NvInfer.h:429
char const * getName() const noexcept
Get the tensor name.
Definition: NvInfer.h:215
bool isShapeTensor() const noexcept
Whether the tensor is a shape tensor.
Definition: NvInfer.h:408
bool isNetworkInput() const noexcept
Whether the tensor is a network input.
Definition: NvInfer.h:271
TRT_DEPRECATED void setBroadcastAcrossBatch(bool broadcastAcrossBatch) noexcept
Set whether to enable broadcast of tensor across the implicit batch dimension.
Definition: NvInfer.h:296
TRT_DEPRECATED bool getBroadcastAcrossBatch() const noexcept
Check if tensor is broadcast across the implicit batch dimension.
Definition: NvInfer.h:310
bool isNetworkOutput() const noexcept
Whether the tensor is a network output.
Definition: NvInfer.h:279
DataType getType() const noexcept
Get the data type of a tensor.
Definition: NvInfer.h:263
virtual ~ITensor() noexcept=0
apiv::VTensor * mImpl
Definition: NvInfer.h:476
void setDimensionName(int32_t index, char const *name) noexcept
Name a dimension of an input tensor.
Definition: NvInfer.h:455
char const * getDimensionName(int32_t index) const noexcept
Get the name of an input dimension.
Definition: NvInfer.h:470
TRT_DEPRECATED void setLocation(TensorLocation location) noexcept
Set the storage location of a tensor.
Definition: NvInfer.h:341
Dims getDimensions() const noexcept
Get the dimensions of a tensor.
Definition: NvInfer.h:248
TensorFormats getAllowedFormats() const noexcept
Get a bitmask of TensorFormat values that the tensor supports. For a shape tensor,...
Definition: NvInfer.h:377
Class to handle tactic timing info collected from builder.
Definition: NvInfer.h:10165
int64_t queryKeys(TimingCacheKey *keyBuffer, int64_t capacity) const noexcept
Query cache keys from Timing Cache.
Definition: NvInfer.h:10231
virtual ~ITimingCache() noexcept=0
bool combine(ITimingCache const &inputCache, bool ignoreMismatch) noexcept
Combine input timing cache into local instance.
Definition: NvInfer.h:10202
TimingCacheValue query(TimingCacheKey const &key) const noexcept
Query value in a cache entry.
Definition: NvInfer.h:10248
bool update(TimingCacheKey const &key, TimingCacheValue const &value) noexcept
Update values in a cache entry.
Definition: NvInfer.h:10270
apiv::VTimingCache * mImpl
Definition: NvInfer.h:10276
bool reset() noexcept
Empty the timing cache.
Definition: NvInfer.h:10212
Layer that represents a TopK reduction.
Definition: NvInfer.h:3423
void setK(int32_t k) noexcept
Set the static k value for the layer.
Definition: NvInfer.h:3454
void setReduceAxes(uint32_t reduceAxes) noexcept
Set which axes to reduce for the layer.
Definition: NvInfer.h:3478
TopKOperation getOperation() const noexcept
Get the operation for the layer.
Definition: NvInfer.h:3440
apiv::VTopKLayer * mImpl
Definition: NvInfer.h:3537
void setOperation(TopKOperation op) noexcept
Set the operation for the layer.
Definition: NvInfer.h:3430
bool setIndicesType(DataType type) noexcept
Set the indices type for the layer.
Definition: NvInfer.h:3519
virtual ~ITopKLayer() noexcept=0
int32_t getK() const noexcept
Get the k value for the layer.
Definition: NvInfer.h:3468
uint32_t getReduceAxes() const noexcept
Get the axes to reduce for the layer.
Definition: NvInfer.h:3488
DataType getIndicesType() const noexcept
Return the TopK layer indices type.
Definition: NvInfer.h:3531
A layer that represents a trip-count limiter.
Definition: NvInfer.h:4725
virtual ~ITripLimitLayer() noexcept=0
TripLimit getTripLimit() const noexcept
Get a trip limiter type.
Definition: NvInfer.h:4730
Layer that represents a unary operation.
Definition: NvInfer.h:2655
void setOperation(UnaryOperation op) noexcept
Set the unary operation for the layer.
Definition: NvInfer.h:2664
apiv::VUnaryLayer * mImpl
Definition: NvInfer.h:2680
UnaryOperation getOperation() const noexcept
Get the unary operation for the layer.
Definition: NvInfer.h:2674
virtual ~IUnaryLayer() noexcept=0
Layer that represents an unsqueeze operation, which reshapes the first input tensor by inserting unit...
Definition: NvInfer.h:6563
virtual ~IUnsqueezeLayer() noexcept=0
apiv::VUnsqueezeLayer * mImpl
Definition: NvInfer.h:6581
An Interface class for version control.
Definition: NvInferRuntimeBase.h:282
Version information associated with a TRT interface.
Definition: NvInferRuntimeBase.h:247
An array of weights used as a layer parameter.
Definition: NvInferRuntime.h:121
Definition: NvInferRuntimeBase.h:419
Definition: NvInferRuntime.h:1652
Application-implemented logging interface for the builder, refitter and runtime.
Definition: NvInferRuntime.h:1575
Definition: NvInferPluginBase.h:206
Definition: NvInfer.h:10477
virtual bool stepComplete(char const *phaseName, int32_t step) noexcept=0
Signal that a step of an optimizer phase has finished.
virtual ~IProgressMonitor() noexcept=default
virtual void phaseFinish(char const *phaseName) noexcept=0
Signal that a phase of the optimizer has finished.
virtual void phaseStart(char const *phaseName, char const *parentPhase, int32_t nbSteps) noexcept=0
Signal that a phase of the optimizer has started.
Definition: NvInferRuntime.h:643
IBuilder * createInferBuilder(ILogger &logger) noexcept
Create an instance of an IBuilder class.
Definition: NvInfer.h:11756
The TensorRT API version 1 namespace.
Definition: NvInferSafePlugin.h:33
uint32_t TacticSources
Represents a collection of one or more TacticSource values combined using bitwise-OR operations.
Definition: NvInferRuntime.h:2780
ResizeSelector
The coordinate selector used when resizing to a single-pixel output.
Definition: NvInfer.h:3980
@ kFORMULA
Use formula to map the original index.
@ kUPPER
Select the upper left pixel.
EngineCapability
List of supported engine capability flows.
Definition: NvInferRuntime.h:76
MemoryPoolType
The type for memory pools used by TensorRT.
Definition: NvInfer.h:10289
AttentionIOForm
Enumerates the layout of the input/output tensors in an Attention layer.
Definition: NvInfer.h:6791
TENSORRTAPI bool setInternalLibraryPath(AsciiChar const *path) noexcept
Set a custom directory path for loading internal TensorRT libraries when building engines.
ScaleMode
Controls how shift, scale and power are applied in a Scale layer.
Definition: NvInfer.h:1650
@ kUNIFORM
Identical coefficients across all elements of the tensor.
@ kCHANNEL
Per-channel coefficients.
RuntimePlatform
Describes the intended runtime platform (operating system and CPU architecture) for the execution of ...
Definition: NvInfer.h:9920
HardwareCompatibilityLevel
Describes requirements of compatibility with GPU architectures other than that of the GPU on which th...
Definition: NvInfer.h:10397
CumulativeOperation
Enumerates the cumulative operations that may be performed by a Cumulative layer.
Definition: NvInfer.h:6599
BoundingBoxFormat
Representation of bounding box data used for the Boxes input tensor in INMSLayer.
Definition: NvInfer.h:6166
@ kCENTER_SIZES
(x_center, y_center, width, height) where (x_center, y_center) is the center point of the box
@ kCORNER_PAIRS
(x1, y1, x2, y2) where (x1, y1) and (x2, y2) are any pair of diagonal corners
UnaryOperation
Enumerates the unary operations that may be performed by a Unary layer.
Definition: NvInfer.h:2608
@ kISINF
Return true if input value equals +/- infinity for floating-point data type.
@ kCOSH
Hyperbolic cosine.
@ kACOSH
Inverse hyperbolic cosine.
@ kERF
Gauss error function.
@ kISNAN
Return true if input value is a NaN for floating-point data type.
@ kACOS
Inverse cosine.
@ kABS
Absolute value.
@ kSINH
Hyperbolic sine.
@ kROUND
Round to nearest even for floating-point data type.
@ kATANH
Inverse hyperbolic tangent.
@ kASINH
Inverse hyperbolic sine.
@ kSIGN
Sign: if input > 0, output 1; if input < 0, output -1; if input == 0, output 0.
@ kEXP
Exponentiation.
@ kATAN
Inverse tangent.
ActivationType
Enumerates the types of activation to perform in an activation layer.
Definition: NvInfer.h:143
@ kSELU
Selu activation: x>0 ? beta * x : beta * (alpha*exp(x) - alpha)
@ kTANH
TanH activation.
@ kSCALED_TANH
Scaled tanh activation: alpha*tanh(beta*x)
@ kRELU
Rectified linear activation.
@ kELU
Elu activation: x>=0 ? x : alpha * (exp(x) - 1).
@ kLEAKY_RELU
LeakyRelu activation: x>=0 ? x : alpha * x.
@ kSOFTSIGN
Softsign activation: x / (1+|x|)
@ kHARD_SIGMOID
Hard sigmoid activation: max(0, min(1, alpha*x+beta))
@ kTHRESHOLDED_RELU
Thresholded ReLU activation: x>alpha ? x : 0.
@ kSIGMOID
Sigmoid activation.
@ kCLIP
Clip activation: max(alpha, min(beta, x))
@ kGELU_TANH
GELU tanh activation: 0.5 * x * (1 + tanh(sqrt(2/pi) * (0.044715F * pow(x, 3) + x)))
@ kGELU_ERF
GELU erf activation: 0.5 * x * (1 + erf(sqrt(0.5) * x))
@ kSOFTPLUS
Parametric softplus activation: alpha*log(exp(beta*x)+1)
FillOperation
Enumerates the tensor fill operations that may be performed by a fill layer.
Definition: NvInfer.h:4983
ResizeRoundMode
The rounding mode for nearest neighbor resize.
Definition: NvInfer.h:4007
@ kHALF_UP
Round half up.
@ kHALF_DOWN
Round half down.
char_t AsciiChar
Definition: NvInferRuntimeBase.h:116
CausalMaskKind
Enumerates the causal mask alignment orientation for the attention.
Definition: NvInfer.h:6763
@ kUPPER_LEFT
Diagonal anchored at top-left corner (legacy default when causal=true).
@ kLOWER_RIGHT
Diagonal anchored at bottom-right corner (decode-aligned semantics).
PaddingMode
Enumerates the modes of padding to perform in convolution, deconvolution and pooling layer,...
Definition: NvInfer.h:826
@ kSAME_LOWER
Use SAME padding, with prePadding >= postPadding.
@ kEXPLICIT_ROUND_DOWN
Use explicit padding, rounding output size down.
@ kEXPLICIT_ROUND_UP
Use explicit padding, rounding output size up.
@ kSAME_UPPER
Use SAME padding, with prePadding <= postPadding.
TripLimit
Enum that describes kinds of trip limits.
Definition: NvInfer.h:4365
@ kWHILE
Tensor is a scalar of type kBOOL. Loop terminates when value is false.
@ kCOUNT
Tensor is a scalar of type kINT32 or kINT64 that contains the trip count.
uint32_t NetworkDefinitionCreationFlags
Represents one or more NetworkDefinitionCreationFlag flags using binary OR operations....
Definition: NvInfer.h:11380
PreviewFeature
Define preview features.
Definition: NvInfer.h:10363
TilingOptimizationLevel
Define the optimization levels for Tiling.
Definition: NvInfer.h:10447
@ kFAST
Use a fast algorithm and heuristic based strategy. Slightly increases engine build time.
@ kFULL
Widen the search space even further. Significantly increases engine build time.
DataType
The type of weights and tensors. The datatypes other than kBOOL, kINT32, and kINT64 are "activation d...
Definition: NvInferRuntimeBase.h:149
uint32_t BuilderFlags
Represents one or more BuilderFlag values using binary OR operations, e.g., 1U << BuilderFlag::kDEBUG...
Definition: NvInfer.h:9949
DeviceType
The device that this layer/network will execute on.
Definition: NvInferRuntime.h:1341
LayerType
The type values of layer classes.
Definition: NvInfer.h:58
@ kGRID_SAMPLE
Grid sample layer.
@ kRAGGED_SOFTMAX
Ragged softmax layer.
@ kDECONVOLUTION
Deconvolution layer.
@ kREDUCE
Reduce layer.
@ kASSERTION
Assertion layer.
@ kTOPK
TopK layer.
@ kRESIZE
Resize Layer.
@ kCAST
Cast layer.
@ kPADDING
Padding layer.
@ kSQUEEZE
Squeeze Layer.
@ kATTENTION_INPUT
Attention Input.
@ kMATRIX_MULTIPLY
Matrix multiply layer.
@ kCONDITION
Condition layer.
@ kCUMULATIVE
Cumulative layer.
@ kCONDITIONAL_INPUT
Conditional Input layer.
@ kIDENTITY
Identity layer.
@ kNORMALIZATION
Normalization layer.
@ kQUANTIZE
Quantize layer.
@ kSCATTER
Scatter layer.
@ kCONVOLUTION
Convolution layer.
@ kPARAMETRIC_RELU
Parametric ReLU layer.
@ kATTENTION_OUTPUT
Attention Output.
@ kUNSQUEEZE
Unsqueeze Layer.
@ kCONCATENATION
Concatenation layer.
@ kONE_HOT
OneHot layer.
@ kREVERSE_SEQUENCE
Reverse sequence layer.
@ kSLICE
Slice layer.
@ kEINSUM
Einsum layer.
@ kSOFTMAX
SoftMax layer.
@ kSHAPE
Shape layer.
@ kROTARY_EMBEDDING
Rotary Embedding layer.
@ kRECURRENCE
Loop Recurrence layer.
@ kDEQUANTIZE
Dequantize layer.
@ kSHUFFLE
Shuffle layer.
@ kPLUGIN_V3
PluginV3 layer.
@ kITERATOR
Loop Iterator layer.
@ kPOOLING
Pooling layer.
@ kTRIP_LIMIT
Loop Trip limit layer.
@ kSCALE
Scale layer.
@ kDYNAMIC_QUANTIZE
Dynamic Quantize layer.
@ kGATHER
Gather layer.
@ kUNARY
Unary operation layer.
@ kACTIVATION
Activation layer.
@ kELEMENTWISE
Elementwise layer.
@ kSELECT
Select layer.
@ kPLUGIN_V2
PluginV2 layer.
@ kLOOP_OUTPUT
Loop output layer.
@ kCONDITIONAL_OUTPUT
Conditional Output layer.
@ kCONSTANT
Constant layer.
@ kNON_ZERO
NonZero layer.
@ kFILL
Fill layer.
@ kKVCACHE_UPDATE
KV Cache Update layer.
@ kPLUGIN
Plugin layer.
@ kDIST_COLLECTIVE
DistCollective layer.
SampleMode
Controls how ISliceLayer and IGridSample handle out-of-bounds coordinates.
Definition: NvInfer.h:3110
@ kCLAMP
Out of bounds indices are clamped to bounds.
@ kSTRICT_BOUNDS
Fail with error when the coordinates are out of bounds.
@ kWRAP
Coordinates wrap around periodically.
GatherMode
Control form of IGatherLayer.
Definition: NvInfer.h:2349
@ kDEFAULT
Similar to ONNX Gather.
@ kELEMENT
Similar to ONNX GatherElements.
@ kND
Similar to ONNX GatherND.
MoEActType
Enumerates the activation type for the MoE layer.
Definition: NvInfer.h:7680
uint32_t TensorFormats
It is capable of representing one or more TensorFormat by binary OR operations, e....
Definition: NvInfer.h:135
ProfilingVerbosity
List of verbosity levels of layer information exposed in NVTX annotations and in IEngineInspector.
Definition: NvInferRuntime.h:2792
NetworkDefinitionCreationFlag
List of immutable network properties expressed at network creation time. NetworkDefinitionCreationFla...
Definition: NvInfer.h:11391
ElementWiseOperation
Enumerates the binary operations that may be performed by an ElementWise layer.
Definition: NvInfer.h:2260
@ kSUB
Subtract the second element from the first.
@ kSUM
Sum of the two elements.
@ kPROD
Product of the two elements.
@ kFLOOR_DIV
Floor division of the first element by the second.
@ kEQUAL
Check if two elements are equal.
@ kAND
Logical AND of two elements.
@ kOR
Logical OR of two elements.
@ kMIN
Minimum of the two elements.
@ kPOW
The first element to the power of the second element.
@ kLESS
Check if element in first tensor is less than corresponding element in second tensor.
@ kGREATER
Check if element in first tensor is greater than corresponding element in second tensor.
@ kXOR
Logical XOR of two elements.
@ kDIV
Divide the first element by the second.
CollectiveOperation
Enumerates the collective operations that may be performed by a DistCollective layer.
Definition: NvInfer.h:2738
@ kALL_TO_ALL
All-to-all exchange.
@ kREDUCE_SCATTER
Reduce scatter.
InterpolationMode
Enumerates various modes of interpolation.
Definition: NvInfer.h:3904
@ kNEAREST
ND (0 < N <= 8) nearest neighbor resizing.
@ kCUBIC
Supports bicubic (2D) interpolation.
@ kLINEAR
Supports linear (1D), bilinear (2D), and trilinear (3D) interpolation.
BuilderFlag
List of valid modes that the builder can enable when creating an engine from a network definition.
Definition: NvInfer.h:9959
@ kWEIGHT_STREAMING
Enable weight streaming for the current engine.
@ kGPU_FALLBACK
Enable layers marked to execute on GPU if layer cannot execute on DLA.
@ kSPARSE_WEIGHTS
Allow the builder to examine weights and use optimized functions when weights have suitable sparsity.
@ kEDITABLE_TIMING_CACHE
Enable editable timing cache.
@ kSTRIP_PLAN
Strip the refittable weights from the engine plan file.
@ kMONITOR_MEMORY
Enable memory monitor during build time.
@ kDISABLE_TIMING_CACHE
Disable reuse of timing information across identical layers.
@ kREFIT
Enable building a refittable engine.
TENSORRTAPI nvinfer1::IPluginRegistry * getBuilderPluginRegistry(nvinfer1::EngineCapability capability) noexcept
Return the plugin registry for building a Standard engine, or nullptr if no registry exists.
TopKOperation
Enumerates the operations that may be performed by a TopK layer.
Definition: NvInfer.h:3395
ReduceOperation
Enumerates the reduce operations that may be performed by a Reduce layer.
Definition: NvInfer.h:2710
@ kAVG
Average of the elements.
TRT_DEPRECATED_API nvinfer1::safe::IPluginRegistry * getBuilderSafePluginRegistry(nvinfer1::EngineCapability capability) noexcept
Return the plugin registry for building a Safety engine, or nullptr if no registry exists.
ScatterMode
Control form of IScatterLayer.
Definition: NvInfer.h:5893
MatrixOperation
Enumerates the operations that may be performed on a tensor by IMatrixMultiplyLayer before multiplica...
Definition: NvInfer.h:3550
@ kTRANSPOSE
Like kNONE, but transpose the matrix dimensions.
ResizeCoordinateTransformation
The resize coordinate transformation function.
Definition: NvInfer.h:3929
LoopOutput
Enum that describes kinds of loop outputs.
Definition: NvInfer.h:4337
@ kLAST_VALUE
Output value is value of tensor for last iteration.
@ kCONCATENATE
Output value is concatenation of values of tensor for each iteration, in forward order.
@ kREVERSE
Output value is concatenation of values of tensor for each iteration, in reverse order.
KVCacheMode
Enumerates the KVCache modes that may be performed by a KVCacheUpdate layer.
Definition: NvInfer.h:7527
PoolingType
The type of pooling to perform in a pooling layer.
Definition: NvInfer.h:1263
@ kAVERAGE
Average over elements. If the tensor is padded, the count includes the padding.
@ kMAX
Maximum over elements.
@ kMAX_AVERAGE_BLEND
Blending between max and average pooling: (1-blendFactor)*maxPool + blendFactor*avgPool.
v_1_0::IProgressMonitor IProgressMonitor
Definition: NvInfer.h:10560
TensorLocation
The location for tensor data storage, device or host.
Definition: NvInferRuntime.h:203
AttentionNormalizationOp
Enumerates the operations that may be performed by the normalization in the attention subgraph.
Definition: NvInfer.h:6731
Represents a permutation of dimensions.
Definition: NvInfer.h:2917
Declaration of EnumMaxImpl struct to store the exclusive upper bound of an enumeration type.
Definition: NvInferRuntimeBase.h:131
The key to retrieve timing cache entries.
Definition: NvInfer.h:10129
Definition: NvInfer.h:10141
uint64_t tacticHash
Hash of the selected tactic.
Definition: NvInfer.h:10143
float timingMSec
Timing of this tactic in milliseconds. Negative numbers and NaN are invalid values.
Definition: NvInfer.h:10145

  Copyright © 2024 NVIDIA Corporation
  Privacy Policy | Manage My Privacy | Do Not Sell or Share My Data | Terms of Service | Accessibility | Corporate Policies | Product Security | Contact