TensorRT for RTX 1.3.0
NvInfer.h
1/*
2 * SPDX-FileCopyrightText: Copyright (c) 1993-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
3 * SPDX-License-Identifier: Apache-2.0
4 *
5 * Licensed under the Apache License, Version 2.0 (the "License");
6 * you may not use this file except in compliance with the License.
7 * You may obtain a copy of the License at
8 *
9 * http://www.apache.org/licenses/LICENSE-2.0
10 *
11 * Unless required by applicable law or agreed to in writing, software
12 * distributed under the License is distributed on an "AS IS" BASIS,
13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 * See the License for the specific language governing permissions and
15 * limitations under the License.
16 */
17
18#ifndef NV_INFER_H
19#define NV_INFER_H
20
21#include "NvInferLegacyDims.h"
22#include "NvInferRuntime.h" // IWYU pragma: export
23
35
41
46namespace nvinfer1
47{
48
56enum class LayerType : int32_t
57{
58 kCONVOLUTION = 0,
59 kCAST = 1,
60 kACTIVATION = 2,
61 kPOOLING = 3,
62 kLRN = 4,
63 kSCALE = 5,
64 kSOFTMAX = 6,
65 kDECONVOLUTION = 7,
66 kCONCATENATION = 8,
67 kELEMENTWISE = 9,
68 kPLUGIN = 10,
69 kUNARY = 11,
70 kPADDING = 12,
71 kSHUFFLE = 13,
72 kREDUCE = 14,
73 kTOPK = 15,
74 kGATHER = 16,
75 kMATRIX_MULTIPLY = 17,
76 kRAGGED_SOFTMAX = 18,
77 kCONSTANT = 19,
78 kIDENTITY = 20,
79 kPLUGIN_V2 = 21,
80 kSLICE = 22,
81 kSHAPE = 23,
82 kPARAMETRIC_RELU = 24,
83 kRESIZE = 25,
84 kTRIP_LIMIT = 26,
85 kRECURRENCE = 27,
86 kITERATOR = 28,
87 kLOOP_OUTPUT = 29,
88 kSELECT = 30,
89 kFILL = 31,
90 kQUANTIZE = 32,
91 kDEQUANTIZE = 33,
92 kCONDITION = 34,
93 kCONDITIONAL_OUTPUT = 35,
94 kCONDITIONAL_INPUT = 36,
95 kSCATTER = 37,
96 kEINSUM = 38,
97 kASSERTION = 39,
98 kONE_HOT = 40,
99 kNON_ZERO = 41,
100 kGRID_SAMPLE = 42,
101 kNMS = 43,
102 kREVERSE_SEQUENCE = 44,
103 kNORMALIZATION = 45,
104 kPLUGIN_V3 = 46,
105 kSQUEEZE = 47,
106 kUNSQUEEZE = 48,
107 kCUMULATIVE = 49,
108 kDYNAMIC_QUANTIZE = 50,
109 kATTENTION_INPUT = 51,
110 kATTENTION_OUTPUT = 52,
111 kROTARY_EMBEDDING = 53,
112 kKVCACHE_UPDATE = 54,
113};
114
120template <>
121constexpr inline int32_t EnumMax<LayerType>() noexcept
122{
123 return 55;
124}
125
132using TensorFormats = uint32_t;
133
139enum class ActivationType : int32_t
140{
141 kRELU = 0,
142 kSIGMOID = 1,
143 kTANH = 2,
144 kLEAKY_RELU = 3,
145 kELU = 4,
146 kSELU = 5,
147 kSOFTSIGN = 6,
148 kSOFTPLUS = 7,
149 kCLIP = 8,
150 kHARD_SIGMOID = 9,
151 kSCALED_TANH = 10,
152 kTHRESHOLDED_RELU = 11,
153 kGELU_ERF = 12,
154 kGELU_TANH = 13
155};
156
157
158namespace impl
159{
165template <>
166struct EnumMaxImpl<ActivationType>
167{
168 static constexpr int32_t kVALUE = 14;
169};
170} // namespace impl
171
186class ITensor : public INoCopy
187{
188public:
204 void setName(char const* name) noexcept
205 {
206 mImpl->setName(name);
207 }
208
216 char const* getName() const noexcept
217 {
218 return mImpl->getName();
219 }
220
235 void setDimensions(Dims const& dimensions) noexcept
236 {
237 mImpl->setDimensions(dimensions);
238 }
239
249 Dims getDimensions() const noexcept
250 {
251 return mImpl->getDimensions();
252 }
253
285 TRT_DEPRECATED void setType(DataType type) noexcept
286 {
287 mImpl->setType(type);
288 }
289
300 DataType getType() const noexcept
301 {
302 return mImpl->getType();
303 }
304
308 bool isNetworkInput() const noexcept
309 {
310 return mImpl->isNetworkInput();
311 }
312
316 bool isNetworkOutput() const noexcept
317 {
318 return mImpl->isNetworkOutput();
319 }
320
338 void setAllowedFormats(TensorFormats formats) noexcept
339 {
340 mImpl->setAllowedFormats(formats);
341 }
342
351 TensorFormats getAllowedFormats() const noexcept
352 {
353 return mImpl->getAllowedFormats();
354 }
355
382 bool isShapeTensor() const noexcept
383 {
384 return mImpl->isShapeTensor();
385 }
386
403 bool isExecutionTensor() const noexcept
404 {
405 return mImpl->isExecutionTensor();
406 }
407
429 void setDimensionName(int32_t index, char const* name) noexcept
430 {
431 mImpl->setDimensionName(index, name);
432 }
433
444 char const* getDimensionName(int32_t index) const noexcept
445 {
446 return mImpl->getDimensionName(index);
447 }
448
449protected:
450 apiv::VTensor* mImpl;
451 virtual ~ITensor() noexcept = default;
452};
453
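// A minimal sketch of typical ITensor usage, assuming "network" is an INetworkDefinition* created
// elsewhere; the tensor and dimension names are illustrative only:
//
//   nvinfer1::ITensor* data = network->addInput("data", nvinfer1::DataType::kFLOAT,
//                                               nvinfer1::Dims4{1, 3, 224, 224});
//   data->setDimensionName(0, "batch");            // label a dimension for diagnostics
//   nvinfer1::Dims dims = data->getDimensions();   // query the declared dimensions
//   bool isInput = data->isNetworkInput();         // true for tensors added via addInput()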
461class ILayer : public INoCopy
462{
463public:
469 LayerType getType() const noexcept
470 {
471 return mLayer->getType();
472 }
473
483 void setName(char const* name) noexcept
484 {
485 mLayer->setName(name);
486 }
487
493 char const* getName() const noexcept
494 {
495 return mLayer->getName();
496 }
497
501 int32_t getNbInputs() const noexcept
502 {
503 return mLayer->getNbInputs();
504 }
505
514 ITensor* getInput(int32_t index) const noexcept
515 {
516 return mLayer->getInput(index);
517 }
518
522 int32_t getNbOutputs() const noexcept
523 {
524 return mLayer->getNbOutputs();
525 }
526
532 ITensor* getOutput(int32_t index) const noexcept
533 {
534 return mLayer->getOutput(index);
535 }
536
549 void setInput(int32_t index, ITensor& tensor) noexcept
550 {
551 return mLayer->setInput(index, tensor);
552 }
553
582 TRT_DEPRECATED void setPrecision(DataType dataType) noexcept
583 {
584 mLayer->setPrecision(dataType);
585 }
586
594 DataType getPrecision() const noexcept
595 {
596 return mLayer->getPrecision();
597 }
598
608 TRT_DEPRECATED bool precisionIsSet() const noexcept
609 {
610 return mLayer->precisionIsSet();
611 }
612
620 TRT_DEPRECATED void resetPrecision() noexcept
621 {
622 mLayer->resetPrecision();
623 }
624
670 TRT_DEPRECATED void setOutputType(int32_t index, DataType dataType) noexcept
671 {
672 mLayer->setOutputType(index, dataType);
673 }
674
685 DataType getOutputType(int32_t index) const noexcept
686 {
687 return mLayer->getOutputType(index);
688 }
689
701 TRT_DEPRECATED bool outputTypeIsSet(int32_t index) const noexcept
702 {
703 return mLayer->outputTypeIsSet(index);
704 }
705
715 TRT_DEPRECATED void resetOutputType(int32_t index) noexcept
716 {
717 return mLayer->resetOutputType(index);
718 }
719
733 void setMetadata(char const* metadata) noexcept
734 {
735 mLayer->setMetadata(metadata);
736 }
737
746 char const* getMetadata() const noexcept
747 {
748 return mLayer->getMetadata();
749 }
750
751protected:
752 virtual ~ILayer() noexcept = default;
753 apiv::VLayer* mLayer;
754};
755
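// A minimal sketch of inspecting layers through the ILayer interface, assuming "network" is an
// already-populated INetworkDefinition*; names are illustrative:
//
//   for (int32_t i = 0; i < network->getNbLayers(); ++i)
//   {
//       nvinfer1::ILayer* layer = network->getLayer(i);
//       layer->setMetadata(layer->getName());   // e.g. tag each layer with its own name
//   }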
912enum class PaddingMode : int32_t
913{
914 kEXPLICIT_ROUND_DOWN = 0,
915 kEXPLICIT_ROUND_UP = 1,
916 kSAME_UPPER = 2,
917 kSAME_LOWER = 3,
918};
919
920namespace impl
921{
927template <>
928struct EnumMaxImpl<PaddingMode>
929{
930 static constexpr int32_t kVALUE = 4;
931};
932} // namespace impl
933
946class IConvolutionLayer : public ILayer
947{
948public:
956 void setNbOutputMaps(int64_t nbOutputMaps) noexcept
957 {
958 mImpl->setNbOutputMaps(nbOutputMaps);
959 }
960
966 int64_t getNbOutputMaps() const noexcept
967 {
968 return mImpl->getNbOutputMaps();
969 }
970
986 void setNbGroups(int64_t nbGroups) noexcept
987 {
988 mImpl->setNbGroups(nbGroups);
989 }
990
996 int64_t getNbGroups() const noexcept
997 {
998 return mImpl->getNbGroups();
999 }
1000
1010 void setKernelWeights(Weights weights) noexcept
1011 {
1012 mImpl->setKernelWeights(weights);
1013 }
1014
1020 Weights getKernelWeights() const noexcept
1021 {
1022 return mImpl->getKernelWeights();
1023 }
1024
1035 void setBiasWeights(Weights weights) noexcept
1036 {
1037 mImpl->setBiasWeights(weights);
1038 }
1039
1045 Weights getBiasWeights() const noexcept
1046 {
1047 return mImpl->getBiasWeights();
1048 }
1049
1062 void setPrePadding(Dims const& padding) noexcept
1063 {
1064 mImpl->setPrePadding(padding);
1065 }
1066
1072 Dims getPrePadding() const noexcept
1073 {
1074 return mImpl->getPrePadding();
1075 }
1076
1089 void setPostPadding(Dims const& padding) noexcept
1090 {
1091 mImpl->setPostPadding(padding);
1092 }
1093
1099 Dims getPostPadding() const noexcept
1100 {
1101 return mImpl->getPostPadding();
1102 }
1103
1113 void setPaddingMode(PaddingMode paddingMode) noexcept
1114 {
1115 mImpl->setPaddingMode(paddingMode);
1116 }
1117
1125 PaddingMode getPaddingMode() const noexcept
1126 {
1127 return mImpl->getPaddingMode();
1128 }
1129
1138 void setKernelSizeNd(Dims const& kernelSize) noexcept
1139 {
1140 mImpl->setKernelSizeNd(kernelSize);
1141 }
1142
1148 Dims getKernelSizeNd() const noexcept
1149 {
1150 return mImpl->getKernelSizeNd();
1151 }
1152
1163 void setStrideNd(Dims const& stride) noexcept
1164 {
1165 mImpl->setStrideNd(stride);
1166 }
1167
1173 Dims getStrideNd() const noexcept
1174 {
1175 return mImpl->getStrideNd();
1176 }
1177
1191 void setPaddingNd(Dims const& padding) noexcept
1192 {
1193 mImpl->setPaddingNd(padding);
1194 }
1195
1203 Dims getPaddingNd() const noexcept
1204 {
1205 return mImpl->getPaddingNd();
1206 }
1207
1217 void setDilationNd(Dims const& dilation) noexcept
1218 {
1219 mImpl->setDilationNd(dilation);
1220 }
1221
1227 Dims getDilationNd() const noexcept
1228 {
1229 return mImpl->getDilationNd();
1230 }
1231
1246 using ILayer::setInput;
1247
1248protected:
1249 virtual ~IConvolutionLayer() noexcept = default;
1250 apiv::VConvolutionLayer* mImpl;
1251};
1252
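// A minimal sketch of adding and configuring a 2D convolution, assuming "network" is an
// INetworkDefinition*, "input" is an ITensor*, and "kernel"/"bias" are Weights prepared by the
// caller (names are illustrative):
//
//   nvinfer1::IConvolutionLayer* conv = network->addConvolutionNd(
//       *input, /*nbOutputMaps=*/32, nvinfer1::Dims{2, {3, 3}}, kernel, bias);
//   conv->setStrideNd(nvinfer1::Dims{2, {2, 2}});
//   conv->setPaddingMode(nvinfer1::PaddingMode::kSAME_UPPER);
//   conv->setNbGroups(1);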
1266class IActivationLayer : public ILayer
1267{
1268public:
1276 void setActivationType(ActivationType type) noexcept
1277 {
1278 mImpl->setActivationType(type);
1279 }
1280
1286 ActivationType getActivationType() const noexcept
1287 {
1288 return mImpl->getActivationType();
1289 }
1290
1301 void setAlpha(float alpha) noexcept
1302 {
1303 mImpl->setAlpha(alpha);
1304 }
1305
1315 void setBeta(float beta) noexcept
1316 {
1317 mImpl->setBeta(beta);
1318 }
1319
1324 float getAlpha() const noexcept
1325 {
1326 return mImpl->getAlpha();
1327 }
1328
1333 float getBeta() const noexcept
1334 {
1335 return mImpl->getBeta();
1336 }
1337
1338protected:
1339 virtual ~IActivationLayer() noexcept = default;
1340 apiv::VActivationLayer* mImpl;
1341};
1342
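// A minimal sketch of adding activations, assuming "network" is an INetworkDefinition* and "x" is
// an ITensor* (illustrative names):
//
//   nvinfer1::IActivationLayer* relu = network->addActivation(*x, nvinfer1::ActivationType::kRELU);
//   nvinfer1::IActivationLayer* leaky
//       = network->addActivation(*relu->getOutput(0), nvinfer1::ActivationType::kLEAKY_RELU);
//   leaky->setAlpha(0.1F);   // negative-slope parameter for kLEAKY_RELU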
1348enum class PoolingType : int32_t
1349{
1350 kMAX = 0,
1351 kAVERAGE = 1,
1352 kMAX_AVERAGE_BLEND = 2
1353};
1354
1355namespace impl
1356{
1362template <>
1363struct EnumMaxImpl<PoolingType>
1364{
1365 static constexpr int32_t kVALUE = 3;
1366};
1367} // namespace impl
1368
1380class IPoolingLayer : public ILayer
1381{
1382public:
1390 void setPoolingType(PoolingType type) noexcept
1391 {
1392 mImpl->setPoolingType(type);
1393 }
1394
1400 PoolingType getPoolingType() const noexcept
1401 {
1402 return mImpl->getPoolingType();
1403 }
1404
1415 void setBlendFactor(float blendFactor) noexcept
1416 {
1417 mImpl->setBlendFactor(blendFactor);
1418 }
1419
1428 float getBlendFactor() const noexcept
1429 {
1430 return mImpl->getBlendFactor();
1431 }
1432
1442 void setAverageCountExcludesPadding(bool exclusive) noexcept
1443 {
1444 mImpl->setAverageCountExcludesPadding(exclusive);
1445 }
1446
1453 bool getAverageCountExcludesPadding() const noexcept
1454 {
1455 return mImpl->getAverageCountExcludesPadding();
1456 }
1457
1471 void setPrePadding(Dims const& padding) noexcept
1472 {
1473 mImpl->setPrePadding(padding);
1474 }
1475
1481 Dims getPrePadding() const noexcept
1482 {
1483 return mImpl->getPrePadding();
1484 }
1485
1499 void setPostPadding(Dims const& padding) noexcept
1500 {
1501 mImpl->setPostPadding(padding);
1502 }
1503
1509 Dims getPostPadding() const noexcept
1510 {
1511 return mImpl->getPostPadding();
1512 }
1513
1522 void setPaddingMode(PaddingMode paddingMode) noexcept
1523 {
1524 mImpl->setPaddingMode(paddingMode);
1525 }
1526
1533 PaddingMode getPaddingMode() const noexcept
1534 {
1535 return mImpl->getPaddingMode();
1536 }
1537
1546 void setWindowSizeNd(Dims const& windowSize) noexcept
1547 {
1548 mImpl->setWindowSizeNd(windowSize);
1549 }
1550
1556 Dims getWindowSizeNd() const noexcept
1557 {
1558 return mImpl->getWindowSizeNd();
1559 }
1560
1571 void setStrideNd(Dims const& stride) noexcept
1572 {
1573 mImpl->setStrideNd(stride);
1574 }
1575
1581 Dims getStrideNd() const noexcept
1582 {
1583 return mImpl->getStrideNd();
1584 }
1585
1600 void setPaddingNd(Dims const& padding) noexcept
1601 {
1602 mImpl->setPaddingNd(padding);
1603 }
1604
1612 Dims getPaddingNd() const noexcept
1613 {
1614 return mImpl->getPaddingNd();
1615 }
1616
1617protected:
1618 virtual ~IPoolingLayer() noexcept = default;
1619 apiv::VPoolingLayer* mImpl;
1620};
1621
1631class ILRNLayer : public ILayer
1632{
1633public:
1643 void setWindowSize(int64_t windowSize) noexcept
1644 {
1645 mImpl->setWindowSize(windowSize);
1646 }
1647
1653 int64_t getWindowSize() const noexcept
1654 {
1655 return mImpl->getWindowSize();
1656 }
1657
1665 void setAlpha(float alpha) noexcept
1666 {
1667 mImpl->setAlpha(alpha);
1668 }
1669
1675 float getAlpha() const noexcept
1676 {
1677 return mImpl->getAlpha();
1678 }
1679
1687 void setBeta(float beta) noexcept
1688 {
1689 mImpl->setBeta(beta);
1690 }
1691
1697 float getBeta() const noexcept
1698 {
1699 return mImpl->getBeta();
1700 }
1701
1709 void setK(float k) noexcept
1710 {
1711 mImpl->setK(k);
1712 }
1713
1719 float getK() const noexcept
1720 {
1721 return mImpl->getK();
1722 }
1723
1724protected:
1725 virtual ~ILRNLayer() noexcept = default;
1726 apiv::VLRNLayer* mImpl;
1727};
1728
1734enum class ScaleMode : int32_t
1735{
1736 kUNIFORM = 0,
1737 kCHANNEL = 1,
1738 kELEMENTWISE = 2
1739};
1740
1746template <>
1747constexpr inline int32_t EnumMax<ScaleMode>() noexcept
1748{
1749 return 3;
1750}
1751
1777class IScaleLayer : public ILayer
1778{
1779public:
1785 void setMode(ScaleMode mode) noexcept
1786 {
1787 mImpl->setMode(mode);
1788 }
1789
1795 ScaleMode getMode() const noexcept
1796 {
1797 return mImpl->getMode();
1798 }
1799
1805 void setShift(Weights shift) noexcept
1806 {
1807 mImpl->setShift(shift);
1808 }
1809
1815 Weights getShift() const noexcept
1816 {
1817 return mImpl->getShift();
1818 }
1819
1825 void setScale(Weights scale) noexcept
1826 {
1827 mImpl->setScale(scale);
1828 }
1829
1835 Weights getScale() const noexcept
1836 {
1837 return mImpl->getScale();
1838 }
1839
1845 void setPower(Weights power) noexcept
1846 {
1847 mImpl->setPower(power);
1848 }
1849
1855 Weights getPower() const noexcept
1856 {
1857 return mImpl->getPower();
1858 }
1859
1870 int32_t getChannelAxis() const noexcept
1871 {
1872 return mImpl->getChannelAxis();
1873 }
1874
1891 void setChannelAxis(int32_t channelAxis) noexcept
1892 {
1893 mImpl->setChannelAxis(channelAxis);
1894 }
1895
1896protected:
1897 virtual ~IScaleLayer() noexcept = default;
1898 apiv::VScaleLayer* mImpl;
1899};
1900
1921class ISoftMaxLayer : public ILayer
1922{
1923public:
1944 void setAxes(uint32_t axes) noexcept
1945 {
1946 mImpl->setAxes(axes);
1947 }
1948
1954 uint32_t getAxes() const noexcept
1955 {
1956 return mImpl->getAxes();
1957 }
1958
1959protected:
1960 virtual ~ISoftMaxLayer() noexcept = default;
1961 apiv::VSoftMaxLayer* mImpl;
1962};
1963
1976class IConcatenationLayer : public ILayer
1977{
1978public:
1990 void setAxis(int32_t axis) noexcept
1991 {
1992 mImpl->setAxis(axis);
1993 }
1994
2000 int32_t getAxis() const noexcept
2001 {
2002 return mImpl->getAxis();
2003 }
2004
2005protected:
2006 virtual ~IConcatenationLayer() noexcept = default;
2007 apiv::VConcatenationLayer* mImpl;
2008};
2009
2017class IDeconvolutionLayer : public ILayer
2018{
2019public:
2027 void setNbOutputMaps(int64_t nbOutputMaps) noexcept
2028 {
2029 mImpl->setNbOutputMaps(nbOutputMaps);
2030 }
2031
2037 int64_t getNbOutputMaps() const noexcept
2038 {
2039 return mImpl->getNbOutputMaps();
2040 }
2041
2057 void setNbGroups(int64_t nbGroups) noexcept
2058 {
2059 mImpl->setNbGroups(nbGroups);
2060 }
2061
2067 int64_t getNbGroups() const noexcept
2068 {
2069 return mImpl->getNbGroups();
2070 }
2071
2081 void setKernelWeights(Weights weights) noexcept
2082 {
2083 mImpl->setKernelWeights(weights);
2084 }
2085
2091 Weights getKernelWeights() const noexcept
2092 {
2093 return mImpl->getKernelWeights();
2094 }
2095
2106 void setBiasWeights(Weights weights) noexcept
2107 {
2108 mImpl->setBiasWeights(weights);
2109 }
2110
2116 Weights getBiasWeights() const noexcept
2117 {
2118 return mImpl->getBiasWeights();
2119 }
2120
2133 void setPrePadding(Dims const& padding) noexcept
2134 {
2135 mImpl->setPrePadding(padding);
2136 }
2137
2143 Dims getPrePadding() const noexcept
2144 {
2145 return mImpl->getPrePadding();
2146 }
2147
2160 void setPostPadding(Dims const& padding) noexcept
2161 {
2162 mImpl->setPostPadding(padding);
2163 }
2164
2170 Dims getPostPadding() const noexcept
2171 {
2172 return mImpl->getPostPadding();
2173 }
2174
2184 void setPaddingMode(PaddingMode paddingMode) noexcept
2185 {
2186 mImpl->setPaddingMode(paddingMode);
2187 }
2188
2196 PaddingMode getPaddingMode() const noexcept
2197 {
2198 return mImpl->getPaddingMode();
2199 }
2200
2211 void setKernelSizeNd(Dims const& kernelSize) noexcept
2212 {
2213 mImpl->setKernelSizeNd(kernelSize);
2214 }
2215
2221 Dims getKernelSizeNd() const noexcept
2222 {
2223 return mImpl->getKernelSizeNd();
2224 }
2225
2238 void setStrideNd(Dims const& stride) noexcept
2239 {
2240 mImpl->setStrideNd(stride);
2241 }
2242
2248 Dims getStrideNd() const noexcept
2249 {
2250 return mImpl->getStrideNd();
2251 }
2252
2266 void setPaddingNd(Dims const& padding) noexcept
2267 {
2268 mImpl->setPaddingNd(padding);
2269 }
2270
2278 Dims getPaddingNd() const noexcept
2279 {
2280 return mImpl->getPaddingNd();
2281 }
2282
2295 using ILayer::setInput;
2296
2304 void setDilationNd(Dims const& dilation) noexcept
2305 {
2306 mImpl->setDilationNd(dilation);
2307 }
2308
2314 Dims getDilationNd() const noexcept
2315 {
2316 return mImpl->getDilationNd();
2317 }
2318
2319protected:
2320 virtual ~IDeconvolutionLayer() noexcept = default;
2321 apiv::VDeconvolutionLayer* mImpl;
2322};
2323
2336enum class ElementWiseOperation : int32_t
2337{
2338 kSUM = 0,
2339 kPROD = 1,
2340 kMAX = 2,
2341 kMIN = 3,
2342 kSUB = 4,
2343 kDIV = 5,
2344 kPOW = 6,
2345 kFLOOR_DIV = 7,
2346 kAND = 8,
2347 kOR = 9,
2348 kXOR = 10,
2349 kEQUAL = 11,
2350 kGREATER = 12,
2351 kLESS = 13
2352};
2353
2354namespace impl
2355{
2361template <>
2362struct EnumMaxImpl<ElementWiseOperation>
2363{
2364 static constexpr int32_t kVALUE = 14;
2365};
2366} // namespace impl
2367
2387class IElementWiseLayer : public ILayer
2388{
2389public:
2399 void setOperation(ElementWiseOperation op) noexcept
2400 {
2401 return mImpl->setOperation(op);
2402 }
2403
2411 ElementWiseOperation getOperation() const noexcept
2412 {
2413 return mImpl->getOperation();
2414 }
2415
2416protected:
2417 apiv::VElementWiseLayer* mImpl;
2418 virtual ~IElementWiseLayer() noexcept = default;
2419};
2420
2426enum class GatherMode : int32_t
2427{
2428 kDEFAULT = 0,
2429 kELEMENT = 1,
2430 kND = 2
2431};
2432
2438template <>
2439constexpr inline int32_t EnumMax<GatherMode>() noexcept
2440{
2441 return 3;
2442}
2443
2520class IGatherLayer : public ILayer
2521{
2522public:
2532 void setGatherAxis(int32_t axis) noexcept
2533 {
2534 mImpl->setGatherAxis(axis);
2535 }
2536
2544 int32_t getGatherAxis() const noexcept
2545 {
2546 return mImpl->getGatherAxis();
2547 }
2548
2567 void setNbElementWiseDims(int32_t elementWiseDims) noexcept
2568 {
2569 mImpl->setNbElementWiseDims(elementWiseDims);
2570 }
2571
2577 int32_t getNbElementWiseDims() const noexcept
2578 {
2579 return mImpl->getNbElementWiseDims();
2580 }
2581
2587 void setMode(GatherMode mode) noexcept
2588 {
2589 mImpl->setMode(mode);
2590 }
2591
2597 GatherMode getMode() const noexcept
2598 {
2599 return mImpl->getMode();
2600 }
2601
2602protected:
2603 apiv::VGatherLayer* mImpl;
2604 virtual ~IGatherLayer() noexcept = default;
2605};
2606
2618class IPluginV2Layer : public ILayer
2619{
2620public:
2626 IPluginV2& getPlugin() noexcept
2627 {
2628 return mImpl->getPlugin();
2629 }
2630
2631protected:
2632 apiv::VPluginV2Layer* mImpl;
2633 virtual ~IPluginV2Layer() noexcept = default;
2634};
2635
2645class IPluginV3Layer : public ILayer
2646{
2647public:
2653 IPluginV3& getPlugin() noexcept
2654 {
2655 return mImpl->getPlugin();
2656 }
2657
2658protected:
2659 apiv::VPluginV3Layer* mImpl;
2660 virtual ~IPluginV3Layer() noexcept = default;
2661};
2662
2679enum class UnaryOperation : int32_t
2680{
2681 kEXP = 0,
2682 kLOG = 1,
2683 kSQRT = 2,
2684 kRECIP = 3,
2685 kABS = 4,
2686 kNEG = 5,
2687 kSIN = 6,
2688 kCOS = 7,
2689 kTAN = 8,
2690 kSINH = 9,
2691 kCOSH = 10,
2692 kASIN = 11,
2693 kACOS = 12,
2694 kATAN = 13,
2695 kASINH = 14,
2696 kACOSH = 15,
2697 kATANH = 16,
2698 kCEIL = 17,
2699 kFLOOR = 18,
2700 kERF = 19,
2701 kNOT = 20,
2702 kSIGN = 21,
2703 kROUND = 22,
2704 kISINF = 23,
2705 kISNAN = 24,
2706};
2707
2713template <>
2714constexpr inline int32_t EnumMax<UnaryOperation>() noexcept
2715{
2716 return 25;
2717}
2718
2726class IUnaryLayer : public ILayer
2727{
2728public:
2736 void setOperation(UnaryOperation op) noexcept
2737 {
2738 mImpl->setOperation(op);
2739 }
2740
2746 UnaryOperation getOperation() const noexcept
2747 {
2748 return mImpl->getOperation();
2749 }
2750
2751protected:
2752 apiv::VUnaryLayer* mImpl;
2753 virtual ~IUnaryLayer() noexcept = default;
2754};
2755
2774enum class ReduceOperation : int32_t
2775{
2776 kSUM = 0,
2777 kPROD = 1,
2778 kMAX = 2,
2779 kMIN = 3,
2780 kAVG = 4
2781};
2782
2788template <>
2789constexpr inline int32_t EnumMax<ReduceOperation>() noexcept
2790{
2791 return 5;
2792}
2793
2801class IReduceLayer : public ILayer
2802{
2803public:
2809 void setOperation(ReduceOperation op) noexcept
2810 {
2811 mImpl->setOperation(op);
2812 }
2813
2819 ReduceOperation getOperation() const noexcept
2820 {
2821 return mImpl->getOperation();
2822 }
2823
2829 void setReduceAxes(uint32_t reduceAxes) noexcept
2830 {
2831 mImpl->setReduceAxes(reduceAxes);
2832 }
2833
2839 uint32_t getReduceAxes() const noexcept
2840 {
2841 return mImpl->getReduceAxes();
2842 }
2843
2849 void setKeepDimensions(bool keepDimensions) noexcept
2850 {
2851 mImpl->setKeepDimensions(keepDimensions);
2852 }
2853
2859 bool getKeepDimensions() const noexcept
2860 {
2861 return mImpl->getKeepDimensions();
2862 }
2863
2864protected:
2865 apiv::VReduceLayer* mImpl;
2866 virtual ~IReduceLayer() noexcept = default;
2867};
2868
2881class IPaddingLayer : public ILayer
2882{
2883public:
2893 void setPrePaddingNd(Dims const& padding) noexcept
2894 {
2895 mImpl->setPrePaddingNd(padding);
2896 }
2897
2905 Dims getPrePaddingNd() const noexcept
2906 {
2907 return mImpl->getPrePaddingNd();
2908 }
2909
2919 void setPostPaddingNd(Dims const& padding) noexcept
2920 {
2921 mImpl->setPostPaddingNd(padding);
2922 }
2923
2931 Dims getPostPaddingNd() const noexcept
2932 {
2933 return mImpl->getPostPaddingNd();
2934 }
2935
2936protected:
2937 apiv::VPaddingLayer* mImpl;
2938 virtual ~IPaddingLayer() noexcept = default;
2939};
2940
2946class Permutation
2947{
2954 int32_t order[Dims::MAX_DIMS];
2955};
2956
2969class IShuffleLayer : public ILayer
2970{
2971public:
2981 void setFirstTranspose(Permutation permutation) noexcept
2982 {
2983 mImpl->setFirstTranspose(permutation);
2984 }
2985
2993 Permutation getFirstTranspose() const noexcept
2994 {
2995 return mImpl->getFirstTranspose();
2996 }
2997
3021 void setReshapeDimensions(Dims const& dimensions) noexcept
3022 {
3023 mImpl->setReshapeDimensions(dimensions);
3024 }
3025
3034 Dims getReshapeDimensions() const noexcept
3035 {
3036 return mImpl->getReshapeDimensions();
3037 }
3038
3044 //
3067 using ILayer::setInput;
3068
3081 void setSecondTranspose(Permutation permutation) noexcept
3082 {
3083 mImpl->setSecondTranspose(permutation);
3084 }
3085
3093 Permutation getSecondTranspose() const noexcept
3094 {
3095 return mImpl->getSecondTranspose();
3096 }
3097
3109 void setZeroIsPlaceholder(bool zeroIsPlaceholder) noexcept
3110 {
3111 return mImpl->setZeroIsPlaceholder(zeroIsPlaceholder);
3112 }
3113
3122 bool getZeroIsPlaceholder() const noexcept
3123 {
3124 return mImpl->getZeroIsPlaceholder();
3125 }
3126
3127protected:
3128 apiv::VShuffleLayer* mImpl;
3129 virtual ~IShuffleLayer() noexcept = default;
3130};
3131
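// A minimal sketch of a reshape via IShuffleLayer, assuming "network" is an INetworkDefinition*
// and "x" is a 2-D or higher ITensor* (illustrative names). With zero-is-placeholder semantics,
// 0 copies the corresponding input dimension and one dimension may be -1 (inferred):
//
//   nvinfer1::IShuffleLayer* shuffle = network->addShuffle(*x);
//   shuffle->setReshapeDimensions(nvinfer1::Dims{2, {0, -1}});   // keep dim 0, flatten the rest
//   shuffle->setSecondTranspose(nvinfer1::Permutation{{1, 0}});  // then swap the two axes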
3137enum class SampleMode : int32_t
3138{
3139 kSTRICT_BOUNDS = 0,
3140 kWRAP = 1,
3141 kCLAMP = 2,
3142 kFILL = 3,
3143 kREFLECT = 4,
3146};
3147
3153template <>
3154constexpr inline int32_t EnumMax<SampleMode>() noexcept
3155{
3156 return 5;
3157}
3158
3221class ISliceLayer : public ILayer
3222{
3223public:
3233 void setStart(Dims const& start) noexcept
3234 {
3235 mImpl->setStart(start);
3236 }
3237
3248 Dims getStart() const noexcept
3249 {
3250 return mImpl->getStart();
3251 }
3252
3262 void setSize(Dims const& size) noexcept
3263 {
3264 return mImpl->setSize(size);
3265 }
3266
3277 Dims getSize() const noexcept
3278 {
3279 return mImpl->getSize();
3280 }
3281
3291 void setStride(Dims const& stride) noexcept
3292 {
3293 mImpl->setStride(stride);
3294 }
3295
3306 Dims getStride() const noexcept
3307 {
3308 return mImpl->getStride();
3309 }
3310
3316 void setMode(SampleMode mode) noexcept
3317 {
3318 mImpl->setMode(mode);
3319 }
3320
3326 SampleMode getMode() const noexcept
3327 {
3328 return mImpl->getMode();
3329 }
3330
3358 using ILayer::setInput;
3359
3369 void setAxes(Dims const& axes) noexcept
3370 {
3371 mImpl->setAxes(axes);
3372 }
3373
3384 Dims getAxes() const noexcept
3385 {
3386 return mImpl->getAxes();
3387 }
3388
3389protected:
3390 apiv::VSliceLayer* mImpl;
3391 virtual ~ISliceLayer() noexcept = default;
3392};
3393
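// A minimal sketch of a static slice, assuming "network" is an INetworkDefinition* and "x" is a
// 3-D ITensor* (illustrative names): extract a 1x2x4 window with per-axis strides {1, 1, 2}.
//
//   nvinfer1::ISliceLayer* slice = network->addSlice(*x,
//       nvinfer1::Dims{3, {0, 0, 0}},   // start
//       nvinfer1::Dims{3, {1, 2, 4}},   // size (number of elements per axis)
//       nvinfer1::Dims{3, {1, 1, 2}});  // stride
//   slice->setMode(nvinfer1::SampleMode::kCLAMP);   // out-of-bounds handling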
3406class IShapeLayer : public ILayer
3407{
3408protected:
3409 apiv::VShapeLayer* mImpl;
3410 virtual ~IShapeLayer() noexcept = default;
3411};
3412
3418enum class TopKOperation : int32_t
3419{
3420 kMAX = 0,
3421 kMIN = 1,
3422};
3423
3429template <>
3430constexpr inline int32_t EnumMax<TopKOperation>() noexcept
3431{
3432 return 2;
3433}
3434
3446class ITopKLayer : public ILayer
3447{
3448public:
3454 void setOperation(TopKOperation op) noexcept
3455 {
3456 mImpl->setOperation(op);
3457 }
3458
3464 TopKOperation getOperation() const noexcept
3465 {
3466 return mImpl->getOperation();
3467 }
3468
3478 void setK(int32_t k) noexcept
3479 {
3480 mImpl->setK(k);
3481 }
3482
3492 int32_t getK() const noexcept
3493 {
3494 return mImpl->getK();
3495 }
3496
3502 void setReduceAxes(uint32_t reduceAxes) noexcept
3503 {
3504 mImpl->setReduceAxes(reduceAxes);
3505 }
3506
3512 uint32_t getReduceAxes() const noexcept
3513 {
3514 return mImpl->getReduceAxes();
3515 }
3516
3531 using ILayer::setInput;
3532
3543 bool setIndicesType(DataType type) noexcept
3544 {
3545 return mImpl->setIndicesType(type);
3546 }
3547
3555 DataType getIndicesType() const noexcept
3556 {
3557 return mImpl->getIndicesType();
3558 }
3559
3560protected:
3561 apiv::VTopKLayer* mImpl;
3562 virtual ~ITopKLayer() noexcept = default;
3563};
3564
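// A minimal sketch of a TopK over the last axis of a 2-D tensor, assuming "network" is an
// INetworkDefinition* and "scores" is an ITensor* (illustrative names). The reduce-axes argument
// is a bitmask, so bit 1 selects axis 1:
//
//   nvinfer1::ITopKLayer* topk = network->addTopK(*scores, nvinfer1::TopKOperation::kMAX,
//                                                 /*k=*/5, /*reduceAxes=*/1U << 1,
//                                                 nvinfer1::DataType::kINT32);
//   nvinfer1::ITensor* values  = topk->getOutput(0);
//   nvinfer1::ITensor* indices = topk->getOutput(1);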
3571enum class MatrixOperation : int32_t
3572{
3576 kNONE = 0,
3577
3579 kTRANSPOSE = 1,
3580
3591 kVECTOR = 2,
3592};
3593
3599template <>
3600constexpr inline int32_t EnumMax<MatrixOperation>() noexcept
3601{
3602 return 3;
3603}
3604
3630class IMatrixMultiplyLayer : public ILayer
3631{
3632public:
3641 void setOperation(int32_t index, MatrixOperation op) noexcept
3642 {
3643 mImpl->setOperation(index, op);
3644 }
3645
3653 MatrixOperation getOperation(int32_t index) const noexcept
3654 {
3655 return mImpl->getOperation(index);
3656 }
3657
3658protected:
3659 apiv::VMatrixMultiplyLayer* mImpl;
3660 virtual ~IMatrixMultiplyLayer() noexcept = default;
3661};
3662
3684class INonZeroLayer : public ILayer
3685{
3686public:
3697 bool setIndicesType(DataType type) noexcept
3698 {
3699 return mImpl->setIndicesType(type);
3700 }
3701
3709 DataType getIndicesType() const noexcept
3710 {
3711 return mImpl->getIndicesType();
3712 }
3713
3714protected:
3715 virtual ~INonZeroLayer() noexcept = default;
3716 apiv::VNonZeroLayer* mImpl;
3717};
3718
3733class IRaggedSoftMaxLayer : public ILayer
3734{
3735protected:
3736 apiv::VRaggedSoftMaxLayer* mImpl;
3737 virtual ~IRaggedSoftMaxLayer() noexcept = default;
3738};
3739
3783class IIdentityLayer : public ILayer
3784{
3785protected:
3786 apiv::VIdentityLayer* mImpl;
3787 virtual ~IIdentityLayer() noexcept = default;
3788};
3789
3796class ICastLayer : public ILayer
3797{
3798public:
3806 void setToType(DataType toType) noexcept
3807 {
3808 mImpl->setToType(toType);
3809 }
3810
3817 DataType getToType() const noexcept
3818 {
3819 return mImpl->getToType();
3820 }
3821
3822protected:
3823 apiv::VCastLayer* mImpl;
3824 virtual ~ICastLayer() noexcept = default;
3825};
3826
3835class IConstantLayer : public ILayer
3836{
3837public:
3846 void setWeights(Weights weights) noexcept
3847 {
3848 mImpl->setWeights(weights);
3849 }
3850
3856 Weights getWeights() const noexcept
3857 {
3858 return mImpl->getWeights();
3859 }
3860
3868 void setDimensions(Dims const& dimensions) noexcept
3869 {
3870 mImpl->setDimensions(dimensions);
3871 }
3872
3880 Dims getDimensions() const noexcept
3881 {
3882 return mImpl->getDimensions();
3883 }
3884
3885protected:
3886 apiv::VConstantLayer* mImpl;
3887 virtual ~IConstantLayer() noexcept = default;
3888};
3889
3899class IParametricReLULayer : public ILayer
3900{
3901protected:
3902 apiv::VParametricReLULayer* mImpl;
3903 virtual ~IParametricReLULayer() noexcept = default;
3904};
3905
3911enum class InterpolationMode : int32_t
3912{
3913 kNEAREST = 0,
3914 kLINEAR = 1,
3915 kCUBIC = 2
3916};
3917
3918namespace impl
3919{
3925template <>
3926struct EnumMaxImpl<InterpolationMode>
3927{
3928 static constexpr int32_t kVALUE = 3;
3929};
3930} // namespace impl
3931
3939enum class ResizeCoordinateTransformation : int32_t
3940{
3953 kALIGN_CORNERS = 0,
3954
3961 kASYMMETRIC = 1,
3962
3969 kHALF_PIXEL = 2,
3970};
3971
3972namespace impl
3973{
3979template <>
3980struct EnumMaxImpl<ResizeCoordinateTransformation>
3981{
3982 static constexpr int32_t kVALUE = 3;
3983};
3984} // namespace impl
3985
3993enum class ResizeSelector : int32_t
3994{
3996 kFORMULA = 0,
3997
3999 kUPPER = 1,
4000};
4001
4002namespace impl
4003{
4009template <>
4010struct EnumMaxImpl<ResizeSelector>
4011{
4012 static constexpr int32_t kVALUE = 2;
4013};
4014} // namespace impl
4015
4023enum class ResizeRoundMode : int32_t
4024{
4026 kHALF_UP = 0,
4027
4029 kHALF_DOWN = 1,
4030
4032 kFLOOR = 2,
4033
4035 kCEIL = 3,
4036};
4037
4038namespace impl
4039{
4045template <>
4046struct EnumMaxImpl<ResizeRoundMode>
4047{
4048 static constexpr int32_t kVALUE = 4;
4049};
4050} // namespace impl
4051
4088class IResizeLayer : public ILayer
4089{
4090public:
4109 void setOutputDimensions(Dims const& dimensions) noexcept
4110 {
4111 return mImpl->setOutputDimensions(dimensions);
4112 }
4113
4119 Dims getOutputDimensions() const noexcept
4120 {
4121 return mImpl->getOutputDimensions();
4122 }
4123
4149 void setScales(float const* scales, int32_t nbScales) noexcept
4150 {
4151 mImpl->setScales(scales, nbScales);
4152 }
4153
4168 int32_t getScales(int32_t size, float* scales) const noexcept
4169 {
4170 return mImpl->getScales(size, scales);
4171 }
4172
4180 void setResizeMode(InterpolationMode interpolationMode) noexcept
4181 {
4182 mImpl->setResizeMode(interpolationMode);
4183 }
4184
4190 InterpolationMode getResizeMode() const noexcept
4191 {
4192 return mImpl->getResizeMode();
4193 }
4194
4214 using ILayer::setInput;
4215
4225 void setCoordinateTransformation(ResizeCoordinateTransformation coordTransform) noexcept
4226 {
4227 mImpl->setCoordinateTransformation(coordTransform);
4228 }
4229
4235 ResizeCoordinateTransformation getCoordinateTransformation() const noexcept
4236 {
4237 return mImpl->getCoordinateTransformation();
4238 }
4239
4250 void setSelectorForSinglePixel(ResizeSelector selector) noexcept
4251 {
4252 mImpl->setSelectorForSinglePixel(selector);
4253 }
4254
4260 ResizeSelector getSelectorForSinglePixel() const noexcept
4261 {
4262 return mImpl->getSelectorForSinglePixel();
4263 }
4264
4274 void setNearestRounding(ResizeRoundMode value) noexcept
4275 {
4276 mImpl->setNearestRounding(value);
4277 }
4278
4284 ResizeRoundMode getNearestRounding() const noexcept
4285 {
4286 return mImpl->getNearestRounding();
4287 }
4288
4306 void setCubicCoeff(float A) noexcept
4307 {
4308 mImpl->setCubicCoeff(A);
4309 }
4310
4316 float getCubicCoeff() const noexcept
4317 {
4318 return mImpl->getCubicCoeff();
4319 }
4320
4329 void setExcludeOutside(bool excludeFlag) noexcept
4330 {
4331 mImpl->setExcludeOutside(excludeFlag);
4332 }
4333
4339 bool getExcludeOutside() const noexcept
4340 {
4341 return mImpl->getExcludeOutside();
4342 }
4343
4344protected:
4345 virtual ~IResizeLayer() noexcept = default;
4346 apiv::VResizeLayer* mImpl;
4347};
4348
4354enum class LoopOutput : int32_t
4355{
4357 kLAST_VALUE = 0,
4358
4360 kCONCATENATE = 1,
4361
4363 kREVERSE = 2
4364};
4365
4371template <>
4372constexpr inline int32_t EnumMax<LoopOutput>() noexcept
4373{
4374 return 3;
4375}
4376
4382enum class TripLimit : int32_t
4383{
4384
4385 kCOUNT = 0,
4386 kWHILE = 1
4387};
4388
4394template <>
4395constexpr inline int32_t EnumMax<TripLimit>() noexcept
4396{
4397 return 2;
4398}
4399
4400class ILoop;
4401
4415class ILoopBoundaryLayer : public ILayer
4416{
4417public:
4421 ILoop* getLoop() const noexcept
4422 {
4423 return mBoundary->getLoop();
4424 }
4425
4426protected:
4427 virtual ~ILoopBoundaryLayer() noexcept = default;
4428 apiv::VLoopBoundaryLayer* mBoundary;
4429};
4430
4438class IIfConditionalBoundaryLayer : public ILayer
4439{
4440public:
4444 IIfConditional* getConditional() const noexcept
4445 {
4446 return mBoundary->getConditional();
4447 }
4448
4449protected:
4450 virtual ~IIfConditionalBoundaryLayer() noexcept = default;
4451 apiv::VConditionalBoundaryLayer* mBoundary;
4452};
4453
4459class IConditionLayer : public IIfConditionalBoundaryLayer
4460{
4461public:
4462protected:
4463 virtual ~IConditionLayer() noexcept = default;
4464 apiv::VConditionLayer* mImpl;
4465};
4466
4476class IIfConditionalOutputLayer : public IIfConditionalBoundaryLayer
4477{
4478public:
4479protected:
4480 virtual ~IIfConditionalOutputLayer() noexcept = default;
4481 apiv::VConditionalOutputLayer* mImpl;
4482};
4483
4489class IIfConditionalInputLayer : public IIfConditionalBoundaryLayer
4490{
4491public:
4492protected:
4493 virtual ~IIfConditionalInputLayer() noexcept = default;
4494 apiv::VConditionalInputLayer* mImpl;
4495};
4496
4521class IIfConditional : public INoCopy
4522{
4523public:
4533 IConditionLayer* setCondition(ITensor& condition) noexcept
4534 {
4535 return mImpl->setCondition(condition);
4536 }
4537
4551 IIfConditionalOutputLayer* addOutput(ITensor& trueSubgraphOutput, ITensor& falseSubgraphOutput) noexcept
4552 {
4553 return mImpl->addOutput(trueSubgraphOutput, falseSubgraphOutput);
4554 }
4555
4563 IIfConditionalInputLayer* addInput(ITensor& input) noexcept
4564 {
4565 return mImpl->addInput(input);
4566 }
4567
4578 void setName(char const* name) noexcept
4579 {
4580 mImpl->setName(name);
4581 }
4582
4588 char const* getName() const noexcept
4589 {
4590 return mImpl->getName();
4591 }
4592
4593protected:
4594 virtual ~IIfConditional() noexcept = default;
4595 apiv::VIfConditional* mImpl;
4596};
4597
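// A minimal sketch of an if-conditional, assuming "network" is an INetworkDefinition*, "pred" is a
// 0-D boolean ITensor*, and "x" is an ITensor* (illustrative names). Both branch outputs must have
// matching types and dimensions:
//
//   nvinfer1::IIfConditional* cond = network->addIfConditional();
//   cond->setCondition(*pred);
//   nvinfer1::ITensor* xIn = cond->addInput(*x)->getOutput(0);
//   nvinfer1::ITensor* thenOut = network->addUnary(*xIn, nvinfer1::UnaryOperation::kEXP)->getOutput(0);
//   nvinfer1::ITensor* elseOut = network->addUnary(*xIn, nvinfer1::UnaryOperation::kNEG)->getOutput(0);
//   nvinfer1::ITensor* result  = cond->addOutput(*thenOut, *elseOut)->getOutput(0);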
4605class IRecurrenceLayer : public ILoopBoundaryLayer
4606{
4607public:
4613 //
4626 using ILayer::setInput;
4627
4628protected:
4629 virtual ~IRecurrenceLayer() noexcept = default;
4630 apiv::VRecurrenceLayer* mImpl;
4631};
4632
4652class ILoopOutputLayer : public ILoopBoundaryLayer
4653{
4654public:
4658 LoopOutput getLoopOutput() const noexcept
4659 {
4660 return mImpl->getLoopOutput();
4661 }
4662
4675 void setAxis(int32_t axis) noexcept
4676 {
4677 mImpl->setAxis(axis);
4678 }
4679
4683 int32_t getAxis() const noexcept
4684 {
4685 return mImpl->getAxis();
4686 }
4687
4693 //
4708 using ILayer::setInput;
4709
4710protected:
4711 virtual ~ILoopOutputLayer() noexcept = default;
4712 apiv::VLoopOutputLayer* mImpl;
4713};
4714
4726class ITripLimitLayer : public ILoopBoundaryLayer
4727{
4728public:
4732 TripLimit getTripLimit() const noexcept
4733 {
4734 return mImpl->getTripLimit();
4735 }
4736
4737protected:
4738 virtual ~ITripLimitLayer() noexcept = default;
4739 apiv::VTripLimitLayer* mImpl;
4740};
4741
4752class IIteratorLayer : public ILoopBoundaryLayer
4753{
4754public:
4758 void setAxis(int32_t axis) noexcept
4759 {
4760 mImpl->setAxis(axis);
4761 }
4762
4766 int32_t getAxis() const noexcept
4767 {
4768 return mImpl->getAxis();
4769 }
4770
4780 void setReverse(bool reverse) noexcept
4781 {
4782 mImpl->setReverse(reverse);
4783 }
4784
4790 bool getReverse() const noexcept
4791 {
4792 return mImpl->getReverse();
4793 }
4794
4795protected:
4796 virtual ~IIteratorLayer() noexcept = default;
4797 apiv::VIteratorLayer* mImpl;
4798};
4799
4810class ILoop : public INoCopy
4811{
4812public:
4819 IRecurrenceLayer* addRecurrence(ITensor& initialValue) noexcept
4820 {
4821 return mImpl->addRecurrence(initialValue);
4822 }
4823
4840 ITripLimitLayer* addTripLimit(ITensor& tensor, TripLimit limit) noexcept
4841 {
4842 return mImpl->addTripLimit(tensor, limit);
4843 }
4844
4853 IIteratorLayer* addIterator(ITensor& tensor, int32_t axis = 0, bool reverse = false) noexcept
4854 {
4855 return mImpl->addIterator(tensor, axis, reverse);
4856 }
4857
4866 ILoopOutputLayer* addLoopOutput(ITensor& tensor, LoopOutput outputKind, int32_t axis = 0) noexcept
4867 {
4868 return mImpl->addLoopOutput(tensor, outputKind, axis);
4869 }
4870
4881 void setName(char const* name) noexcept
4882 {
4883 mImpl->setName(name);
4884 }
4885
4891 char const* getName() const noexcept
4892 {
4893 return mImpl->getName();
4894 }
4895
4896protected:
4897 virtual ~ILoop() noexcept = default;
4898 apiv::VLoop* mImpl;
4899};
4900
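// A minimal sketch of a counted loop that sums a constant into a recurrent value, assuming
// "network" is an INetworkDefinition*, "tripCount" is a 0-D INT32/INT64 ITensor*, and "init" and
// "delta" are ITensor* of matching shape (illustrative names):
//
//   nvinfer1::ILoop* loop = network->addLoop();
//   loop->addTripLimit(*tripCount, nvinfer1::TripLimit::kCOUNT);
//   nvinfer1::IRecurrenceLayer* rec = loop->addRecurrence(*init);
//   nvinfer1::ITensor* next = network->addElementWise(
//       *rec->getOutput(0), *delta, nvinfer1::ElementWiseOperation::kSUM)->getOutput(0);
//   rec->setInput(1, *next);   // feed the updated value back into the recurrence
//   loop->addLoopOutput(*rec->getOutput(0), nvinfer1::LoopOutput::kLAST_VALUE);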
4913class ISelectLayer : public ILayer
4914{
4915protected:
4916 virtual ~ISelectLayer() noexcept = default;
4917 apiv::VSelectLayer* mImpl;
4918};
4919
4935class IAssertionLayer : public ILayer
4936{
4937public:
4946 void setMessage(char const* message) noexcept
4947 {
4948 mImpl->setMessage(message);
4949 }
4950
4956 char const* getMessage() const noexcept
4957 {
4958 return mImpl->getMessage();
4959 }
4960
4961protected:
4962 virtual ~IAssertionLayer() noexcept = default;
4963
4964 apiv::VAssertionLayer* mImpl;
4965};
4966
4974enum class FillOperation : int32_t
4975{
4991 kLINSPACE = 0,
4992
4994 kRANDOM_UNIFORM = 1,
4995
4997 kRANDOM_NORMAL = 2
4998};
4999
5005template <>
5006constexpr inline int32_t EnumMax<FillOperation>() noexcept
5007{
5008 return 3;
5009}
5010
5046class IFillLayer : public ILayer
5047{
5048public:
5057 //
5058 void setDimensions(Dims const& dimensions) noexcept
5059 {
5060 mImpl->setDimensions(dimensions);
5061 }
5062
5073 Dims getDimensions() const noexcept
5074 {
5075 return mImpl->getDimensions();
5076 }
5077
5083 void setOperation(FillOperation op) noexcept
5084 {
5085 mImpl->setOperation(op);
5086 }
5087
5093 FillOperation getOperation() const noexcept
5094 {
5095 return mImpl->getOperation();
5096 }
5097
5111 //
5112 void setAlpha(double alpha) noexcept
5113 {
5114 mImpl->setAlpha(alpha);
5115 }
5116
5127 double getAlpha() const noexcept
5128 {
5129 return mImpl->getAlpha();
5130 }
5131
5146 void setBeta(double beta) noexcept
5147 {
5148 mImpl->setBeta(beta);
5149 }
5150
5161 double getBeta() const noexcept
5162 {
5163 return mImpl->getBeta();
5164 }
5165
5206 using ILayer::setInput;
5207
5221 //
5222 void setAlphaInt64(int64_t alpha) noexcept
5223 {
5224 mImpl->setAlphaInt64(alpha);
5225 }
5226
5237 int64_t getAlphaInt64() const noexcept
5238 {
5239 return mImpl->getAlphaInt64();
5240 }
5241
5256 void setBetaInt64(int64_t beta) noexcept
5257 {
5258 mImpl->setBetaInt64(beta);
5259 }
5260
5271 int64_t getBetaInt64() const noexcept
5272 {
5273 return mImpl->getBetaInt64();
5274 }
5275
5279 bool isAlphaBetaInt64() const noexcept
5280 {
5281 return mImpl->isAlphaBetaInt64();
5282 }
5283
5296 void setToType(DataType toType) noexcept
5297 {
5298 mImpl->setToType(toType);
5299 }
5300
5308 DataType getToType() const noexcept
5309 {
5310 return mImpl->getToType();
5311 }
5312
5313protected:
5314 virtual ~IFillLayer() noexcept = default;
5315 apiv::VFillLayer* mImpl;
5316};
5317
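// A minimal sketch of a linspace fill producing {0, 1, 2, 3, 4}, assuming "network" is an
// INetworkDefinition* (illustrative name). Alpha is the start value and beta the per-element delta:
//
//   nvinfer1::IFillLayer* fill = network->addFill(
//       nvinfer1::Dims{1, {5}}, nvinfer1::FillOperation::kLINSPACE, nvinfer1::DataType::kFLOAT);
//   fill->setAlpha(0.0);
//   fill->setBeta(1.0);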
5392class IQuantizeLayer : public ILayer
5393{
5394public:
5403 int32_t getAxis() const noexcept
5404 {
5405 return mImpl->getAxis();
5406 }
5414 void setAxis(int32_t axis) noexcept
5415 {
5416 mImpl->setAxis(axis);
5417 }
5418
5427 bool setBlockShape(Dims const& blockShape) noexcept
5428 {
5429 return mImpl->setBlockShape(blockShape);
5430 }
5431
5438 Dims getBlockShape() const noexcept
5439 {
5440 return mImpl->getBlockShape();
5441 }
5442
5454 void setToType(DataType toType) noexcept
5455 {
5456 mImpl->setToType(toType);
5457 }
5458
5466 DataType getToType() const noexcept
5467 {
5468 return mImpl->getToType();
5469 }
5470
5471protected:
5472 virtual ~IQuantizeLayer() noexcept = default;
5473 apiv::VQuantizeLayer* mImpl;
5474};
5475
5544class IDequantizeLayer : public ILayer
5545{
5546public:
5555 int32_t getAxis() const noexcept
5556 {
5557 return mImpl->getAxis();
5558 }
5566 void setAxis(int32_t axis) noexcept
5567 {
5568 mImpl->setAxis(axis);
5569 }
5570
5583 bool setBlockShape(Dims const& blockShape) noexcept
5584 {
5585 return mImpl->setBlockShape(blockShape);
5586 }
5587
5594 Dims getBlockShape() const noexcept
5595 {
5596 return mImpl->getBlockShape();
5597 }
5598
5610 void setToType(DataType toType) noexcept
5611 {
5612 mImpl->setToType(toType);
5613 }
5614
5622 DataType getToType() const noexcept
5623 {
5624 return mImpl->getToType();
5625 }
5626
5627protected:
5628 virtual ~IDequantizeLayer() noexcept = default;
5629 apiv::VDequantizeLayer* mImpl;
5630};
5631
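// A minimal sketch of an INT8 quantize/dequantize pair sharing one scale, assuming "network" is an
// INetworkDefinition*, "x" is a float ITensor*, and "scale" is a float ITensor* (for example the
// output of an IConstantLayer); names are illustrative:
//
//   nvinfer1::IQuantizeLayer* q = network->addQuantize(*x, *scale, nvinfer1::DataType::kINT8);
//   nvinfer1::IDequantizeLayer* dq
//       = network->addDequantize(*q->getOutput(0), *scale, nvinfer1::DataType::kFLOAT);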
5649class IDynamicQuantizeLayer : public ILayer
5650{
5651public:
5663 using ILayer::setInput;
5664
5677 void setToType(DataType toType) noexcept
5678 {
5679 mImpl->setToType(toType);
5680 }
5681
5690 DataType getToType() const noexcept
5691 {
5692 return mImpl->getToType();
5693 }
5694
5703 void setScaleType(DataType scaleType) noexcept
5704 {
5705 mImpl->setScaleType(scaleType);
5706 }
5707
5716 DataType getScaleType() const noexcept
5717 {
5718 return mImpl->getScaleType();
5719 }
5720
5729 TRT_DEPRECATED void setAxis(int32_t axis) noexcept
5730 {
5731 mImpl->setAxis(axis);
5732 }
5733
5739 TRT_DEPRECATED int32_t getAxis() const noexcept
5740 {
5741 return mImpl->getAxis();
5742 }
5743
5752 TRT_DEPRECATED void setBlockSize(int32_t size) noexcept
5753 {
5754 mImpl->setBlockSize(size);
5755 }
5756
5762 TRT_DEPRECATED int32_t getBlockSize() const noexcept
5763 {
5764 return mImpl->getBlockSize();
5765 }
5766
5775 void setBlockShape(Dims const& blockShape) noexcept
5776 {
5777 mImpl->setBlockShape(blockShape);
5778 }
5779
5787 Dims getBlockShape() const noexcept
5788 {
5789 return mImpl->getBlockShape();
5790 }
5791
5792protected:
5793 virtual ~IDynamicQuantizeLayer() noexcept = default;
5794 apiv::VDynamicQuantizeLayer* mImpl;
5795};
5796
5831class IEinsumLayer : public ILayer
5832{
5833public:
5843 bool setEquation(char const* equation) noexcept
5844 {
5845 return mImpl->setEquation(equation);
5846 }
5847
5853 char const* getEquation() const noexcept
5854 {
5855 return mImpl->getEquation();
5856 }
5857
5858protected:
5859 virtual ~IEinsumLayer() noexcept = default;
5860 apiv::VEinsumLayer* mImpl;
5861};
5862
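// A minimal sketch of a batched matrix multiply expressed with Einsum, assuming "network" is an
// INetworkDefinition* and "a"/"b" are ITensor* operands (illustrative names):
//
//   nvinfer1::ITensor* operands[] = {a, b};
//   nvinfer1::IEinsumLayer* einsum = network->addEinsum(operands, 2, "bij,bjk->bik");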
5870enum class ScatterMode : int32_t
5871{
5872 kELEMENT = 0,
5873 kND = 1,
5874};
5875
5881template <>
5882constexpr inline int32_t EnumMax<ScatterMode>() noexcept
5883{
5884 return 2;
5885}
5886
5944class IScatterLayer : public ILayer
5945{
5946public:
5952 void setMode(ScatterMode mode) noexcept
5953 {
5954 mImpl->setMode(mode);
5955 }
5956
5962 ScatterMode getMode() const noexcept
5963 {
5964 return mImpl->getMode();
5965 }
5966
5972 void setAxis(int32_t axis) noexcept
5973 {
5974 mImpl->setAxis(axis);
5975 }
5976
5980 int32_t getAxis() const noexcept
5981 {
5982 return mImpl->getAxis();
5983 }
5984
5985protected:
5986 apiv::VScatterLayer* mImpl;
5987 virtual ~IScatterLayer() noexcept = default;
5988}; // class IScatterLayer
5989
6016class IOneHotLayer : public ILayer
6017{
6018public:
6024 void setAxis(int32_t axis) noexcept
6025 {
6026 mImpl->setAxis(axis);
6027 }
6028
6032 int32_t getAxis() const noexcept
6033 {
6034 return mImpl->getAxis();
6035 }
6036
6037protected:
6038 apiv::VOneHotLayer* mImpl;
6039 virtual ~IOneHotLayer() noexcept = default;
6040};
6041
6053class IGridSampleLayer : public ILayer
6054{
6055public:
6061 void setInterpolationMode(InterpolationMode mode) noexcept
6062 {
6063 mImpl->setInterpolationMode(mode);
6064 }
6065
6073 InterpolationMode getInterpolationMode() const noexcept
6074 {
6075 return mImpl->getInterpolationMode();
6076 }
6077
6083 void setAlignCorners(bool alignCorners) noexcept
6084 {
6085 mImpl->setAlignCorners(alignCorners);
6086 }
6087
6095 bool getAlignCorners() const noexcept
6096 {
6097 return mImpl->getAlignCorners();
6098 }
6099
6107 bool setSampleMode(SampleMode mode) noexcept
6108 {
6109 return mImpl->setSampleMode(mode);
6110 }
6111
6119 SampleMode getSampleMode() const noexcept
6120 {
6121 return mImpl->getSampleMode();
6122 }
6123
6124protected:
6125 apiv::VGridSampleLayer* mImpl;
6126 virtual ~IGridSampleLayer() noexcept = default;
6127}; // class IGridSampleLayer
6128
6136enum class BoundingBoxFormat : int32_t
6137{
6139 kCORNER_PAIRS = 0,
6141 kCENTER_SIZES = 1
6142};
6143
6149template <>
6150constexpr inline int32_t EnumMax<BoundingBoxFormat>() noexcept
6151{
6152 return 2;
6153}
6154
6205class INMSLayer : public ILayer
6206{
6207public:
6217 void setBoundingBoxFormat(BoundingBoxFormat fmt) noexcept
6218 {
6219 mImpl->setBoundingBoxFormat(fmt);
6220 }
6221
6229 BoundingBoxFormat getBoundingBoxFormat() const noexcept
6230 {
6231 return mImpl->getBoundingBoxFormat();
6232 }
6233
6243 void setTopKBoxLimit(int32_t limit) noexcept
6244 {
6245 mImpl->setTopKBoxLimit(limit);
6246 }
6247
6253 int32_t getTopKBoxLimit() const noexcept
6254 {
6255 return mImpl->getTopKBoxLimit();
6256 }
6257
6276 using ILayer::setInput;
6277
6288 bool setIndicesType(DataType type) noexcept
6289 {
6290 return mImpl->setIndicesType(type);
6291 }
6292
6300 DataType getIndicesType() const noexcept
6301 {
6302 return mImpl->getIndicesType();
6303 }
6304
6305protected:
6306 apiv::VNMSLayer* mImpl;
6307 virtual ~INMSLayer() noexcept = default;
6308}; // class INMSLayer
6309
6322class IReverseSequenceLayer : public ILayer
6323{
6324public:
6333 void setBatchAxis(int32_t batchAxis) noexcept
6334 {
6335 mImpl->setBatchAxis(batchAxis);
6336 }
6337
6343 int32_t getBatchAxis() const noexcept
6344 {
6345 return mImpl->getBatchAxis();
6346 }
6347
6356 void setSequenceAxis(int32_t sequenceAxis) noexcept
6357 {
6358 mImpl->setSequenceAxis(sequenceAxis);
6359 }
6360
6366 int32_t getSequenceAxis() const noexcept
6367 {
6368 return mImpl->getSequenceAxis();
6369 }
6370
6371protected:
6372 apiv::VReverseSequenceLayer* mImpl;
6373 virtual ~IReverseSequenceLayer() noexcept = default;
6374}; // class IReverseSequenceLayer
6375
6394class INormalizationLayer : public ILayer
6395{
6396public:
6404 void setEpsilon(float eps) noexcept
6405 {
6406 return mImpl->setEpsilon(eps);
6407 }
6408
6414 float getEpsilon() const noexcept
6415 {
6416 return mImpl->getEpsilon();
6417 }
6418
6424 void setAxes(uint32_t axesMask) noexcept
6425 {
6426 return mImpl->setAxes(axesMask);
6427 }
6428
6434 uint32_t getAxes() const noexcept
6435 {
6436 return mImpl->getAxes();
6437 }
6438
6455 void setNbGroups(int64_t nbGroups) noexcept
6456 {
6457 return mImpl->setNbGroups(nbGroups);
6458 }
6459
6465 int64_t getNbGroups() const noexcept
6466 {
6467 return mImpl->getNbGroups();
6468 }
6469
6491 void setComputePrecision(DataType type) noexcept
6492 {
6493 return mImpl->setComputePrecision(type);
6494 }
6495
6501 DataType getComputePrecision() const noexcept
6502 {
6503 return mImpl->getComputePrecision();
6504 }
6505
6511 TRT_NODISCARD bool isV2() const noexcept
6512 {
6513 return mImpl->isV2();
6514 }
6515
6516protected:
6517 apiv::VNormalizationLayer* mImpl;
6518 virtual ~INormalizationLayer() noexcept = default;
6519};
6520
6521
6530class ISqueezeLayer : public ILayer
6531{
6532public:
6545 using ILayer::setInput;
6546
6547protected:
6548 apiv::VSqueezeLayer* mImpl;
6549 virtual ~ISqueezeLayer() noexcept = default;
6550};
6551
6560class IUnsqueezeLayer : public ILayer
6561{
6562public:
6576 using ILayer::setInput;
6577
6578protected:
6579 apiv::VUnsqueezeLayer* mImpl;
6580 virtual ~IUnsqueezeLayer() noexcept = default;
6581};
6582
6594enum class CumulativeOperation : int32_t
6595{
6596 kSUM = 0,
6597};
6598
6599namespace impl
6600{
6601
6607template <>
6608struct EnumMaxImpl<CumulativeOperation>
6609{
6610 static constexpr int32_t kVALUE = 1;
6611};
6612
6613} // namespace impl
6614
6642class ICumulativeLayer : public ILayer
6643{
6644public:
6654 bool setOperation(CumulativeOperation op) noexcept
6655 {
6656 return mImpl->setOperation(op);
6657 }
6658
6666 CumulativeOperation getOperation() const noexcept
6667 {
6668 return mImpl->getOperation();
6669 }
6670
6678 void setExclusive(bool exclusive) noexcept
6679 {
6680 mImpl->setExclusive(exclusive);
6681 }
6682
6690 bool getExclusive() const noexcept
6691 {
6692 return mImpl->getExclusive();
6693 }
6694
6702 void setReverse(bool reverse) noexcept
6703 {
6704 mImpl->setReverse(reverse);
6705 }
6706
6714 bool getReverse() const noexcept
6715 {
6716 return mImpl->getReverse();
6717 }
6718
6719protected:
6720 apiv::VCumulativeLayer* mImpl;
6721 virtual ~ICumulativeLayer() noexcept = default;
6722};
6723
6729enum class AttentionNormalizationOp : int32_t
6730{
6731 kNONE
6732 = 0,
6733 kSOFTMAX = 1,
6734};
6735
6736namespace impl
6737{
6743template <>
6744struct EnumMaxImpl<AttentionNormalizationOp>
6745{
6746 static constexpr int32_t kVALUE = 2;
6747};
6748
6749} // namespace impl
6750
6760class IAttentionBoundaryLayer : public ILayer
6761{
6762public:
6766 IAttention* getAttention() const noexcept
6767 {
6768 return mBoundary->getAttention();
6769 }
6770
6771protected:
6772 virtual ~IAttentionBoundaryLayer() noexcept = default;
6773 apiv::VAttentionBoundaryLayer* mBoundary;
6774};
6775
6786class IAttentionInputLayer : public IAttentionBoundaryLayer
6787{
6788public:
6804 using ILayer::setInput;
6805
6806protected:
6807 virtual ~IAttentionInputLayer() noexcept = default;
6808 apiv::VAttentionInputLayer* mImpl;
6809};
6810
6821class IAttentionOutputLayer : public IAttentionBoundaryLayer
6822{
6823public:
6824protected:
6825 virtual ~IAttentionOutputLayer() noexcept = default;
6826 apiv::VAttentionOutputLayer* mImpl;
6827};
6828
6878class IAttention : public INoCopy
6879{
6880public:
6888 bool setNormalizationOperation(AttentionNormalizationOp op) noexcept
6889 {
6890 return mImpl->setNormalizationOperation(op);
6891 }
6892
6900 AttentionNormalizationOp getNormalizationOperation() const noexcept
6901 {
6902 return mImpl->getNormalizationOperation();
6903 }
6904
6917 bool setMask(ITensor& mask) noexcept
6918 {
6919 return mImpl->setMask(mask);
6920 }
6921
6929 ITensor* getMask() noexcept
6930 {
6931 return mImpl->getMask();
6932 }
6933
6942 bool setCausal(bool isCausal) noexcept
6943 {
6944 return mImpl->setCausal(isCausal);
6945 }
6946
6954 bool getCausal() const noexcept
6955 {
6956 return mImpl->getCausal();
6957 }
6958
6966 bool setDecomposable(bool decomposable) noexcept
6967 {
6968 return mImpl->setDecomposable(decomposable);
6969 }
6970
6979 bool getDecomposable() const noexcept
6980 {
6981 return mImpl->getDecomposable();
6982 }
6983
6998 bool setInput(int32_t index, ITensor& input) noexcept
6999 {
7000 return mImpl->setInput(index, input);
7001 }
7002
7007 int32_t getNbInputs() const noexcept
7008 {
7009 return mImpl->getNbInputs();
7010 }
7011
7019 ITensor* getInput(int32_t index) const noexcept
7020 {
7021 return mImpl->getInput(index);
7022 }
7023
7027 int32_t getNbOutputs() const noexcept
7028 {
7029 return mImpl->getNbOutputs();
7030 }
7031
7039 ITensor* getOutput(int32_t index) const noexcept
7040 {
7041 return mImpl->getOutput(index);
7042 }
7043
7056 bool setName(char const* name) noexcept
7057 {
7058 return mImpl->setName(name);
7059 }
7060
7068 char const* getName() const noexcept
7069 {
7070 return mImpl->getName();
7071 }
7072
7084 bool setNormalizationQuantizeScale(ITensor& tensor) noexcept
7085 {
7086 return mImpl->setNormalizationQuantizeScale(tensor);
7087 }
7088
7095 ITensor* getNormalizationQuantizeScale() const noexcept
7096 {
7097 return mImpl->getNormalizationQuantizeScale();
7098 }
7099
7108 bool setNormalizationQuantizeToType(DataType type) noexcept
7109 {
7110 return mImpl->setNormalizationQuantizeToType(type);
7111 }
7112
7120 DataType getNormalizationQuantizeToType() const noexcept
7121 {
7122 return mImpl->getNormalizationQuantizeToType();
7123 }
7124
7140 bool setMetadata(char const* metadata) noexcept
7141 {
7142 return mImpl->setMetadata(metadata);
7143 }
7144
7153 char const* getMetadata() const noexcept
7154 {
7155 return mImpl->getMetadata();
7156 }
7157
7158
7159protected:
7160 apiv::VAttention* mImpl;
7161 virtual ~IAttention() noexcept = default;
7162};
7163
7170class IRotaryEmbeddingLayer : public ILayer
7171{
7172public:
7178 void setInterleaved(bool interleaved) noexcept
7179 {
7180 mImpl->setInterleaved(interleaved);
7181 }
7182
7183
7189 TRT_NODISCARD bool getInterleaved() const noexcept
7190 {
7191 return mImpl->getInterleaved();
7192 }
7193
7194
7200 TRT_NODISCARD bool setRotaryEmbeddingDim(int32_t rotaryEmbeddingDim) noexcept
7201 {
7202 return mImpl->setRotaryEmbeddingDim(rotaryEmbeddingDim);
7203 }
7204
7205
7211 TRT_NODISCARD int32_t getRotaryEmbeddingDim() const noexcept
7212 {
7213 return mImpl->getRotaryEmbeddingDim();
7214 }
7215
7216
7230 using ILayer::setInput;
7231
7232
7233protected:
7234 apiv::VRotaryEmbeddingLayer* mImpl;
7235 virtual ~IRotaryEmbeddingLayer() noexcept = default;
7236};
7237
7243enum class KVCacheMode : int32_t
7244{
7245 kLINEAR = 0,
7246};
7247
7248namespace impl
7249{
7255template <>
7256struct EnumMaxImpl<KVCacheMode>
7257{
7258 static constexpr int32_t kVALUE = 1;
7259};
7260
7261} // namespace impl
7262
7282class IKVCacheUpdateLayer : public ILayer
7283{
7284public:
7297 using ILayer::setInput;
7298
7306 bool setCacheMode(KVCacheMode cacheMode) noexcept
7307 {
7308 return mImpl->setCacheMode(cacheMode);
7309 }
7310
7316 KVCacheMode getCacheMode() const noexcept
7317 {
7318 return mImpl->getCacheMode();
7319 }
7320
7321protected:
7322 apiv::VKVCacheUpdateLayer* mImpl;
7323 virtual ~IKVCacheUpdateLayer() noexcept = default;
7324};
7325
7343class INetworkDefinition : public INoCopy
7344{
7345public:
7346 virtual ~INetworkDefinition() noexcept = default;
7347
7383 ITensor* addInput(char const* name, DataType type, Dims const& dimensions) noexcept
7384 {
7385 return mImpl->addInput(name, type, dimensions);
7386 }
7387
7397 void markOutput(ITensor& tensor) noexcept
7398 {
7399 mImpl->markOutput(tensor);
7400 }
7401
7415 bool markDebug(ITensor& tensor) noexcept
7416 {
7417 return mImpl->markDebug(tensor);
7418 }
7419
7431 bool unmarkDebug(ITensor& tensor) noexcept
7432 {
7433 return mImpl->unmarkDebug(tensor);
7434 }
7435
7441 bool isDebugTensor(ITensor const& tensor) const noexcept
7442 {
7443 return mImpl->isDebugTensor(tensor);
7444 }
7445
7463 bool markUnfusedTensorsAsDebugTensors() noexcept
7464 {
7465 return mImpl->markUnfusedTensorsAsDebugTensors();
7466 }
7467
7477 bool unmarkUnfusedTensorsAsDebugTensors() noexcept
7478 {
7479 return mImpl->unmarkUnfusedTensorsAsDebugTensors();
7480 }
7481
7497 IActivationLayer* addActivation(ITensor& input, ActivationType type) noexcept
7498 {
7499 return mImpl->addActivation(input, type);
7500 }
7501
7516 ILRNLayer* addLRN(ITensor& input, int64_t window, float alpha, float beta, float k) noexcept
7517 {
7518 return mImpl->addLRN(input, window, alpha, beta, k);
7519 }
7520
7542 IScaleLayer* addScale(ITensor& input, ScaleMode mode, Weights shift, Weights scale, Weights power) noexcept
7543 {
7544 return mImpl->addScale(input, mode, shift, scale, power);
7545 }
7546
7555 ISoftMaxLayer* addSoftMax(ITensor& input) noexcept
7556 {
7557 return mImpl->addSoftMax(input);
7558 }
7559
7572 IConcatenationLayer* addConcatenation(ITensor* const* inputs, int32_t nbInputs) noexcept
7573 {
7574 return mImpl->addConcatenation(inputs, nbInputs);
7575 }
7576
7599 IElementWiseLayer* addElementWise(ITensor& input1, ITensor& input2, ElementWiseOperation op) noexcept
7600 {
7601 return mImpl->addElementWise(input1, input2, op);
7602 }
7603
7621 IUnaryLayer* addUnary(ITensor& input, UnaryOperation operation) noexcept
7622 {
7623 return mImpl->addUnary(input, operation);
7624 }
7625
7635 IShuffleLayer* addShuffle(ITensor& input) noexcept
7636 {
7637 return mImpl->addShuffle(input);
7638 }
7639
7652 IOneHotLayer* addOneHot(ITensor& indices, ITensor& values, ITensor& depth, int32_t axis) noexcept
7653 {
7654 return mImpl->addOneHot(indices, values, depth, axis);
7655 }
7656
7664 int32_t getNbLayers() const noexcept
7665 {
7666 return mImpl->getNbLayers();
7667 }
7668
7678 ILayer* getLayer(int32_t index) const noexcept
7679 {
7680 return mImpl->getLayer(index);
7681 }
7682
7690 int32_t getNbInputs() const noexcept
7691 {
7692 return mImpl->getNbInputs();
7693 }
7694
7706 ITensor* getInput(int32_t index) const noexcept
7707 {
7708 return mImpl->getInput(index);
7709 }
7710
7720 int32_t getNbOutputs() const noexcept
7721 {
7722 return mImpl->getNbOutputs();
7723 }
7724
7736 ITensor* getOutput(int32_t index) const noexcept
7737 {
7738 return mImpl->getOutput(index);
7739 }
7740
7762 IReduceLayer* addReduce(
7763 ITensor& input, ReduceOperation operation, uint32_t reduceAxes, bool keepDimensions) noexcept
7764 {
7765 return mImpl->addReduce(input, operation, reduceAxes, keepDimensions);
7766 }
7767
7798 TRT_DEPRECATED ITopKLayer* addTopK(ITensor& input, TopKOperation op, int32_t k, uint32_t reduceAxes) noexcept
7799 {
7800 return mImpl->addTopK(input, op, k, reduceAxes);
7801 }
7802
7831 ITopKLayer* addTopK(ITensor& input, TopKOperation op, int32_t k, uint32_t reduceAxes, DataType indicesType) noexcept
7832 {
7833 return mImpl->addTopKV2(input, op, k, reduceAxes, indicesType);
7834 }
7835
7847 IGatherLayer* addGather(ITensor& data, ITensor& indices, int32_t axis) noexcept
7848 {
7849 return mImpl->addGather(data, indices, axis);
7850 }
7851
7863 IGatherLayer* addGatherV2(ITensor& data, ITensor& indices, GatherMode mode) noexcept
7864 {
7865 return mImpl->addGatherV2(data, indices, mode);
7866 }
7867
7882 IRaggedSoftMaxLayer* addRaggedSoftMax(ITensor& input, ITensor& bounds) noexcept
7883 {
7884 return mImpl->addRaggedSoftMax(input, bounds);
7885 }
7886
7903 IMatrixMultiplyLayer* addMatrixMultiply(
7904 ITensor& input0, MatrixOperation op0, ITensor& input1, MatrixOperation op1) noexcept
7905 {
7906 return mImpl->addMatrixMultiply(input0, op0, input1, op1);
7907 }
7908
7922 TRT_DEPRECATED INonZeroLayer* addNonZero(ITensor& input) noexcept
7923 {
7924 return mImpl->addNonZero(input);
7925 }
7926
7938 INonZeroLayer* addNonZero(ITensor& input, DataType indicesType) noexcept
7939 {
7940 return mImpl->addNonZeroV2(input, indicesType);
7941 }
7942
7962 IConstantLayer* addConstant(Dims const& dimensions, Weights weights) noexcept
7963 {
7964 return mImpl->addConstant(dimensions, weights);
7965 }
7966
7976 IIdentityLayer* addIdentity(ITensor& input) noexcept
7977 {
7978 return mImpl->addIdentity(input);
7979 }
7980
7991 ICastLayer* addCast(ITensor& input, DataType toType) noexcept
7992 {
7993 return mImpl->addCast(input, toType);
7994 }
7995
8006 void removeTensor(ITensor& tensor) noexcept
8007 {
8008 mImpl->removeTensor(tensor);
8009 }
8010
8018 void unmarkOutput(ITensor& tensor) noexcept
8019 {
8020 mImpl->unmarkOutput(tensor);
8021 }
8022
8037 ISliceLayer* addSlice(ITensor& input, Dims const& start, Dims const& size, Dims const& stride) noexcept
8038 {
8039 return mImpl->addSlice(input, start, size, stride);
8040 }
8041
8061 void setName(char const* name) noexcept
8062 {
8063 mImpl->setName(name);
8064 }
8065
8075 char const* getName() const noexcept
8076 {
8077 return mImpl->getName();
8078 }
8079
8091 IShapeLayer* addShape(ITensor& input) noexcept
8092 {
8093 return mImpl->addShape(input);
8094 }
8095
8101 NetworkDefinitionCreationFlags getFlags() const noexcept
8102 {
8103 return mImpl->getFlags();
8104 }
8105
8113 bool getFlag(NetworkDefinitionCreationFlag networkDefinitionCreationFlag) const noexcept
8114 {
8115 return mImpl->getFlag(networkDefinitionCreationFlag);
8116 }
8117
8130 bool markOutputForShapes(ITensor& tensor) noexcept
8131 {
8132 return mImpl->markOutputForShapes(tensor);
8133 }
8134
8142 bool unmarkOutputForShapes(ITensor& tensor) noexcept
8143 {
8144 return mImpl->unmarkOutputForShapes(tensor);
8145 }
8146
8160 IParametricReLULayer* addParametricReLU(ITensor& input, ITensor& slope) noexcept
8161 {
8162 return mImpl->addParametricReLU(input, slope);
8163 }
8164
8182 IConvolutionLayer* addConvolutionNd(
8183 ITensor& input, int64_t nbOutputMaps, Dims const& kernelSize, Weights kernelWeights, Weights biasWeights) noexcept
8184 {
8185 return mImpl->addConvolutionNd(input, nbOutputMaps, kernelSize, kernelWeights, biasWeights);
8186 }
8187
8202 IPoolingLayer* addPoolingNd(ITensor& input, PoolingType type, Dims const& windowSize) noexcept
8203 {
8204 return mImpl->addPoolingNd(input, type, windowSize);
8205 }
8206
8221 //
8224 IDeconvolutionLayer* addDeconvolutionNd(
8225 ITensor& input, int64_t nbOutputMaps, Dims kernelSize, Weights kernelWeights, Weights biasWeights) noexcept
8226 {
8227 return mImpl->addDeconvolutionNd(input, nbOutputMaps, kernelSize, kernelWeights, biasWeights);
8228 }
8229
8261 IScaleLayer* addScaleNd(
8262 ITensor& input, ScaleMode mode, Weights shift, Weights scale, Weights power, int32_t channelAxis) noexcept
8263 {
8264 return mImpl->addScaleNd(input, mode, shift, scale, power, channelAxis);
8265 }
8266
8278 IResizeLayer* addResize(ITensor& input) noexcept
8279 {
8280 return mImpl->addResize(input);
8281 }
8282
8292 ILoop* addLoop() noexcept
8293 {
8294 return mImpl->addLoop();
8295 }
8296
8307 IIfConditional* addIfConditional() noexcept
8308 {
8309 return mImpl->addIfConditional();
8310 }
8311
8346 ISelectLayer* addSelect(ITensor& condition, ITensor& thenInput, ITensor& elseInput) noexcept
8347 {
8348 return mImpl->addSelect(condition, thenInput, elseInput);
8349 }
8350
8363 IAssertionLayer* addAssertion(ITensor& condition, char const* message) noexcept
8364 {
8365 return mImpl->addAssertion(condition, message);
8366 }
8367
8389 IFillLayer* addFill(Dims const& dimensions, FillOperation op, DataType outputType) noexcept
8390 {
8391 return mImpl->addFillV2(dimensions, op, outputType);
8392 }
8393
8405 IPaddingLayer* addPaddingNd(ITensor& input, Dims const& prePadding, Dims const& postPadding) noexcept
8406 {
8407 return mImpl->addPaddingNd(input, prePadding, postPadding);
8408 }
8409
8429 bool setWeightsName(Weights weights, char const* name) noexcept
8430 {
8431 return mImpl->setWeightsName(weights, name);
8432 }
8433
8445 //
8448 void setErrorRecorder(IErrorRecorder* recorder) noexcept
8449 {
8450 mImpl->setErrorRecorder(recorder);
8451 }
8452
8463 IErrorRecorder* getErrorRecorder() const noexcept
8464 {
8465 return mImpl->getErrorRecorder();
8466 }
8467
8486 IDequantizeLayer* addDequantize(ITensor& input, ITensor& scale, DataType outputType) noexcept
8487 {
8488 return mImpl->addDequantizeV2(input, scale, outputType);
8489 }
8490
8506 IScatterLayer* addScatter(ITensor& data, ITensor& indices, ITensor& updates, ScatterMode mode) noexcept
8507 {
8508 return mImpl->addScatter(data, indices, updates, mode);
8509 }
8510
8530 IQuantizeLayer* addQuantize(ITensor& input, ITensor& scale, DataType outputType) noexcept
8531 {
8532 return mImpl->addQuantizeV2(input, scale, outputType);
8533 }
8534
8557 TRT_DEPRECATED IDynamicQuantizeLayer* addDynamicQuantize(
8558 ITensor& input, int32_t axis, int32_t blockSize, DataType outputType, DataType scaleType) noexcept
8559 {
8560 return mImpl->addDynamicQuantize(input, axis, blockSize, outputType, scaleType);
8561 }
8562
8581 IDynamicQuantizeLayer* addDynamicQuantize(
8582 ITensor& input, Dims const& blockShape, DataType outputType, DataType scaleType) noexcept
8583 {
8584 return mImpl->addDynamicQuantizeV2(input, blockShape, outputType, scaleType);
8585 }
8586
8597 IEinsumLayer* addEinsum(ITensor* const* inputs, int32_t nbInputs, char const* equation) noexcept
8598 {
8599 return mImpl->addEinsum(inputs, nbInputs, equation);
8600 }
8601
8615 IGridSampleLayer* addGridSample(ITensor& input, ITensor& grid) noexcept
8616 {
8617 return mImpl->addGridSample(input, grid);
8618 }
8619
8637 TRT_DEPRECATED INMSLayer* addNMS(ITensor& boxes, ITensor& scores, ITensor& maxOutputBoxesPerClass) noexcept
8638 {
8639 return mImpl->addNMS(boxes, scores, maxOutputBoxesPerClass);
8640 }
8641
8657 INMSLayer* addNMS(ITensor& boxes, ITensor& scores, ITensor& maxOutputBoxesPerClass, DataType indicesType) noexcept
8658 {
8659 return mImpl->addNMSV2(boxes, scores, maxOutputBoxesPerClass, indicesType);
8660 }
8661
8674 IReverseSequenceLayer* addReverseSequence(ITensor& input, ITensor& sequenceLens) noexcept
8675 {
8676 return mImpl->addReverseSequence(input, sequenceLens);
8677 }
8678
8706 TRT_DEPRECATED INormalizationLayer* addNormalization(ITensor& input, ITensor& scale, ITensor& bias, uint32_t axesMask) noexcept
8707 {
8708 return mImpl->addNormalization(input, scale, bias, axesMask);
8709 }
8710
8728 ICumulativeLayer* addCumulative(ITensor& input, ITensor& axis, CumulativeOperation operation, bool exclusive, bool reverse) noexcept
8729 {
8730 return mImpl->addCumulative(input, axis, operation, exclusive, reverse);
8731 }
8732
8755 IAttention* addAttention(
8756 ITensor& query, ITensor& key, ITensor& value, AttentionNormalizationOp normOp, bool causal) noexcept
8757 {
8758 return mImpl->addAttention(query, key, value, normOp, causal);
8759 }
8760
8780 IRotaryEmbeddingLayer* addRotaryEmbedding(ITensor& input, ITensor& cosCache, ITensor& sinCache, bool interleaved, int32_t rotaryEmbeddingDim) noexcept
8781 {
8782 return mImpl->addRotaryEmbedding(input, cosCache, sinCache, interleaved, rotaryEmbeddingDim);
8783 }
8784
8814 IKVCacheUpdateLayer* addKVCacheUpdate(
8815 ITensor& cache, ITensor& update, ITensor& writeIndices, KVCacheMode cacheMode) noexcept
8816 {
8817 return mImpl->addKVCacheUpdate(cache, update, writeIndices, cacheMode);
8818 }
8819
8826 virtual IBuilder& getBuilder() const noexcept
8827 {
8828 return mImpl->getBuilder();
8829 }
8830
8839 bool markWeightsRefittable(char const* name) noexcept
8840 {
8841 return mImpl->markWeightsRefittable(name);
8842 }
8843
8851 bool unmarkWeightsRefittable(char const* name) noexcept
8852 {
8853 return mImpl->unmarkWeightsRefittable(name);
8854 }
8855
8864 bool areWeightsMarkedRefittable(char const* name) const noexcept
8865 {
8866 return mImpl->areWeightsMarkedRefittable(name);
8867 }
8868
8883 ISqueezeLayer* addSqueeze(ITensor& input, ITensor& axes) noexcept
8884 {
8885 return mImpl->addSqueeze(input, axes);
8886 }
8887
8904 IUnsqueezeLayer* addUnsqueeze(ITensor& input, ITensor& axes) noexcept
8905 {
8906 return mImpl->addUnsqueeze(input, axes);
8907 }
8908
8930 TRT_NODISCARD INormalizationLayer* addNormalizationV2(ITensor& input, ITensor& scale, ITensor& bias, uint32_t axesMask) noexcept
8931 {
8932 return mImpl->addNormalizationV2(input, scale, bias, axesMask);
8933 }
8934
8935protected:
8936 apiv::VNetworkDefinition* mImpl;
8937};
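The sketch below is editorial and not part of NvInfer.h: it shows one typical way to combine the INetworkDefinition calls listed above, assuming the caller already owns a network, an input tensor, and populated Weights, and using markOutput(), which is declared earlier in this class.

void addConvBlock(nvinfer1::INetworkDefinition& network, nvinfer1::ITensor& input,
    nvinfer1::Weights kernelWeights, nvinfer1::Weights biasWeights)
{
    using namespace nvinfer1;
    Dims kernel{};
    kernel.nbDims = 2;
    kernel.d[0] = 3;
    kernel.d[1] = 3;
    // 3x3 convolution producing 32 feature maps, followed by a ReLU activation.
    IConvolutionLayer* conv = network.addConvolutionNd(input, 32, kernel, kernelWeights, biasWeights);
    IActivationLayer* relu = network.addActivation(*conv->getOutput(0), ActivationType::kRELU);
    network.markOutput(*relu->getOutput(0));
    network.setName("conv_block_example");
}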
8938
8956enum class RuntimePlatform : int32_t
8957{
8960 kSAME_AS_BUILD = 0,
8961
8964 kWINDOWS_AMD64 = 1,
8965
8966
8967};
8968
8969namespace impl
8970{
8976template <>
8977struct EnumMaxImpl<RuntimePlatform>
8978{
8979 static constexpr int32_t kVALUE = 2;
8980};
8981} // namespace impl
8982
8989using BuilderFlags = uint32_t;
8990
8998enum class BuilderFlag : int32_t
8999{
9003
9007
9009 kDEBUG = 2,
9010
9012 kGPU_FALLBACK = 3,
9013
9015 kREFIT = 4,
9016
9019
9023 kTF32 = 6,
9024
9026 kSPARSE_WEIGHTS = 7,
9027
9034 kSAFETY_SCOPE = 8,
9035
9039
9044
9050
9054
9061
9067
9075
9079
9084
9090
9092 kSTRIP_PLAN = 19,
9093
9096
9103 kREFIT_IDENTICAL = 20,
9104
9130 kWEIGHT_STREAMING = 21,
9131
9135
9140 kREFIT_INDIVIDUAL = 23,
9141
9150 kSTRICT_NANS = 24,
9151
9153 kMONITOR_MEMORY = 25,
9154
9158
9161
9173
9174#if ENABLE_FEATURE_DISABLE_RUNTIME_ALLOCATION
9181 kREQUIRE_USER_ALLOCATION = 29,
9182#endif // ENABLE_FEATURE_DISABLE_RUNTIME_ALLOCATION
9183
9184};
9185
9191template <>
9192constexpr inline int32_t EnumMax<BuilderFlag>() noexcept
9193{
9194#if ENABLE_FEATURE_DISABLE_RUNTIME_ALLOCATION
9195 return 30;
9196#else
9197 return 29;
9198#endif // ENABLE_FEATURE_DISABLE_RUNTIME_ALLOCATION
9199}
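As a hedged illustration (not part of the header), BuilderFlag bits are typically toggled through the setFlag/getFlag/clearFlag accessors of IBuilderConfig declared further below; the particular flags chosen here are arbitrary.

void configureFlags(nvinfer1::IBuilderConfig& config)
{
    using nvinfer1::BuilderFlag;
    config.setFlag(BuilderFlag::kTF32);           // allow TF32 tensor-core math
    config.setFlag(BuilderFlag::kSPARSE_WEIGHTS); // allow tactics that exploit sparse weights
    if (config.getFlag(BuilderFlag::kDEBUG))
    {
        config.clearFlag(BuilderFlag::kDEBUG);    // debug synchronization slows execution; drop it if inherited
    }
}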
9200
9201namespace v_1_0
9202{
9217struct TimingCacheKey
9218{
9219 uint8_t data[16];
9220};
9221
9231struct TimingCacheValue
9232{
9234 uint64_t tacticHash;
9238 static constexpr uint64_t kINVALID_TACTIC_HASH = UINT64_MAX;
9239};
9240} // namespace v_1_0
9241
9257class ITimingCache : public INoCopy
9258{
9259public:
9260 virtual ~ITimingCache() noexcept = default;
9261
9271 nvinfer1::IHostMemory* serialize() const noexcept
9272 {
9273 return mImpl->serialize();
9274 }
9275
9295 bool combine(ITimingCache const& inputCache, bool ignoreMismatch) noexcept
9296 {
9297 return mImpl->combine(inputCache, ignoreMismatch);
9298 }
9299
9305 bool reset() noexcept
9306 {
9307 return mImpl->reset();
9308 }
9309
9324 int64_t queryKeys(TimingCacheKey* keyBuffer, int64_t capacity) const noexcept
9325 {
9326 return mImpl->queryKeys(keyBuffer, capacity);
9327 }
9328
9341 TimingCacheValue query(TimingCacheKey const& key) const noexcept
9342 {
9343 return mImpl->query(key);
9344 }
9345
9363 bool update(TimingCacheKey const& key, TimingCacheValue const& value) noexcept
9364 {
9365 return mImpl->update(key, value);
9366 }
9367
9368protected:
9369 apiv::VTimingCache* mImpl;
9370};
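A minimal sketch of the usual timing-cache round trip, assuming an existing IBuilderConfig; note that createTimingCache() and setTimingCache() are marked TRT_DEPRECATED in this header, so this mirrors the legacy flow only and ownership handling is simplified.

nvinfer1::IHostMemory* snapshotTimingCache(nvinfer1::IBuilderConfig& config, void const* blob, std::size_t blobSize)
{
    // Load a previously serialized cache (or pass nullptr/0 for an empty one) and attach it to the config.
    nvinfer1::ITimingCache* cache = config.createTimingCache(blob, blobSize);
    if (cache == nullptr || !config.setTimingCache(*cache, /*ignoreMismatch*/ false))
    {
        delete cache;
        return nullptr;
    }
    // ... build engines with this config so the cache accumulates entries ...
    nvinfer1::IHostMemory* serialized = cache->serialize();
    delete cache; // the config holds only a non-owning reference
    return serialized;
}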
9371
9379enum class MemoryPoolType : int32_t
9380{
9387 kWORKSPACE = 0,
9388
9396
9402 kDLA_LOCAL_DRAM = 2,
9403
9409 kDLA_GLOBAL_DRAM = 3,
9410
9418 kTACTIC_DRAM = 4,
9419
9433};
9434
9440template <>
9441constexpr inline int32_t EnumMax<MemoryPoolType>() noexcept
9442{
9443 return 6;
9444}
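For example, the workspace pool is commonly capped through the accessors declared below on IBuilderConfig; the 1 GiB figure is only illustrative.

void limitWorkspace(nvinfer1::IBuilderConfig& config)
{
    constexpr std::size_t kOneGiB = 1ULL << 30;
    config.setMemoryPoolLimit(nvinfer1::MemoryPoolType::kWORKSPACE, kOneGiB);
    // getMemoryPoolLimit() reports the limit the builder will respect for scratch memory.
    std::size_t const limit = config.getMemoryPoolLimit(nvinfer1::MemoryPoolType::kWORKSPACE);
    (void) limit;
}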
9445
9454enum class PreviewFeature : int32_t
9455{
9462
9467
9474};
9475
9476namespace impl
9477{
9483template <>
9484struct EnumMaxImpl<PreviewFeature>
9485{
9486 static constexpr int32_t kVALUE = 3;
9487};
9488} // namespace impl
9489
9498enum class HardwareCompatibilityLevel : int32_t
9499{
9502 kNONE = 0,
9503
9515 kAMPERE_PLUS = 1,
9516
9526};
9527
9528namespace impl
9529{
9535template <>
9536struct EnumMaxImpl<HardwareCompatibilityLevel>
9537{
9538 static constexpr int32_t kVALUE = 3;
9539};
9540} // namespace impl
9541
9547enum class ComputeCapability : int32_t
9548{
9550 kNONE = 0,
9552 kCURRENT = 1,
9554 kSM75 = 75,
9556 kSM80 = 80,
9558 kSM86 = 86,
9560 kSM89 = 89,
9562 kSM120 = 120,
9563};
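A hedged sketch of targeting several compute capabilities in one build via the setNbComputeCapabilities/setComputeCapability accessors defined later in IBuilderConfig; the SM versions chosen are arbitrary.

bool targetAmpereAndAda(nvinfer1::IBuilderConfig& config)
{
    using nvinfer1::ComputeCapability;
    bool ok = config.setNbComputeCapabilities(2);
    ok = ok && config.setComputeCapability(ComputeCapability::kSM86, 0);
    ok = ok && config.setComputeCapability(ComputeCapability::kSM89, 1);
    return ok;
}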
9564
9573enum class TilingOptimizationLevel : int32_t
9574{
9576 kNONE = 0,
9577
9579 kFAST = 1,
9580
9583 kMODERATE = 2,
9584
9586 kFULL = 3
9587
9588};
9589
9590namespace impl
9591{
9597template <>
9598struct EnumMaxImpl<TilingOptimizationLevel>
9599{
9600 static constexpr int32_t kVALUE = 4;
9601};
9602} // namespace impl
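As an illustration only, the tiling level is typically paired with an L2 budget through the IBuilderConfig setters declared below; the 32 MiB value is an arbitrary example.

bool tuneTiling(nvinfer1::IBuilderConfig& config)
{
    bool ok = config.setTilingOptimizationLevel(nvinfer1::TilingOptimizationLevel::kMODERATE);
    ok = ok && config.setL2LimitForTiling(32LL << 20); // cap tiling decisions at roughly 32 MiB of L2
    return ok;
}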
9603
9604namespace v_1_0
9605{
9606class IProgressMonitor : public IVersionedInterface
9607{
9608public:
9609 IProgressMonitor() = default;
9610 virtual ~IProgressMonitor() noexcept = default;
9611
9615 InterfaceInfo getInterfaceInfo() const noexcept override
9616 {
9617 return InterfaceInfo{"IProgressMonitor", 1, 0};
9618 }
9619
9639 virtual void phaseStart(char const* phaseName, char const* parentPhase, int32_t nbSteps) noexcept = 0;
9640
9653 virtual bool stepComplete(char const* phaseName, int32_t step) noexcept = 0;
9654
9666 virtual void phaseFinish(char const* phaseName) noexcept = 0;
9667
9668}; // class IProgressMonitor
9669} // namespace v_1_0
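A minimal sketch of an IProgressMonitor implementation that just logs build phases to stdout; it assumes the nvinfer1::IProgressMonitor alias for v_1_0::IProgressMonitor and is not a complete progress UI.

#include <cstdio>

class StdoutProgressMonitor : public nvinfer1::IProgressMonitor
{
public:
    void phaseStart(char const* phaseName, char const* parentPhase, int32_t nbSteps) noexcept override
    {
        std::printf("start %s (parent=%s, steps=%d)\n", phaseName, parentPhase ? parentPhase : "-", nbSteps);
    }
    bool stepComplete(char const* phaseName, int32_t step) noexcept override
    {
        std::printf("  %s: step %d done\n", phaseName, step);
        return true; // returning false asks the builder to cancel the build
    }
    void phaseFinish(char const* phaseName) noexcept override
    {
        std::printf("finish %s\n", phaseName);
    }
};
// Attach it with config.setProgressMonitor(&monitor) before building.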
9670
9691
9699class IBuilderConfig : public INoCopy
9700{
9701public:
9702 virtual ~IBuilderConfig() noexcept = default;
9703
9712 virtual void setAvgTimingIterations(int32_t avgTiming) noexcept
9713 {
9714 mImpl->setAvgTimingIterations(avgTiming);
9715 }
9716
9724 int32_t getAvgTimingIterations() const noexcept
9725 {
9726 return mImpl->getAvgTimingIterations();
9727 }
9728
9737 void setEngineCapability(EngineCapability capability) noexcept
9738 {
9739 mImpl->setEngineCapability(capability);
9740 }
9741
9749 EngineCapability getEngineCapability() const noexcept
9750 {
9751 return mImpl->getEngineCapability();
9752 }
9753
9766 void setFlags(BuilderFlags builderFlags) noexcept
9767 {
9768 mImpl->setFlags(builderFlags);
9769 }
9770
9778 BuilderFlags getFlags() const noexcept
9779 {
9780 return mImpl->getFlags();
9781 }
9782
9790 void clearFlag(BuilderFlag builderFlag) noexcept
9791 {
9792 mImpl->clearFlag(builderFlag);
9793 }
9794
9802 void setFlag(BuilderFlag builderFlag) noexcept
9803 {
9804 mImpl->setFlag(builderFlag);
9805 }
9806
9814 bool getFlag(BuilderFlag builderFlag) const noexcept
9815 {
9816 return mImpl->getFlag(builderFlag);
9817 }
9818
9831 void setDeviceType(ILayer const* layer, DeviceType deviceType) noexcept
9832 {
9833 mImpl->setDeviceType(layer, deviceType);
9834 }
9835
9841 DeviceType getDeviceType(ILayer const* layer) const noexcept
9842 {
9843 return mImpl->getDeviceType(layer);
9844 }
9845
9853 bool isDeviceTypeSet(ILayer const* layer) const noexcept
9854 {
9855 return mImpl->isDeviceTypeSet(layer);
9856 }
9857
9863 void resetDeviceType(ILayer const* layer) noexcept
9864 {
9865 mImpl->resetDeviceType(layer);
9866 }
9867
9873 bool canRunOnDLA(ILayer const* layer) const noexcept
9874 {
9875 return mImpl->canRunOnDLA(layer);
9876 }
9877
9889 void setDLACore(int32_t dlaCore) noexcept
9890 {
9891 mImpl->setDLACore(dlaCore);
9892 }
9893
9899 int32_t getDLACore() const noexcept
9900 {
9901 return mImpl->getDLACore();
9902 }
9903
9910 void setDefaultDeviceType(DeviceType deviceType) noexcept
9911 {
9912 mImpl->setDefaultDeviceType(deviceType);
9913 }
9914
9920 DeviceType getDefaultDeviceType() const noexcept
9921 {
9922 return mImpl->getDefaultDeviceType();
9923 }
9924
9930 void reset() noexcept
9931 {
9932 mImpl->reset();
9933 }
9934
9942 void setProfileStream(const cudaStream_t stream) noexcept
9943 {
9944 return mImpl->setProfileStream(stream);
9945 }
9946
9954 cudaStream_t getProfileStream() const noexcept
9955 {
9956 return mImpl->getProfileStream();
9957 }
9958
9971 int32_t addOptimizationProfile(IOptimizationProfile const* profile) noexcept
9972 {
9973 return mImpl->addOptimizationProfile(profile);
9974 }
9975
9984 int32_t getNbOptimizationProfiles() const noexcept
9985 {
9986 return mImpl->getNbOptimizationProfiles();
9987 }
9988
9996 void setProfilingVerbosity(ProfilingVerbosity verbosity) noexcept
9997 {
9998 mImpl->setProfilingVerbosity(verbosity);
9999 }
10000
10009 ProfilingVerbosity getProfilingVerbosity() const noexcept
10010 {
10011 return mImpl->getProfilingVerbosity();
10012 }
10013
10031 bool setTacticSources(TacticSources tacticSources) noexcept
10032 {
10033 return mImpl->setTacticSources(tacticSources);
10034 }
10035
10046 TacticSources getTacticSources() const noexcept
10047 {
10048 return mImpl->getTacticSources();
10049 }
10050
10068 TRT_DEPRECATED nvinfer1::ITimingCache* createTimingCache(void const* blob, std::size_t size) const noexcept
10069 {
10070 return mImpl->createTimingCache(blob, size);
10071 }
10072
10093 TRT_DEPRECATED bool setTimingCache(ITimingCache const& cache, bool ignoreMismatch) noexcept
10094 {
10095 return mImpl->setTimingCache(cache, ignoreMismatch);
10096 }
10097
10105 TRT_DEPRECATED nvinfer1::ITimingCache const* getTimingCache() const noexcept
10106 {
10107 return mImpl->getTimingCache();
10108 }
10109
10137 void setMemoryPoolLimit(MemoryPoolType pool, std::size_t poolSize) noexcept
10138 {
10139 mImpl->setMemoryPoolLimit(pool, poolSize);
10140 }
10141
10156 std::size_t getMemoryPoolLimit(MemoryPoolType pool) const noexcept
10157 {
10158 return mImpl->getMemoryPoolLimit(pool);
10159 }
10160
10174 void setPreviewFeature(PreviewFeature feature, bool enable) noexcept
10175 {
10176 mImpl->setPreviewFeature(feature, enable);
10177 }
10178
10188 bool getPreviewFeature(PreviewFeature feature) const noexcept
10189 {
10190 return mImpl->getPreviewFeature(feature);
10191 }
10192
10221 void setBuilderOptimizationLevel(int32_t level) noexcept
10222 {
10223 mImpl->setBuilderOptimizationLevel(level);
10224 }
10225
10233 int32_t getBuilderOptimizationLevel() noexcept
10234 {
10235 return mImpl->getBuilderOptimizationLevel();
10236 }
10237
10250 void setHardwareCompatibilityLevel(HardwareCompatibilityLevel hardwareCompatibilityLevel) noexcept
10251 {
10252 mImpl->setHardwareCompatibilityLevel(hardwareCompatibilityLevel);
10253 }
10254
10263 HardwareCompatibilityLevel getHardwareCompatibilityLevel() const noexcept
10264 {
10265 return mImpl->getHardwareCompatibilityLevel();
10266 }
10267
10276 void setPluginsToSerialize(char const* const* paths, int32_t nbPaths) noexcept
10277 {
10278 mImpl->setPluginsToSerialize(paths, nbPaths);
10279 }
10280
10289 char const* getPluginToSerialize(int32_t index) const noexcept
10290 {
10291 return mImpl->getPluginToSerialize(index);
10292 }
10293
10299 int32_t getNbPluginsToSerialize() const noexcept
10300 {
10301 return mImpl->getNbPluginsToSerialize();
10302 }
10303
10328 void setMaxAuxStreams(int32_t nbStreams) noexcept
10329 {
10330 mImpl->setMaxAuxStreams(nbStreams);
10331 }
10332
10338 int32_t getMaxAuxStreams() const noexcept
10339 {
10340 return mImpl->getMaxAuxStreams();
10341 }
10342
10354 void setProgressMonitor(IProgressMonitor* monitor) noexcept
10355 {
10356 return mImpl->setProgressMonitor(monitor);
10357 }
10358
10364 IProgressMonitor* getProgressMonitor() const noexcept
10365 {
10366 return mImpl->getProgressMonitor();
10367 }
10368
10380 void setRuntimePlatform(RuntimePlatform runtimePlatform) noexcept
10381 {
10382 mImpl->setRuntimePlatform(runtimePlatform);
10383 }
10384
10392 RuntimePlatform getRuntimePlatform() const noexcept
10393 {
10394 return mImpl->getRuntimePlatform();
10395 }
10396
10404 void setMaxNbTactics(int32_t maxNbTactics) noexcept
10405 {
10406 mImpl->setMaxNbTactics(maxNbTactics);
10407 }
10408
10416 int32_t getMaxNbTactics() const noexcept
10417 {
10418 return mImpl->getMaxNbTactics();
10419 }
10420
10432 bool setTilingOptimizationLevel(TilingOptimizationLevel level) noexcept
10433 {
10434 return mImpl->setTilingOptimizationLevel(level);
10435 }
10436
10444 TilingOptimizationLevel getTilingOptimizationLevel() const noexcept
10445 {
10446 return mImpl->getTilingOptimizationLevel();
10447 }
10448
10460 bool setL2LimitForTiling(int64_t size) noexcept
10461 {
10462 return mImpl->setL2LimitForTiling(size);
10463 }
10464
10472 int64_t getL2LimitForTiling() const noexcept
10473 {
10474 return mImpl->getL2LimitForTiling();
10475 }
10476
10491 bool setNbComputeCapabilities(int32_t maxNbComputeCapabilities) noexcept
10492 {
10493 return mImpl->setNbComputeCapabilities(maxNbComputeCapabilities);
10494 }
10495
10503 int32_t getNbComputeCapabilities() const noexcept
10504 {
10505 return mImpl->getNbComputeCapabilities();
10506 }
10507
10521 bool setComputeCapability(ComputeCapability computeCapability, int32_t index) noexcept
10522 {
10523 return mImpl->setComputeCapability(computeCapability, index);
10524 }
10525
10535 ComputeCapability getComputeCapability(int32_t index) const noexcept
10536 {
10537 return mImpl->getComputeCapability(index);
10538 }
10539
10540protected:
10541 apiv::VBuilderConfig* mImpl;
10542};
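Putting several of these calls together, a hedged configuration sketch; it assumes the optimization profile has already been populated and that a valid CUDA stream is available.

void configureBuild(nvinfer1::IBuilderConfig& config, nvinfer1::IOptimizationProfile const* profile, cudaStream_t stream)
{
    config.setAvgTimingIterations(2);   // average more timing runs for stabler tactic selection
    config.setBuilderOptimizationLevel(3);
    config.setProfileStream(stream);    // stream used while profiling tactics
    if (config.addOptimizationProfile(profile) < 0)
    {
        // A negative index means the profile was rejected; handle the error here.
    }
    config.setProfilingVerbosity(nvinfer1::ProfilingVerbosity::kDETAILED);
}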
10543
10552
10561enum class NetworkDefinitionCreationFlag : int32_t
10562{
10567
10572 kSTRONGLY_TYPED = 1,
10573};
10574
10580template <>
10581constexpr inline int32_t EnumMax<NetworkDefinitionCreationFlag>() noexcept
10582{
10583 return 2;
10584}
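A common pattern, shown here as a hedged sketch, is to build the flags bitmask from these enumerators when calling IBuilder::createNetworkV2 (declared below).

nvinfer1::INetworkDefinition* makeStronglyTypedNetwork(nvinfer1::IBuilder& builder)
{
    using nvinfer1::NetworkDefinitionCreationFlag;
    nvinfer1::NetworkDefinitionCreationFlags const flags
        = 1U << static_cast<uint32_t>(NetworkDefinitionCreationFlag::kSTRONGLY_TYPED);
    return builder.createNetworkV2(flags); // caller owns the returned network; nullptr on error
}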
10585
10593class IBuilder : public INoCopy
10594{
10595public:
10596 virtual ~IBuilder() noexcept = default;
10597
10605 int32_t getMaxDLABatchSize() const noexcept
10606 {
10607 return mImpl->getMaxDLABatchSize();
10608 }
10609
10613 int32_t getNbDLACores() const noexcept
10614 {
10615 return mImpl->getNbDLACores();
10616 }
10617
10631 void setGpuAllocator(IGpuAllocator* allocator) noexcept
10632 {
10633 mImpl->setGpuAllocator(allocator);
10634 }
10635
10645 nvinfer1::IBuilderConfig* createBuilderConfig() noexcept
10646 {
10647 return mImpl->createBuilderConfig();
10648 }
10649
10671 nvinfer1::INetworkDefinition* createNetworkV2(NetworkDefinitionCreationFlags flags) noexcept
10672 {
10673 return mImpl->createNetworkV2(flags);
10674 }
10675
10686 nvinfer1::IOptimizationProfile* createOptimizationProfile() noexcept
10687 {
10688 return mImpl->createOptimizationProfile();
10689 }
10690
10705 void setErrorRecorder(IErrorRecorder* recorder) noexcept
10706 {
10707 mImpl->setErrorRecorder(recorder);
10708 }
10709
10720 IErrorRecorder* getErrorRecorder() const noexcept
10721 {
10722 return mImpl->getErrorRecorder();
10723 }
10724
10728 void reset() noexcept
10729 {
10730 mImpl->reset();
10731 }
10732
10747 nvinfer1::IHostMemory* buildSerializedNetwork(INetworkDefinition& network, IBuilderConfig& config) noexcept
10748 {
10749 return mImpl->buildSerializedNetwork(network, config);
10750 }
10751
10768 bool buildSerializedNetworkToStream(
10769 INetworkDefinition& network, IBuilderConfig& config, IStreamWriter& writer) noexcept
10770 {
10771 return mImpl->buildSerializedNetworkToStream(network, config, writer);
10772 }
10773
10774
10792 bool isNetworkSupported(INetworkDefinition const& network, IBuilderConfig const& config) const noexcept
10793 {
10794 return mImpl->isNetworkSupported(network, config);
10795 }
10796
10802 ILogger* getLogger() const noexcept
10803 {
10804 return mImpl->getLogger();
10805 }
10806
10818 bool setMaxThreads(int32_t maxThreads) noexcept
10819 {
10820 return mImpl->setMaxThreads(maxThreads);
10821 }
10822
10832 int32_t getMaxThreads() const noexcept
10833 {
10834 return mImpl->getMaxThreads();
10835 }
10836
10842 IPluginRegistry& getPluginRegistry() noexcept
10843 {
10844 return mImpl->getPluginRegistry();
10845 }
10846
10847protected:
10848 apiv::VBuilder* mImpl;
10849};
10850
10851} // namespace nvinfer1
10852
10857extern "C" TENSORRTAPI void* createInferBuilder_INTERNAL(void* logger, int32_t version) noexcept;
10858
10859namespace nvinfer1
10860{
10861namespace
10862{
10863
10871inline IBuilder* createInferBuilder(ILogger& logger) noexcept
10872{
10873 return static_cast<IBuilder*>(createInferBuilder_INTERNAL(&logger, NV_TENSORRT_VERSION));
10874}
10875
10876} // namespace
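An end-to-end hedged sketch tying the pieces in this header together: create a builder from a logger, define a network, configure the build, and serialize an engine plan. It assumes an ILogger implementation and a network-population step supplied elsewhere, and that objects are released with delete as in recent TensorRT releases.

nvinfer1::IHostMemory* buildPlan(nvinfer1::ILogger& logger)
{
    using namespace nvinfer1;
    IBuilder* builder = createInferBuilder(logger);
    if (builder == nullptr)
    {
        return nullptr;
    }
    INetworkDefinition* network = builder->createNetworkV2(
        1U << static_cast<uint32_t>(NetworkDefinitionCreationFlag::kSTRONGLY_TYPED));
    IBuilderConfig* config = builder->createBuilderConfig();
    // ... populate `network` with layers and mark its outputs here ...
    IHostMemory* plan = builder->buildSerializedNetwork(*network, *config);
    delete config;
    delete network;
    delete builder;
    return plan; // nullptr if the build failed; otherwise the caller owns the plan
}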
10877
10890 nvinfer1::EngineCapability capability) noexcept;
10891
10892namespace safe
10893{
10895class IPluginRegistry;
10896} // namespace safe
10897
10898
10899} // namespace nvinfer1
10900
10901#endif // NV_INFER_H
#define TENSORRTAPI
Definition: NvInferRuntimeBase.h:69
#define NV_TENSORRT_VERSION
Definition: NvInferRuntimeBase.h:101
#define TRT_NODISCARD
A stand-in for [[nodiscard]] and [[nodiscard(REASON)]] that works with older compilers.
Definition: NvInferRuntimeBase.h:57
#define TRT_DEPRECATED
Definition: NvInferRuntimeBase.h:42
#define TRT_DEPRECATED_ENUM
Definition: NvInferRuntimeBase.h:43
Definition: NvInferRuntimeBase.h:218
static constexpr int32_t MAX_DIMS
The maximum rank (number of dimensions) supported for a tensor.
Definition: NvInferRuntimeBase.h:221
An Activation layer in a network definition.
Definition: NvInfer.h:1267
void setBeta(float beta) noexcept
Set the beta parameter (must be finite).
Definition: NvInfer.h:1315
void setActivationType(ActivationType type) noexcept
Set the type of activation to be performed.
Definition: NvInfer.h:1276
ActivationType getActivationType() const noexcept
Get the type of activation to be performed.
Definition: NvInfer.h:1286
float getAlpha() const noexcept
Get the alpha parameter.
Definition: NvInfer.h:1324
virtual ~IActivationLayer() noexcept=default
float getBeta() const noexcept
Get the beta parameter.
Definition: NvInfer.h:1333
void setAlpha(float alpha) noexcept
Set the alpha parameter (must be finite).
Definition: NvInfer.h:1301
An assertion layer in a network.
Definition: NvInfer.h:4936
void setMessage(char const *message) noexcept
Set the message to print if the assertion fails.
Definition: NvInfer.h:4946
char const * getMessage() const noexcept
Return the assertion message.
Definition: NvInfer.h:4956
virtual ~IAssertionLayer() noexcept=default
This is a base class for Attention boundary layers.
Definition: NvInfer.h:6761
IAttention * getAttention() const noexcept
Get a pointer to the IAttention associated with this boundary layer.
Definition: NvInfer.h:6766
virtual ~IAttentionBoundaryLayer() noexcept=default
Helper for constructing an attention that consumes query, key and value tensors.
Definition: NvInfer.h:6879
ITensor * getMask() noexcept
Get the optional mask in attention.
Definition: NvInfer.h:6929
bool setMetadata(char const *metadata) noexcept
Set the metadata for IAttention.
Definition: NvInfer.h:7140
bool setDecomposable(bool decomposable) noexcept
Set whether the attention can be decomposed to use multiple kernels if no fused kernel support is found.
Definition: NvInfer.h:6966
bool setName(char const *name) noexcept
Set the name of the attention.
Definition: NvInfer.h:7056
bool getDecomposable() const noexcept
Get whether the attention can be decomposed to use multiple kernels if no fused kernel support is found.
Definition: NvInfer.h:6979
ITensor * getInput(int32_t index) const noexcept
Get the IAttention input corresponding to the given index.
Definition: NvInfer.h:7019
ITensor * getOutput(int32_t index) const noexcept
Get the IAttention output corresponding to the given index. IAttention has only one output.
Definition: NvInfer.h:7039
int32_t getNbOutputs() const noexcept
Get the number of outputs of a layer. IAttention has one output.
Definition: NvInfer.h:7027
int32_t getNbInputs() const noexcept
Get the number of inputs of IAttention. IAttention has three inputs.
Definition: NvInfer.h:7007
bool setCausal(bool isCausal) noexcept
Set whether the attention will run a causal inference. Cannot be used together with setMask().
Definition: NvInfer.h:6942
bool setNormalizationOperation(AttentionNormalizationOp op) noexcept
Set the normalization operation for the attention.
Definition: NvInfer.h:6888
char const * getName() const noexcept
Return the name of the attention.
Definition: NvInfer.h:7068
bool setNormalizationQuantizeToType(DataType type) noexcept
Set the datatype the attention normalization is quantized to.
Definition: NvInfer.h:7108
AttentionNormalizationOp getNormalizationOperation() const noexcept
Get the normalization operation for the attention.
Definition: NvInfer.h:6900
bool setNormalizationQuantizeScale(ITensor &tensor) noexcept
Set the quantization scale for the attention normalization output.
Definition: NvInfer.h:7084
char const * getMetadata() const noexcept
Get the metadata of IAttention.
Definition: NvInfer.h:7153
DataType getNormalizationQuantizeToType() const noexcept
Get the datatype the attention normalization is quantized to.
Definition: NvInfer.h:7120
ITensor * getNormalizationQuantizeScale() const noexcept
Get the quantization scale for the attention normalization output.
Definition: NvInfer.h:7095
bool setInput(int32_t index, ITensor &input) noexcept
Append or replace an input of this layer with a specific tensor.
Definition: NvInfer.h:6998
bool setMask(ITensor &mask) noexcept
Set whether a mask will be used for the normalization operation.
Definition: NvInfer.h:6917
bool getCausal() const noexcept
Get whether the attention will run a causal inference.
Definition: NvInfer.h:6954
apiv::VAttention * mImpl
Definition: NvInfer.h:7160
virtual ~IAttention() noexcept=default
This layer represents an input to an attention subgraph.
Definition: NvInfer.h:6787
virtual ~IAttentionInputLayer() noexcept=default
This layer represents an output of an IAttention.
Definition: NvInfer.h:6822
virtual ~IAttentionOutputLayer() noexcept=default
Holds properties for configuring a builder to produce an engine.
Definition: NvInfer.h:9700
void setMemoryPoolLimit(MemoryPoolType pool, std::size_t poolSize) noexcept
Set the memory size for the memory pool.
Definition: NvInfer.h:10137
bool setComputeCapability(ComputeCapability computeCapability, int32_t index) noexcept
Set one compute capability for runtime execution.
Definition: NvInfer.h:10521
bool setNbComputeCapabilities(int32_t maxNbComputeCapabilities) noexcept
Set the number of compute capabilities.
Definition: NvInfer.h:10491
TRT_DEPRECATED bool setTimingCache(ITimingCache const &cache, bool ignoreMismatch) noexcept
Attach a timing cache to IBuilderConfig.
Definition: NvInfer.h:10093
void setPreviewFeature(PreviewFeature feature, bool enable) noexcept
Enable or disable a specific preview feature.
Definition: NvInfer.h:10174
bool getPreviewFeature(PreviewFeature feature) const noexcept
Get status of preview feature.
Definition: NvInfer.h:10188
int32_t getBuilderOptimizationLevel() noexcept
Get builder optimization level.
Definition: NvInfer.h:10233
bool setTacticSources(TacticSources tacticSources) noexcept
Set tactic sources.
Definition: NvInfer.h:10031
void setPluginsToSerialize(char const *const *paths, int32_t nbPaths) noexcept
Set the plugin libraries to be serialized with version-compatible engines.
Definition: NvInfer.h:10276
bool setTilingOptimizationLevel(TilingOptimizationLevel level) noexcept
Set the Tiling optimization level.
Definition: NvInfer.h:10432
bool setL2LimitForTiling(int64_t size) noexcept
Set the L2 cache usage limit for Tiling optimization.
Definition: NvInfer.h:10460
std::size_t getMemoryPoolLimit(MemoryPoolType pool) const noexcept
Get the memory size limit of the memory pool.
Definition: NvInfer.h:10156
int32_t getDLACore() const noexcept
Get the DLA core that the engine executes on.
Definition: NvInfer.h:9899
int32_t getNbPluginsToSerialize() const noexcept
Get the number of plugin library paths to be serialized with version-compatible engines.
Definition: NvInfer.h:10299
void setDeviceType(ILayer const *layer, DeviceType deviceType) noexcept
Set the device that this layer must execute on.
Definition: NvInfer.h:9831
void setEngineCapability(EngineCapability capability) noexcept
Configure the builder to target specified EngineCapability flow.
Definition: NvInfer.h:9737
int32_t getMaxAuxStreams() const noexcept
Get the maximum number of auxiliary streams that TRT is allowed to use.
Definition: NvInfer.h:10338
bool getFlag(BuilderFlag builderFlag) const noexcept
Returns true if the build mode flag is set.
Definition: NvInfer.h:9814
void setMaxNbTactics(int32_t maxNbTactics) noexcept
Set the maximum number of tactics to time when there is a choice of tactics.
Definition: NvInfer.h:10404
int64_t getL2LimitForTiling() const noexcept
Get the L2 cache usage limit for tiling optimization.
Definition: NvInfer.h:10472
void setProgressMonitor(IProgressMonitor *monitor) noexcept
Sets the progress monitor for building a network.
Definition: NvInfer.h:10354
void setProfilingVerbosity(ProfilingVerbosity verbosity) noexcept
Set verbosity level of layer information exposed in NVTX annotations and IEngineInspector.
Definition: NvInfer.h:9996
int32_t getNbOptimizationProfiles() const noexcept
Get number of optimization profiles.
Definition: NvInfer.h:9984
void reset() noexcept
Resets the builder configuration to defaults.
Definition: NvInfer.h:9930
char const * getPluginToSerialize(int32_t index) const noexcept
Get the plugin library path to be serialized with version-compatible engines.
Definition: NvInfer.h:10289
EngineCapability getEngineCapability() const noexcept
Query EngineCapability flow configured for the builder.
Definition: NvInfer.h:9749
RuntimePlatform getRuntimePlatform() const noexcept
Get the target platform for runtime execution.
Definition: NvInfer.h:10392
DeviceType getDefaultDeviceType() const noexcept
Get the default DeviceType which was set by setDefaultDeviceType.
Definition: NvInfer.h:9920
void setRuntimePlatform(RuntimePlatform runtimePlatform) noexcept
Set the target platform for runtime execution.
Definition: NvInfer.h:10380
int32_t getMaxNbTactics() const noexcept
Query the maximum number of tactics timed when there is a choice.
Definition: NvInfer.h:10416
BuilderFlags getFlags() const noexcept
Get the build mode flags for this builder config. Defaults to 0.
Definition: NvInfer.h:9778
void setFlags(BuilderFlags builderFlags) noexcept
Set the build mode flags to turn on builder options for this network.
Definition: NvInfer.h:9766
TacticSources getTacticSources() const noexcept
Get tactic sources.
Definition: NvInfer.h:10046
void resetDeviceType(ILayer const *layer) noexcept
reset the DeviceType for this layer
Definition: NvInfer.h:9863
ComputeCapability getComputeCapability(int32_t index) const noexcept
Get one compute capability for runtime execution.
Definition: NvInfer.h:10535
void setDLACore(int32_t dlaCore) noexcept
Sets the DLA core used by the network. Defaults to -1.
Definition: NvInfer.h:9889
HardwareCompatibilityLevel getHardwareCompatibilityLevel() const noexcept
Get the hardware compatibility level.
Definition: NvInfer.h:10263
int32_t getNbComputeCapabilities() const noexcept
Get the number of compute capabilities.
Definition: NvInfer.h:10503
void clearFlag(BuilderFlag builderFlag) noexcept
clear a single build mode flag.
Definition: NvInfer.h:9790
int32_t addOptimizationProfile(IOptimizationProfile const *profile) noexcept
Add an optimization profile.
Definition: NvInfer.h:9971
IProgressMonitor * getProgressMonitor() const noexcept
Definition: NvInfer.h:10364
apiv::VBuilderConfig * mImpl
Definition: NvInfer.h:10541
int32_t getAvgTimingIterations() const noexcept
Query the number of averaging iterations.
Definition: NvInfer.h:9724
void setDefaultDeviceType(DeviceType deviceType) noexcept
Sets the default DeviceType to be used by the builder. It ensures that all the layers that can run on...
Definition: NvInfer.h:9910
void setFlag(BuilderFlag builderFlag) noexcept
Set a single build mode flag.
Definition: NvInfer.h:9802
TRT_DEPRECATED nvinfer1::ITimingCache * createTimingCache(void const *blob, std::size_t size) const noexcept
Create timing cache.
Definition: NvInfer.h:10068
virtual ~IBuilderConfig() noexcept=default
DeviceType getDeviceType(ILayer const *layer) const noexcept
Get the device that this layer executes on.
Definition: NvInfer.h:9841
bool canRunOnDLA(ILayer const *layer) const noexcept
Checks if a layer can run on DLA.
Definition: NvInfer.h:9873
cudaStream_t getProfileStream() const noexcept
Get the CUDA stream that is used to profile this network.
Definition: NvInfer.h:9954
void setHardwareCompatibilityLevel(HardwareCompatibilityLevel hardwareCompatibilityLevel) noexcept
Set the hardware compatibility level.
Definition: NvInfer.h:10250
TilingOptimizationLevel getTilingOptimizationLevel() const noexcept
Get the Tiling optimization level.
Definition: NvInfer.h:10444
void setMaxAuxStreams(int32_t nbStreams) noexcept
Set the maximum number of auxiliary streams that TRT is allowed to use.
Definition: NvInfer.h:10328
ProfilingVerbosity getProfilingVerbosity() const noexcept
Get verbosity level of layer information exposed in NVTX annotations and IEngineInspector.
Definition: NvInfer.h:10009
TRT_DEPRECATED nvinfer1::ITimingCache const * getTimingCache() const noexcept
Get the pointer to the timing cache from current IBuilderConfig.
Definition: NvInfer.h:10105
bool isDeviceTypeSet(ILayer const *layer) const noexcept
whether the DeviceType has been explicitly set for this layer
Definition: NvInfer.h:9853
void setBuilderOptimizationLevel(int32_t level) noexcept
Set builder optimization level.
Definition: NvInfer.h:10221
void setProfileStream(const cudaStream_t stream) noexcept
Set the CUDA stream that is used to profile this network.
Definition: NvInfer.h:9942
Builds an engine from a network definition.
Definition: NvInfer.h:10594
int32_t getNbDLACores() const noexcept
Return the number of DLA engines available to this builder.
Definition: NvInfer.h:10613
IErrorRecorder * getErrorRecorder() const noexcept
get the ErrorRecorder assigned to this interface.
Definition: NvInfer.h:10720
apiv::VBuilder * mImpl
Definition: NvInfer.h:10848
ILogger * getLogger() const noexcept
get the logger with which the builder was created
Definition: NvInfer.h:10802
bool isNetworkSupported(INetworkDefinition const &network, IBuilderConfig const &config) const noexcept
Checks that a network is within the scope of the IBuilderConfig settings.
Definition: NvInfer.h:10792
int32_t getMaxThreads() const noexcept
get the maximum number of threads that can be used by the builder.
Definition: NvInfer.h:10832
IPluginRegistry & getPluginRegistry() noexcept
get the local plugin registry that can be used by the builder.
Definition: NvInfer.h:10842
nvinfer1::IOptimizationProfile * createOptimizationProfile() noexcept
Create a new optimization profile.
Definition: NvInfer.h:10686
void setGpuAllocator(IGpuAllocator *allocator) noexcept
Set the GPU allocator.
Definition: NvInfer.h:10631
nvinfer1::INetworkDefinition * createNetworkV2(NetworkDefinitionCreationFlags flags) noexcept
Create a network definition object.
Definition: NvInfer.h:10671
nvinfer1::IBuilderConfig * createBuilderConfig() noexcept
Create a builder configuration object.
Definition: NvInfer.h:10645
void reset() noexcept
Resets the builder state to default values.
Definition: NvInfer.h:10728
bool setMaxThreads(int32_t maxThreads) noexcept
Set the maximum number of threads.
Definition: NvInfer.h:10818
void setErrorRecorder(IErrorRecorder *recorder) noexcept
Set the ErrorRecorder for this interface.
Definition: NvInfer.h:10705
nvinfer1::IHostMemory * buildSerializedNetwork(INetworkDefinition &network, IBuilderConfig &config) noexcept
Builds and serializes a network for the given INetworkDefinition and IBuilderConfig.
Definition: NvInfer.h:10747
virtual ~IBuilder() noexcept=default
bool buildSerializedNetworkToStream(INetworkDefinition &network, IBuilderConfig &config, IStreamWriter &writer) noexcept
Builds and serializes a network into stream for the given INetworkDefinition and IBuilderConfig.
Definition: NvInfer.h:10768
A cast layer in a network.
Definition: NvInfer.h:3797
virtual ~ICastLayer() noexcept=default
apiv::VCastLayer * mImpl
Definition: NvInfer.h:3823
DataType getToType() const noexcept
Return cast layer output type.
Definition: NvInfer.h:3817
void setToType(DataType toType) noexcept
Set cast layer output type.
Definition: NvInfer.h:3806
A concatenation layer in a network definition.
Definition: NvInfer.h:1977
void setAxis(int32_t axis) noexcept
Set the axis along which concatenation occurs.
Definition: NvInfer.h:1990
int32_t getAxis() const noexcept
Get the axis along which concatenation occurs.
Definition: NvInfer.h:2000
virtual ~IConcatenationLayer() noexcept=default
This layer represents a condition input to an IIfConditional.
Definition: NvInfer.h:4460
virtual ~IConditionLayer() noexcept=default
Layer that represents a constant value.
Definition: NvInfer.h:3836
void setWeights(Weights weights) noexcept
Set the weights for the layer.
Definition: NvInfer.h:3846
Weights getWeights() const noexcept
Get the weights for the layer.
Definition: NvInfer.h:3856
void setDimensions(Dims const &dimensions) noexcept
Set the dimensions for the layer.
Definition: NvInfer.h:3868
apiv::VConstantLayer * mImpl
Definition: NvInfer.h:3886
virtual ~IConstantLayer() noexcept=default
Dims getDimensions() const noexcept
Get the dimensions for the layer.
Definition: NvInfer.h:3880
A convolution layer in a network definition.
Definition: NvInfer.h:947
Dims getPrePadding() const noexcept
Get the pre-padding.
Definition: NvInfer.h:1072
Weights getBiasWeights() const noexcept
Get the bias weights for the convolution.
Definition: NvInfer.h:1045
void setPaddingMode(PaddingMode paddingMode) noexcept
Set the padding mode.
Definition: NvInfer.h:1113
void setDilationNd(Dims const &dilation) noexcept
Set the multi-dimension dilation of the convolution.
Definition: NvInfer.h:1217
Dims getPaddingNd() const noexcept
Get the multi-dimension padding of the convolution.
Definition: NvInfer.h:1203
Dims getStrideNd() const noexcept
Get the multi-dimension stride of the convolution.
Definition: NvInfer.h:1173
Weights getKernelWeights() const noexcept
Get the kernel weights of the convolution.
Definition: NvInfer.h:1020
void setStrideNd(Dims const &stride) noexcept
Set the multi-dimension stride of the convolution.
Definition: NvInfer.h:1163
Dims getDilationNd() const noexcept
Get the multi-dimension dilation of the convolution.
Definition: NvInfer.h:1227
int64_t getNbOutputMaps() const noexcept
Get the number of output maps for the convolution.
Definition: NvInfer.h:966
void setKernelWeights(Weights weights) noexcept
Set the kernel weights for the convolution.
Definition: NvInfer.h:1010
Dims getPostPadding() const noexcept
Get the post-padding.
Definition: NvInfer.h:1099
int64_t getNbGroups() const noexcept
Get the number of groups of the convolution.
Definition: NvInfer.h:996
PaddingMode getPaddingMode() const noexcept
Get the padding mode.
Definition: NvInfer.h:1125
virtual ~IConvolutionLayer() noexcept=default
void setNbGroups(int64_t nbGroups) noexcept
Set the number of groups for a convolution.
Definition: NvInfer.h:986
void setNbOutputMaps(int64_t nbOutputMaps) noexcept
Set the number of output maps for the convolution.
Definition: NvInfer.h:956
void setBiasWeights(Weights weights) noexcept
Set the bias weights for the convolution.
Definition: NvInfer.h:1035
Dims getKernelSizeNd() const noexcept
Get the multi-dimension kernel size of the convolution.
Definition: NvInfer.h:1148
void setPaddingNd(Dims const &padding) noexcept
Set the multi-dimension padding of the convolution.
Definition: NvInfer.h:1191
void setPrePadding(Dims const &padding) noexcept
Set the multi-dimension pre-padding of the convolution.
Definition: NvInfer.h:1062
void setPostPadding(Dims const &padding) noexcept
Set the multi-dimension post-padding of the convolution.
Definition: NvInfer.h:1089
void setKernelSizeNd(Dims const &kernelSize) noexcept
Set the multi-dimension kernel size of the convolution.
Definition: NvInfer.h:1138
Layer that represents a cumulative operation across a tensor.
Definition: NvInfer.h:6643
bool setOperation(CumulativeOperation op) noexcept
Set the cumulative operation for the layer.
Definition: NvInfer.h:6654
void setReverse(bool reverse) noexcept
Specify whether the cumulative operation should be applied backward.
Definition: NvInfer.h:6702
apiv::VCumulativeLayer * mImpl
Definition: NvInfer.h:6720
bool getExclusive() const noexcept
Get whether it is exclusive accumulation or inclusive accumulation.
Definition: NvInfer.h:6690
virtual ~ICumulativeLayer() noexcept=default
bool getReverse() const noexcept
Get the boolean that specifies whether the cumulative operation should be applied backward.
Definition: NvInfer.h:6714
void setExclusive(bool exclusive) noexcept
Set whether it is an exclusive accumulation or inclusive accumulation.
Definition: NvInfer.h:6678
CumulativeOperation getOperation() const noexcept
Get the cumulative operation for the layer.
Definition: NvInfer.h:6666
A deconvolution layer in a network definition.
Definition: NvInfer.h:2018
void setBiasWeights(Weights weights) noexcept
Set the bias weights for the deconvolution.
Definition: NvInfer.h:2106
int64_t getNbGroups() const noexcept
Get the number of groups for a deconvolution.
Definition: NvInfer.h:2067
Weights getKernelWeights() const noexcept
Get the kernel weights for the deconvolution.
Definition: NvInfer.h:2091
void setPrePadding(Dims const &padding) noexcept
Set the multi-dimension pre-padding of the deconvolution.
Definition: NvInfer.h:2133
Dims getStrideNd() const noexcept
Get the multi-dimension stride of the deconvolution.
Definition: NvInfer.h:2248
Dims getDilationNd() const noexcept
Get the multi-dimension dilation of the deconvolution.
Definition: NvInfer.h:2314
Weights getBiasWeights() const noexcept
Get the bias weights for the deconvolution.
Definition: NvInfer.h:2116
void setKernelWeights(Weights weights) noexcept
Set the kernel weights for the deconvolution.
Definition: NvInfer.h:2081
int64_t getNbOutputMaps() const noexcept
Get the number of output feature maps for the deconvolution.
Definition: NvInfer.h:2037
void setStrideNd(Dims const &stride) noexcept
Set the multi-dimension stride of the deconvolution.
Definition: NvInfer.h:2238
Dims getPostPadding() const noexcept
Get the padding.
Definition: NvInfer.h:2170
Dims getKernelSizeNd() const noexcept
Get the multi-dimension kernel size of the deconvolution.
Definition: NvInfer.h:2221
void setPostPadding(Dims const &padding) noexcept
Set the multi-dimension post-padding of the deconvolution.
Definition: NvInfer.h:2160
void setKernelSizeNd(Dims const &kernelSize) noexcept
Set the multi-dimension kernel size of the deconvolution.
Definition: NvInfer.h:2211
virtual ~IDeconvolutionLayer() noexcept=default
void setPaddingNd(Dims const &padding) noexcept
Set the multi-dimension padding of the deconvolution.
Definition: NvInfer.h:2266
void setNbOutputMaps(int64_t nbOutputMaps) noexcept
Set the number of output feature maps for the deconvolution.
Definition: NvInfer.h:2027
Dims getPaddingNd() const noexcept
Get the multi-dimension padding of the deconvolution.
Definition: NvInfer.h:2278
void setDilationNd(Dims const &dilation) noexcept
Set the multi-dimension dilation of the deconvolution.
Definition: NvInfer.h:2304
void setPaddingMode(PaddingMode paddingMode) noexcept
Set the padding mode.
Definition: NvInfer.h:2184
void setNbGroups(int64_t nbGroups) noexcept
Set the number of groups for a deconvolution.
Definition: NvInfer.h:2057
Dims getPrePadding() const noexcept
Get the pre-padding.
Definition: NvInfer.h:2143
PaddingMode getPaddingMode() const noexcept
Get the padding mode.
Definition: NvInfer.h:2196
A Dequantize layer in a network definition.
Definition: NvInfer.h:5545
TRT_NODISCARD Dims getBlockShape() const noexcept
Get the shape of the quantization block.
Definition: NvInfer.h:5594
void setToType(DataType toType) noexcept
Set the Dequantize layer output type.
Definition: NvInfer.h:5610
virtual ~IDequantizeLayer() noexcept=default
int32_t getAxis() const noexcept
Get the quantization axis.
Definition: NvInfer.h:5555
bool setBlockShape(Dims const &blockShape) noexcept
Set the shape of the quantization block.
Definition: NvInfer.h:5583
DataType getToType() const noexcept
Return the Dequantize layer output type.
Definition: NvInfer.h:5622
void setAxis(int32_t axis) noexcept
Set the quantization axis.
Definition: NvInfer.h:5566
A network layer to perform dynamic quantization.
Definition: NvInfer.h:5650
DataType getScaleType() const noexcept
Return the scale factors data type.
Definition: NvInfer.h:5716
TRT_DEPRECATED void setAxis(int32_t axis) noexcept
Set the axis along which block quantization occurs.
Definition: NvInfer.h:5729
TRT_DEPRECATED void setBlockSize(int32_t size) noexcept
Set the size of the quantization block.
Definition: NvInfer.h:5752
Dims getBlockShape() const noexcept
Get the shape of the quantization block.
Definition: NvInfer.h:5787
void setScaleType(DataType scaleType) noexcept
Set the data type of the scale factors used to quantize the data.
Definition: NvInfer.h:5703
DataType getToType() const noexcept
Return DynamicQuantizeLayer's quantized output type.
Definition: NvInfer.h:5690
TRT_DEPRECATED int32_t getAxis() const noexcept
Get the axis along which blocking occurs.
Definition: NvInfer.h:5739
virtual ~IDynamicQuantizeLayer() noexcept=default
void setToType(DataType toType) noexcept
Set DynamicQuantizeLayer's quantized output type.
Definition: NvInfer.h:5677
void setBlockShape(Dims const &blockShape) noexcept
Set the shape of the quantization block.
Definition: NvInfer.h:5775
TRT_DEPRECATED int32_t getBlockSize() const noexcept
Get the size of the quantization block.
Definition: NvInfer.h:5762
An Einsum layer in a network.
Definition: NvInfer.h:5832
bool setEquation(char const *equation) noexcept
Set the equation. The equation is a comma-separated list of subscript labels, where each label refers...
Definition: NvInfer.h:5843
virtual ~IEinsumLayer() noexcept=default
char const * getEquation() const noexcept
Return the equation.
Definition: NvInfer.h:5853
A elementwise layer in a network definition.
Definition: NvInfer.h:2388
virtual ~IElementWiseLayer() noexcept=default
apiv::VElementWiseLayer * mImpl
Definition: NvInfer.h:2417
ElementWiseOperation getOperation() const noexcept
Get the binary operation for the layer.
Definition: NvInfer.h:2411
void setOperation(ElementWiseOperation op) noexcept
Set the binary operation for the layer.
Definition: NvInfer.h:2399
Generate a tensor according to a specified mode.
Definition: NvInfer.h:5047
bool isAlphaBetaInt64() const noexcept
Return true if alpha/beta have type int64, false if they have type double.
Definition: NvInfer.h:5279
FillOperation getOperation() const noexcept
Get the fill operation for the layer.
Definition: NvInfer.h:5093
void setOperation(FillOperation op) noexcept
Set the fill operation for the layer.
Definition: NvInfer.h:5083
DataType getToType() const noexcept
Get the fill layer output type.
Definition: NvInfer.h:5308
void setAlphaInt64(int64_t alpha) noexcept
Set the alpha parameter with int64 datatype.
Definition: NvInfer.h:5222
void setBetaInt64(int64_t beta) noexcept
Set the beta parameter with int64 datatype.
Definition: NvInfer.h:5256
void setBeta(double beta) noexcept
Set the beta parameter.
Definition: NvInfer.h:5146
int64_t getAlphaInt64() const noexcept
Get the value of alpha parameter with int64 datatype.
Definition: NvInfer.h:5237
int64_t getBetaInt64() const noexcept
Get the value of beta parameter with int64 datatype.
Definition: NvInfer.h:5271
double getAlpha() const noexcept
Get the value of alpha parameter.
Definition: NvInfer.h:5127
void setDimensions(Dims const &dimensions) noexcept
Set the output tensor's dimensions.
Definition: NvInfer.h:5058
void setAlpha(double alpha) noexcept
Set the alpha parameter.
Definition: NvInfer.h:5112
void setToType(DataType toType) noexcept
Set the fill layer output type.
Definition: NvInfer.h:5296
Dims getDimensions() const noexcept
Get the output tensor's dimensions.
Definition: NvInfer.h:5073
double getBeta() const noexcept
Get the value of beta parameter.
Definition: NvInfer.h:5161
virtual ~IFillLayer() noexcept=default
A Gather layer in a network definition. Supports several kinds of gathering.
Definition: NvInfer.h:2521
void setGatherAxis(int32_t axis) noexcept
Set the axis used by GatherMode::kELEMENTS and GatherMode::kDEFAULT The axis must be less than the nu...
Definition: NvInfer.h:2532
void setNbElementWiseDims(int32_t elementWiseDims) noexcept
Set the number of leading dimensions of indices tensor to be handled elementwise.
Definition: NvInfer.h:2567
apiv::VGatherLayer * mImpl
Definition: NvInfer.h:2603
int32_t getNbElementWiseDims() const noexcept
Get the number of leading dimensions of indices tensor to be handled elementwise.
Definition: NvInfer.h:2577
void setMode(GatherMode mode) noexcept
Set the gather mode.
Definition: NvInfer.h:2587
int32_t getGatherAxis() const noexcept
Get the axis to gather on.
Definition: NvInfer.h:2544
GatherMode getMode() const noexcept
Get the gather mode.
Definition: NvInfer.h:2597
virtual ~IGatherLayer() noexcept=default
A GridSample layer in a network definition.
Definition: NvInfer.h:6054
void setInterpolationMode(InterpolationMode mode) noexcept
Set the grid sample interpolation mode.
Definition: NvInfer.h:6061
bool setSampleMode(SampleMode mode) noexcept
Set the sample mode.
Definition: NvInfer.h:6107
void setAlignCorners(bool alignCorners) noexcept
Set the align corners mode.
Definition: NvInfer.h:6083
apiv::VGridSampleLayer * mImpl
Definition: NvInfer.h:6125
SampleMode getSampleMode() const noexcept
Get the sample mode.
Definition: NvInfer.h:6119
InterpolationMode getInterpolationMode() const noexcept
Get the grid sample interpolation mode.
Definition: NvInfer.h:6073
bool getAlignCorners() const noexcept
Get the align corners mode.
Definition: NvInfer.h:6095
virtual ~IGridSampleLayer() noexcept=default
Class to handle library allocated memory that is accessible to the user.
Definition: NvInferRuntime.h:142
A layer that represents the identity function.
Definition: NvInfer.h:3784
apiv::VIdentityLayer * mImpl
Definition: NvInfer.h:3786
virtual ~IIdentityLayer() noexcept=default
This is a base class for Conditional boundary layers.
Definition: NvInfer.h:4439
IIfConditional * getConditional() const noexcept
Get a pointer to the IIfConditional associated with this boundary layer.
Definition: NvInfer.h:4444
virtual ~IIfConditionalBoundaryLayer() noexcept=default
Helper for constructing conditionally-executed subgraphs.
Definition: NvInfer.h:4522
IIfConditionalInputLayer * addInput(ITensor &input) noexcept
Add an If-conditional input.
Definition: NvInfer.h:4563
char const * getName() const noexcept
Return the name of the conditional.
Definition: NvInfer.h:4588
virtual ~IIfConditional() noexcept=default
IConditionLayer * setCondition(ITensor &condition) noexcept
Set the condition tensor for this If-Conditional construct.
Definition: NvInfer.h:4533
IIfConditionalOutputLayer * addOutput(ITensor &trueSubgraphOutput, ITensor &falseSubgraphOutput) noexcept
Add an If-conditional output.
Definition: NvInfer.h:4551
void setName(char const *name) noexcept
Set the name of the conditional.
Definition: NvInfer.h:4578
This layer represents an input to an IIfConditional.
Definition: NvInfer.h:4490
virtual ~IIfConditionalInputLayer() noexcept=default
This layer represents an output of an IIfConditional.
Definition: NvInfer.h:4477
virtual ~IIfConditionalOutputLayer() noexcept=default
A layer to do iterations.
Definition: NvInfer.h:4753
virtual ~IIteratorLayer() noexcept=default
void setReverse(bool reverse) noexcept
Set iteration order to be reverse.
Definition: NvInfer.h:4780
bool getReverse() const noexcept
Check if the iteration order is reverse.
Definition: NvInfer.h:4790
int32_t getAxis() const noexcept
Get axis being iterated over.
Definition: NvInfer.h:4766
void setAxis(int32_t axis) noexcept
Set axis to iterate over.
Definition: NvInfer.h:4758
Layer that represents a KVCacheUpdate operation.
Definition: NvInfer.h:7283
bool setCacheMode(KVCacheMode cacheMode) noexcept
Set the mode of the KVCacheUpdate layer.
Definition: NvInfer.h:7306
virtual ~IKVCacheUpdateLayer() noexcept=default
KVCacheMode getCacheMode() const noexcept
Get the mode of the KVCacheUpdate layer.
Definition: NvInfer.h:7316
apiv::VKVCacheUpdateLayer * mImpl
Definition: NvInfer.h:7322
A LRN layer in a network definition.
Definition: NvInfer.h:1632
int64_t getWindowSize() const noexcept
Get the LRN window size.
Definition: NvInfer.h:1653
float getAlpha() const noexcept
Get the LRN alpha value.
Definition: NvInfer.h:1675
void setWindowSize(int64_t windowSize) noexcept
Set the LRN window size.
Definition: NvInfer.h:1643
void setK(float k) noexcept
Set the LRN K value.
Definition: NvInfer.h:1709
void setAlpha(float alpha) noexcept
Set the LRN alpha value.
Definition: NvInfer.h:1665
void setBeta(float beta) noexcept
Set the LRN beta value.
Definition: NvInfer.h:1687
virtual ~ILRNLayer() noexcept=default
float getBeta() const noexcept
Get the LRN beta value.
Definition: NvInfer.h:1697
float getK() const noexcept
Get the LRN K value.
Definition: NvInfer.h:1719
Base class for all layer classes in a network definition.
Definition: NvInfer.h:462
TRT_DEPRECATED void setPrecision(DataType dataType) noexcept
Set the preferred or required computational precision of this layer in a weakly-typed network.
Definition: NvInfer.h:582
TRT_DEPRECATED void setOutputType(int32_t index, DataType dataType) noexcept
Set the output type of this layer in a weakly-typed network.
Definition: NvInfer.h:670
TRT_DEPRECATED bool precisionIsSet() const noexcept
whether the computational precision has been set for this layer
Definition: NvInfer.h:608
void setMetadata(char const *metadata) noexcept
Set the metadata for this layer.
Definition: NvInfer.h:733
TRT_DEPRECATED void resetOutputType(int32_t index) noexcept
reset the output type for this layer
Definition: NvInfer.h:715
void setName(char const *name) noexcept
Set the name of a layer.
Definition: NvInfer.h:483
int32_t getNbInputs() const noexcept
Get the number of inputs of a layer.
Definition: NvInfer.h:501
char const * getMetadata() const noexcept
Get the metadata of the layer.
Definition: NvInfer.h:746
DataType getOutputType(int32_t index) const noexcept
get the output type of this layer
Definition: NvInfer.h:685
DataType getPrecision() const noexcept
get the computational precision of this layer
Definition: NvInfer.h:594
TRT_DEPRECATED bool outputTypeIsSet(int32_t index) const noexcept
whether the output type has been set for this layer
Definition: NvInfer.h:701
char const * getName() const noexcept
Return the name of a layer.
Definition: NvInfer.h:493
int32_t getNbOutputs() const noexcept
Get the number of outputs of a layer.
Definition: NvInfer.h:522
ITensor * getOutput(int32_t index) const noexcept
Get the layer output corresponding to the given index.
Definition: NvInfer.h:532
void setInput(int32_t index, ITensor &tensor) noexcept
Replace an input of this layer with a specific tensor.
Definition: NvInfer.h:549
ITensor * getInput(int32_t index) const noexcept
Get the layer input corresponding to the given index.
Definition: NvInfer.h:514
LayerType getType() const noexcept
Return the type of a layer.
Definition: NvInfer.h:469
TRT_DEPRECATED void resetPrecision() noexcept
reset the computational precision for this layer
Definition: NvInfer.h:620
virtual ~ILayer() noexcept=default
Application-implemented logging interface for the builder, refitter and runtime.
Definition: NvInferRuntime.h:1588
This is a base class for Loop boundary layers.
Definition: NvInfer.h:4416
virtual ~ILoopBoundaryLayer() noexcept=default
ILoop * getLoop() const noexcept
Get a pointer to ILoop associated with this boundary layer.
Definition: NvInfer.h:4421
Helper for creating a recurrent subgraph.
Definition: NvInfer.h:4811
void setName(char const *name) noexcept
Set the name of the loop.
Definition: NvInfer.h:4881
ITripLimitLayer * addTripLimit(ITensor &tensor, TripLimit limit) noexcept
Add a trip-count limiter, based on the given tensor.
Definition: NvInfer.h:4840
IIteratorLayer * addIterator(ITensor &tensor, int32_t axis=0, bool reverse=false) noexcept
Return layer that subscripts tensor by loop iteration.
Definition: NvInfer.h:4853
ILoopOutputLayer * addLoopOutput(ITensor &tensor, LoopOutput outputKind, int32_t axis=0) noexcept
Make an output for this loop, based on the given tensor.
Definition: NvInfer.h:4866
virtual ~ILoop() noexcept=default
char const * getName() const noexcept
Return the name of the loop.
Definition: NvInfer.h:4891
IRecurrenceLayer * addRecurrence(ITensor &initialValue) noexcept
Create a recurrence layer for this loop with initialValue as its first input.
Definition: NvInfer.h:4819
An ILoopOutputLayer is the sole way to get output from a loop.
Definition: NvInfer.h:4653
virtual ~ILoopOutputLayer() noexcept=default
int32_t getAxis() const noexcept
Get axis being concatenated over.
Definition: NvInfer.h:4683
LoopOutput getLoopOutput() const noexcept
Get which kind a loop output has.
Definition: NvInfer.h:4658
void setAxis(int32_t axis) noexcept
Set where to insert the concatenation axis. Ignored if getLoopOutput() is kLAST_VALUE.
Definition: NvInfer.h:4675
Layer that represents a Matrix Multiplication.
Definition: NvInfer.h:3631
apiv::VMatrixMultiplyLayer * mImpl
Definition: NvInfer.h:3659
virtual ~IMatrixMultiplyLayer() noexcept=default
MatrixOperation getOperation(int32_t index) const noexcept
Get the operation for an input tensor.
Definition: NvInfer.h:3653
void setOperation(int32_t index, MatrixOperation op) noexcept
Set the operation for an input tensor.
Definition: NvInfer.h:3641
A non-maximum suppression layer in a network definition.
Definition: NvInfer.h:6206
virtual ~INMSLayer() noexcept=default
void setTopKBoxLimit(int32_t limit) noexcept
Set the TopK box limit parameter for the layer.
Definition: NvInfer.h:6243
void setBoundingBoxFormat(BoundingBoxFormat fmt) noexcept
Set the bounding box format parameter for the layer.
Definition: NvInfer.h:6217
BoundingBoxFormat getBoundingBoxFormat() const noexcept
Get the bounding box format parameter for the layer.
Definition: NvInfer.h:6229
bool setIndicesType(DataType type) noexcept
Set the indices type for the layer.
Definition: NvInfer.h:6288
apiv::VNMSLayer * mImpl
Definition: NvInfer.h:6306
int32_t getTopKBoxLimit() const noexcept
Get the TopK box limit parameter for the layer.
Definition: NvInfer.h:6253
DataType getIndicesType() const noexcept
Return the NMS layer indices type.
Definition: NvInfer.h:6300
A network definition for input to the builder.
Definition: NvInfer.h:7344
IConcatenationLayer * addConcatenation(ITensor *const *inputs, int32_t nbInputs) noexcept
Add a concatenation layer to the network.
Definition: NvInfer.h:7572
IShuffleLayer * addShuffle(ITensor &input) noexcept
Add a shuffle layer to the network.
Definition: NvInfer.h:7635
void setName(char const *name) noexcept
Sets the name of the network.
Definition: NvInfer.h:8061
ITopKLayer * addTopK(ITensor &input, TopKOperation op, int32_t k, uint32_t reduceAxes, DataType indicesType) noexcept
Add a TopK layer to the network.
Definition: NvInfer.h:7831
bool markDebug(ITensor &tensor) noexcept
Mark a tensor as a debug tensor.
Definition: NvInfer.h:7415
ILRNLayer * addLRN(ITensor &input, int64_t window, float alpha, float beta, float k) noexcept
Add a LRN layer to the network.
Definition: NvInfer.h:7516
ICumulativeLayer * addCumulative(ITensor &input, ITensor &axis, CumulativeOperation operation, bool exclusive, bool reverse) noexcept
Add a cumulative layer to the network.
Definition: NvInfer.h:8728
IAssertionLayer * addAssertion(ITensor &condition, char const *message) noexcept
Add an assertion layer to the network.
Definition: NvInfer.h:8363
TRT_DEPRECATED INonZeroLayer * addNonZero(ITensor &input) noexcept
Add a nonzero layer to the network.
Definition: NvInfer.h:7922
IConvolutionLayer * addConvolutionNd(ITensor &input, int64_t nbOutputMaps, Dims const &kernelSize, Weights kernelWeights, Weights biasWeights) noexcept
Add a multi-dimension convolution layer to the network.
Definition: NvInfer.h:8182
ICastLayer * addCast(ITensor &input, DataType toType) noexcept
Add a cast layer.
Definition: NvInfer.h:7991
IScaleLayer * addScaleNd(ITensor &input, ScaleMode mode, Weights shift, Weights scale, Weights power, int32_t channelAxis) noexcept
Add a multi-dimension scale layer to the network.
Definition: NvInfer.h:8261
char const * getName() const noexcept
Returns the name associated with the network.
Definition: NvInfer.h:8075
IParametricReLULayer * addParametricReLU(ITensor &input, ITensor &slope) noexcept
Add a parametric ReLU layer to the network.
Definition: NvInfer.h:8160
ITensor * getOutput(int32_t index) const noexcept
Get the output tensor specified by the given index.
Definition: NvInfer.h:7736
ITensor * getInput(int32_t index) const noexcept
Get the input tensor specified by the given index.
Definition: NvInfer.h:7706
TRT_DEPRECATED ITopKLayer * addTopK(ITensor &input, TopKOperation op, int32_t k, uint32_t reduceAxes) noexcept
Add a TopK layer to the network.
Definition: NvInfer.h:7798
IDequantizeLayer * addDequantize(ITensor &input, ITensor &scale, DataType outputType) noexcept
Add a dequantization layer to the network.
Definition: NvInfer.h:8486
bool unmarkOutputForShapes(ITensor &tensor) noexcept
Undo markOutputForShapes.
Definition: NvInfer.h:8142
IFillLayer * addFill(Dims const &dimensions, FillOperation op, DataType outputType) noexcept
Add a fill layer to the network.
Definition: NvInfer.h:8389
ILoop * addLoop() noexcept
Add a loop to the network.
Definition: NvInfer.h:8292
bool markUnfusedTensorsAsDebugTensors() noexcept
Mark unfused tensors as debug tensors.
Definition: NvInfer.h:7463
TRT_NODISCARD INormalizationLayer * addNormalizationV2(ITensor &input, ITensor &scale, ITensor &bias, uint32_t axesMask) noexcept
Add a normalization layer to the network.
Definition: NvInfer.h:8930
IActivationLayer * addActivation(ITensor &input, ActivationType type) noexcept
Add an activation layer to the network.
Definition: NvInfer.h:7497
ISliceLayer * addSlice(ITensor &input, Dims const &start, Dims const &size, Dims const &stride) noexcept
Add a slice layer to the network.
Definition: NvInfer.h:8037
virtual ~INetworkDefinition() noexcept=default
virtual IBuilder & getBuilder() const noexcept
Return the builder from which this INetworkDefinition was created.
Definition: NvInfer.h:8826
ILayer * getLayer(int32_t index) const noexcept
Get the layer specified by the given index.
Definition: NvInfer.h:7678
bool isDebugTensor(ITensor const &tensor) const noexcept
Check if a tensor is marked as debug tensor.
Definition: NvInfer.h:7441
bool getFlag(NetworkDefinitionCreationFlag networkDefinitionCreationFlag) const noexcept
Returns true if the network definition creation flag is set.
Definition: NvInfer.h:8113
IIfConditional * addIfConditional() noexcept
Add an if-then-else to the network.
Definition: NvInfer.h:8307
IErrorRecorder * getErrorRecorder() const noexcept
Get the ErrorRecorder assigned to this interface.
Definition: NvInfer.h:8463
ISqueezeLayer * addSqueeze(ITensor &input, ITensor &axes) noexcept
Add a squeeze layer to the network.
Definition: NvInfer.h:8883
TRT_DEPRECATED INMSLayer * addNMS(ITensor &boxes, ITensor &scores, ITensor &maxOutputBoxesPerClass) noexcept
Add a non-maximum suppression layer to the network.
Definition: NvInfer.h:8637
IReverseSequenceLayer * addReverseSequence(ITensor &input, ITensor &sequenceLens) noexcept
Add a ReverseSequence layer to the network.
Definition: NvInfer.h:8674
TRT_DEPRECATED IDynamicQuantizeLayer * addDynamicQuantize(ITensor &input, int32_t axis, int32_t blockSize, DataType outputType, DataType scaleType) noexcept
Add a dynamic quantization layer to the network.
Definition: NvInfer.h:8557
int32_t getNbInputs() const noexcept
Get the number of inputs in the network.
Definition: NvInfer.h:7690
NetworkDefinitionCreationFlags getFlags() const noexcept
Get the network definition creation flags for this network definition object. Defaults to 0.
Definition: NvInfer.h:8101
IQuantizeLayer * addQuantize(ITensor &input, ITensor &scale, DataType outputType) noexcept
Add a quantization layer to the network.
Definition: NvInfer.h:8530
IDynamicQuantizeLayer * addDynamicQuantizeV2(ITensor &input, Dims const &blockShape, DataType outputType, DataType scaleType) noexcept
Add a dynamic quantization layer to the network.
Definition: NvInfer.h:8581
IReduceLayer * addReduce(ITensor &input, ReduceOperation operation, uint32_t reduceAxes, bool keepDimensions) noexcept
Add a reduce layer to the network.
Definition: NvInfer.h:7762
IUnaryLayer * addUnary(ITensor &input, UnaryOperation operation) noexcept
Add a unary layer to the network.
Definition: NvInfer.h:7621
IGridSampleLayer * addGridSample(ITensor &input, ITensor &grid) noexcept
Add a GridSample layer to the network.
Definition: NvInfer.h:8615
void removeTensor(ITensor &tensor) noexcept
Remove a tensor from the network definition.
Definition: NvInfer.h:8006
bool areWeightsMarkedRefittable(char const *name) const noexcept
Whether the named weights have been marked as refittable.
Definition: NvInfer.h:8864
ISelectLayer * addSelect(ITensor &condition, ITensor &thenInput, ITensor &elseInput) noexcept
Add a select layer to the network.
Definition: NvInfer.h:8346
IScatterLayer * addScatter(ITensor &data, ITensor &indices, ITensor &updates, ScatterMode mode) noexcept
Add a Scatter layer to the network with specified mode and axis=0.
Definition: NvInfer.h:8506
TRT_DEPRECATED INormalizationLayer * addNormalization(ITensor &input, ITensor &scale, ITensor &bias, uint32_t axesMask) noexcept
Add a normalization layer to the network.
Definition: NvInfer.h:8706
int32_t getNbLayers() const noexcept
Get the number of layers in the network.
Definition: NvInfer.h:7664
apiv::VNetworkDefinition * mImpl
Definition: NvInfer.h:8936
IKVCacheUpdateLayer * addKVCacheUpdate(ITensor &cache, ITensor &update, ITensor &writeIndices, KVCacheMode cacheMode) noexcept
Add a KVCacheUpdate layer to the network.
Definition: NvInfer.h:8814
bool markOutputForShapes(ITensor &tensor) noexcept
Enable a tensor's value to be computed by IExecutionContext::getShapeBinding.
Definition: NvInfer.h:8130
IOneHotLayer * addOneHot(ITensor &indices, ITensor &values, ITensor &depth, int32_t axis) noexcept
Add a OneHot layer to the network.
Definition: NvInfer.h:7652
IScaleLayer * addScale(ITensor &input, ScaleMode mode, Weights shift, Weights scale, Weights power) noexcept
Add a Scale layer to the network.
Definition: NvInfer.h:7542
void unmarkOutput(ITensor &tensor) noexcept
Unmark a tensor as a network output.
Definition: NvInfer.h:8018
IIdentityLayer * addIdentity(ITensor &input) noexcept
Add an identity layer.
Definition: NvInfer.h:7976
IGatherLayer * addGatherV2(ITensor &data, ITensor &indices, GatherMode mode) noexcept
Add gather with specified mode, axis=0 and nbElementWiseDims=0.
Definition: NvInfer.h:7863
INonZeroLayer * addNonZero(ITensor &input, DataType indicesType) noexcept
Add a nonzero layer to the network.
Definition: NvInfer.h:7938
IElementWiseLayer * addElementWise(ITensor &input1, ITensor &input2, ElementWiseOperation op) noexcept
Add an elementwise layer to the network.
Definition: NvInfer.h:7599
IConstantLayer * addConstant(Dims const &dimensions, Weights weights) noexcept
Add a constant layer to the network.
Definition: NvInfer.h:7962
void setErrorRecorder(IErrorRecorder *recorder) noexcept
Set the ErrorRecorder for this interface.
Definition: NvInfer.h:8448
IPoolingLayer * addPoolingNd(ITensor &input, PoolingType type, Dims const &windowSize) noexcept
Add a multi-dimension pooling layer to the network.
Definition: NvInfer.h:8202
INMSLayer * addNMS(ITensor &boxes, ITensor &scores, ITensor &maxOutputBoxesPerClass, DataType indicesType) noexcept
Add a non-maximum suppression layer to the network.
Definition: NvInfer.h:8657
IRaggedSoftMaxLayer * addRaggedSoftMax(ITensor &input, ITensor &bounds) noexcept
Add a RaggedSoftMax layer to the network.
Definition: NvInfer.h:7882
IShapeLayer * addShape(ITensor &input) noexcept
Add a shape layer to the network.
Definition: NvInfer.h:8091
IGatherLayer * addGather(ITensor &data, ITensor &indices, int32_t axis) noexcept
Add gather with mode GatherMode::kDEFAULT and specified axis and nbElementWiseDims=0.
Definition: NvInfer.h:7847
IAttention * addAttention(ITensor &query, ITensor &key, ITensor &value, AttentionNormalizationOp normOp, bool causal) noexcept
Add an attention operation to the network.
Definition: NvInfer.h:8755
bool unmarkWeightsRefittable(char const *name) noexcept
Unmark weights as refittable when the builder flag kREFIT_INDIVIDUAL is set.
Definition: NvInfer.h:8851
bool markWeightsRefittable(char const *name) noexcept
Mark weights as refittable when the builder flag kREFIT_INDIVIDUAL is set.
Definition: NvInfer.h:8839
IRotaryEmbeddingLayer * addRotaryEmbedding(ITensor &input, ITensor &cosCache, ITensor &sinCache, bool interleaved, int32_t rotaryEmbeddingDim) noexcept
Add a Rotary Position Embedding (RoPE) layer to the network.
Definition: NvInfer.h:8780
IDeconvolutionLayer * addDeconvolutionNd(ITensor &input, int64_t nbOutputMaps, Dims kernelSize, Weights kernelWeights, Weights biasWeights) noexcept
Add a multi-dimension deconvolution layer to the network.
Definition: NvInfer.h:8224
IResizeLayer * addResize(ITensor &input) noexcept
Add a resize layer to the network.
Definition: NvInfer.h:8278
IUnsqueezeLayer * addUnsqueeze(ITensor &input, ITensor &axes) noexcept
Add an unsqueeze layer to the network.
Definition: NvInfer.h:8904
IMatrixMultiplyLayer * addMatrixMultiply(ITensor &input0, MatrixOperation op0, ITensor &input1, MatrixOperation op1) noexcept
Add a MatrixMultiply layer to the network.
Definition: NvInfer.h:7903
ISoftMaxLayer * addSoftMax(ITensor &input) noexcept
Add a SoftMax layer to the network.
Definition: NvInfer.h:7555
bool unmarkDebug(ITensor &tensor) noexcept
Unmark a tensor as a debug tensor.
Definition: NvInfer.h:7431
IEinsumLayer * addEinsum(ITensor *const *inputs, int32_t nbInputs, char const *equation) noexcept
Add an Einsum layer to the network.
Definition: NvInfer.h:8597
void markOutput(ITensor &tensor) noexcept
Mark a tensor as a network output.
Definition: NvInfer.h:7397
IPaddingLayer * addPaddingNd(ITensor &input, Dims const &prePadding, Dims const &postPadding) noexcept
Add a padding layer to the network. Only 2D padding is currently supported.
Definition: NvInfer.h:8405
int32_t getNbOutputs() const noexcept
Get the number of outputs in the network.
Definition: NvInfer.h:7720
bool setWeightsName(Weights weights, char const *name) noexcept
Associate a name with all current uses of the given weights.
Definition: NvInfer.h:8429
bool unmarkUnfusedTensorsAsDebugTensors() noexcept
Undo the marking of unfused tensors as debug tensors.
Definition: NvInfer.h:7477
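Putting a few of the INetworkDefinition methods above together, a minimal sketch could look as follows. It assumes the usual IBuilder::createNetworkV2 and INetworkDefinition::addInput entry points documented elsewhere in this header; weights, shapes, and names are placeholders.

#include "NvInfer.h"
using namespace nvinfer1;

// Sketch: populate a tiny convolution + ReLU network.
// convKernel/convBias are caller-owned weight buffers (16 output maps, 3x3 kernel).
// Error checks and object lifetime management are elided for brevity.
bool buildTinyNetwork(IBuilder& builder, Weights convKernel, Weights convBias)
{
    INetworkDefinition* net = builder.createNetworkV2(0);
    if (net == nullptr)
        return false;
    net->setName("tiny_conv_relu");

    ITensor* input = net->addInput("input", DataType::kFLOAT, Dims4{1, 3, 224, 224});
    IConvolutionLayer* conv = net->addConvolutionNd(*input, 16, Dims2{3, 3}, convKernel, convBias);
    IActivationLayer* relu = net->addActivation(*conv->getOutput(0), ActivationType::kRELU);
    net->markOutput(*relu->getOutput(0));

    return net->getNbLayers() == 2; // convolution + activation
}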
Forward declaration of IEngineInspector for use by other interfaces.
Definition: NvInferRuntime.h:51
Definition: NvInfer.h:3685
DataType getIndicesType() const noexcept
Return the NonZero layer indices type.
Definition: NvInfer.h:3709
bool setIndicesType(DataType type) noexcept
Set the indices type for the layer.
Definition: NvInfer.h:3697
virtual ~INonZeroLayer() noexcept=default
A normalization layer in a network definition.
Definition: NvInfer.h:6395
float getEpsilon() const noexcept
Get the epsilon value used for the normalization calculation.
Definition: NvInfer.h:6414
uint32_t getAxes() const noexcept
Get the axes value used for the normalization calculation.
Definition: NvInfer.h:6434
virtual ~INormalizationLayer() noexcept=default
void setEpsilon(float eps) noexcept
Set the epsilon value used for the normalization calculation.
Definition: NvInfer.h:6404
TRT_NODISCARD bool isV2() const noexcept
Returns true if this layer was created through addNormalizationV2().
Definition: NvInfer.h:6511
DataType getComputePrecision() const noexcept
Get the compute precision of this layer.
Definition: NvInfer.h:6501
apiv::VNormalizationLayer * mImpl
Definition: NvInfer.h:6517
int64_t getNbGroups() const noexcept
Get the number of groups used to split the channels for the normalization calculation.
Definition: NvInfer.h:6465
void setAxes(uint32_t axesMask) noexcept
Set the reduction axes for the normalization calculation.
Definition: NvInfer.h:6424
void setComputePrecision(DataType type) noexcept
Set the compute precision of this layer.
Definition: NvInfer.h:6491
void setNbGroups(int64_t nbGroups) noexcept
Set the number of groups used to split the channels in the normalization calculation.
Definition: NvInfer.h:6455
A OneHot layer in a network definition.
Definition: NvInfer.h:6017
virtual ~IOneHotLayer() noexcept=default
apiv::VOneHotLayer * mImpl
Definition: NvInfer.h:6038
void setAxis(int32_t axis) noexcept
Set the axis parameter.
Definition: NvInfer.h:6024
int32_t getAxis() const noexcept
Get the value of the axis parameter.
Definition: NvInfer.h:6032
Optimization profile for dynamic input dimensions and shape tensors.
Definition: NvInferRuntime.h:2672
Layer that represents a padding operation.
Definition: NvInfer.h:2882
Dims getPostPaddingNd() const noexcept
Get the padding that is applied at the end of the tensor.
Definition: NvInfer.h:2931
void setPrePaddingNd(Dims const &padding) noexcept
Set the padding that is applied at the start of the tensor.
Definition: NvInfer.h:2893
virtual ~IPaddingLayer() noexcept=default
void setPostPaddingNd(Dims const &padding) noexcept
Set the padding that is applied at the end of the tensor.
Definition: NvInfer.h:2919
Dims getPrePaddingNd() const noexcept
Get the padding that is applied at the start of the tensor.
Definition: NvInfer.h:2905
apiv::VPaddingLayer * mImpl
Definition: NvInfer.h:2937
Layer that represents a parametric ReLU operation.
Definition: NvInfer.h:3900
apiv::VParametricReLULayer * mImpl
Definition: NvInfer.h:3902
virtual ~IParametricReLULayer() noexcept=default
Single registration point for all plugins in an application. It is used to find plugin implementations.
Definition: NvInferRuntimeCommon.h:56
Plugin class for user-implemented layers.
Definition: NvInferRuntimePlugin.h:139
Layer type for pluginV2.
Definition: NvInfer.h:2619
virtual ~IPluginV2Layer() noexcept=default
apiv::VPluginV2Layer * mImpl
Definition: NvInfer.h:2632
IPluginV2 & getPlugin() noexcept
Get the plugin for the layer.
Definition: NvInfer.h:2626
Layer type for V3 plugins.
Definition: NvInfer.h:2646
virtual ~IPluginV3Layer() noexcept=default
IPluginV3 & getPlugin() noexcept
Get the plugin for the layer.
Definition: NvInfer.h:2653
apiv::VPluginV3Layer * mImpl
Definition: NvInfer.h:2659
A Pooling layer in a network definition.
Definition: NvInfer.h:1381
PoolingType getPoolingType() const noexcept
Get the type of pooling to be performed.
Definition: NvInfer.h:1400
PaddingMode getPaddingMode() const noexcept
Get the padding mode.
Definition: NvInfer.h:1533
Dims getPostPadding() const noexcept
Get the padding.
Definition: NvInfer.h:1509
bool getAverageCountExcludesPadding() const noexcept
Get whether average pooling uses as a denominator the overlap area between the window and the unpadded input.
Definition: NvInfer.h:1453
Dims getPrePadding() const noexcept
Get the pre-padding.
Definition: NvInfer.h:1481
void setPoolingType(PoolingType type) noexcept
Set the type of pooling to be performed.
Definition: NvInfer.h:1390
void setWindowSizeNd(Dims const &windowSize) noexcept
Set the multi-dimension window size for pooling.
Definition: NvInfer.h:1546
void setPaddingMode(PaddingMode paddingMode) noexcept
Set the padding mode.
Definition: NvInfer.h:1522
Dims getWindowSizeNd() const noexcept
Get the multi-dimension window size for pooling.
Definition: NvInfer.h:1556
void setAverageCountExcludesPadding(bool exclusive) noexcept
Set whether average pooling uses as a denominator the overlap area between the window and the unpadded input.
Definition: NvInfer.h:1442
void setPaddingNd(Dims const &padding) noexcept
Set the multi-dimension padding for pooling.
Definition: NvInfer.h:1600
float getBlendFactor() const noexcept
Get the blending factor for the max_average_blend mode: max_average_blendPool = (1-blendFactor)*maxPool + blendFactor*avgPool.
Definition: NvInfer.h:1428
void setStrideNd(Dims const &stride) noexcept
Set the multi-dimension stride for pooling.
Definition: NvInfer.h:1571
Dims getStrideNd() const noexcept
Get the multi-dimension stride for pooling.
Definition: NvInfer.h:1581
virtual ~IPoolingLayer() noexcept=default
Dims getPaddingNd() const noexcept
Get the multi-dimension padding for pooling.
Definition: NvInfer.h:1612
void setPostPadding(Dims const &padding) noexcept
Set the multi-dimension post-padding for pooling.
Definition: NvInfer.h:1499
void setPrePadding(Dims const &padding) noexcept
Set the multi-dimension pre-padding for pooling.
Definition: NvInfer.h:1471
void setBlendFactor(float blendFactor) noexcept
Set the blending factor for the max_average_blend mode: max_average_blendPool = (1-blendFactor)*maxPool + blendFactor*avgPool.
Definition: NvInfer.h:1415
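A brief sketch of the pooling setters above; helper name, window size, and stride are illustrative.

#include "NvInfer.h"
using namespace nvinfer1;

// Sketch: 2x2 max pooling with stride 2 and explicit zero padding.
IPoolingLayer* addMaxPool2x2(INetworkDefinition& net, ITensor& input)
{
    IPoolingLayer* pool = net.addPoolingNd(input, PoolingType::kMAX, Dims2{2, 2});
    if (pool == nullptr)
        return nullptr;
    pool->setStrideNd(Dims2{2, 2});
    pool->setPaddingNd(Dims2{0, 0});
    pool->setPaddingMode(PaddingMode::kEXPLICIT_ROUND_DOWN);
    return pool;
}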
A Quantize layer in a network definition.
Definition: NvInfer.h:5393
void setToType(DataType toType) noexcept
Set the Quantize layer output type.
Definition: NvInfer.h:5454
bool setBlockShape(Dims const &blockShape) noexcept
Set the shape of the quantization block.
Definition: NvInfer.h:5427
void setAxis(int32_t axis) noexcept
Set the quantization axis.
Definition: NvInfer.h:5414
TRT_NODISCARD Dims getBlockShape() const noexcept
Get the shape of the quantization block.
Definition: NvInfer.h:5438
int32_t getAxis() const noexcept
Get the quantization axis.
Definition: NvInfer.h:5403
virtual ~IQuantizeLayer() noexcept=default
DataType getToType() const noexcept
Return the Quantize layer output type.
Definition: NvInfer.h:5466
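A sketch combining addQuantize and addDequantize from the network-definition listing above; the scale tensor is assumed to be a build-time constant (e.g., produced by addConstant), and the helper name is illustrative.

#include "NvInfer.h"
using namespace nvinfer1;

// Sketch: insert a per-tensor INT8 quantize/dequantize pair around a tensor.
ITensor* addQdq(INetworkDefinition& net, ITensor& input, ITensor& scale)
{
    IQuantizeLayer* q = net.addQuantize(input, scale, DataType::kINT8);
    if (q == nullptr)
        return nullptr;
    IDequantizeLayer* dq = net.addDequantize(*q->getOutput(0), scale, DataType::kFLOAT);
    // For per-channel quantization, supply a 1-D scale and call q->setAxis(channelAxis).
    return dq != nullptr ? dq->getOutput(0) : nullptr;
}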
A RaggedSoftmax layer in a network definition.
Definition: NvInfer.h:3734
apiv::VRaggedSoftMaxLayer * mImpl
Definition: NvInfer.h:3736
virtual ~IRaggedSoftMaxLayer() noexcept=default
A recurrence layer in a network definition.
Definition: NvInfer.h:4606
virtual ~IRecurrenceLayer() noexcept=default
Layer that represents a reduction across a non-bool tensor.
Definition: NvInfer.h:2802
void setKeepDimensions(bool keepDimensions) noexcept
Set the boolean that specifies whether or not to keep the reduced dimensions for the layer.
Definition: NvInfer.h:2849
void setOperation(ReduceOperation op) noexcept
Set the reduce operation for the layer.
Definition: NvInfer.h:2809
ReduceOperation getOperation() const noexcept
Get the reduce operation for the layer.
Definition: NvInfer.h:2819
virtual ~IReduceLayer() noexcept=default
uint32_t getReduceAxes() const noexcept
Get the axes over which to reduce for the layer.
Definition: NvInfer.h:2839
void setReduceAxes(uint32_t reduceAxes) noexcept
Set the axes over which to reduce.
Definition: NvInfer.h:2829
apiv::VReduceLayer * mImpl
Definition: NvInfer.h:2865
bool getKeepDimensions() const noexcept
Get the boolean that specifies whether or not to keep the reduced dimensions for the layer.
Definition: NvInfer.h:2859
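A brief sketch of addReduce using the axis bitmask convention (reducing over the H and W dimensions of an NCHW tensor); the helper name is illustrative.

#include "NvInfer.h"
using namespace nvinfer1;

// Sketch: mean-reduce over the spatial axes, keeping the reduced dimensions
// so the output stays 4-D.
IReduceLayer* addSpatialMean(INetworkDefinition& net, ITensor& input)
{
    uint32_t const axes = (1U << 2) | (1U << 3); // bitmask over dimensions 2 and 3
    return net.addReduce(input, ReduceOperation::kAVG, axes, /*keepDimensions=*/true);
}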
A resize layer in a network definition.
Definition: NvInfer.h:4089
void setSelectorForSinglePixel(ResizeSelector selector) noexcept
Set coordinate selector function when resized to single pixel.
Definition: NvInfer.h:4250
void setNearestRounding(ResizeRoundMode value) noexcept
Set rounding mode for nearest neighbor resize.
Definition: NvInfer.h:4274
virtual ~IResizeLayer() noexcept=default
int32_t getScales(int32_t size, float *scales) const noexcept
Copies resize scales to scales[0, ..., nbScales-1], where nbScales is the number of scales that were set.
Definition: NvInfer.h:4168
void setOutputDimensions(Dims const &dimensions) noexcept
Set the output dimensions.
Definition: NvInfer.h:4109
void setCubicCoeff(float A) noexcept
Set the coefficient 'A' used in cubic interpolation.
Definition: NvInfer.h:4306
void setScales(float const *scales, int32_t nbScales) noexcept
Set the resize scales.
Definition: NvInfer.h:4149
float getCubicCoeff() const noexcept
Get the coefficient 'A' used in cubic interpolation.
Definition: NvInfer.h:4316
ResizeSelector getSelectorForSinglePixel() const noexcept
Get the coordinate selector function when resized to single pixel.
Definition: NvInfer.h:4260
InterpolationMode getResizeMode() const noexcept
Get resize mode for an input tensor.
Definition: NvInfer.h:4190
void setCoordinateTransformation(ResizeCoordinateTransformation coordTransform) noexcept
Set coordinate transformation function.
Definition: NvInfer.h:4225
void setExcludeOutside(bool excludeFlag) noexcept
Set the state for excluding outside pixels.
Definition: NvInfer.h:4329
void setResizeMode(InterpolationMode interpolationMode) noexcept
Set resize mode for an input tensor.
Definition: NvInfer.h:4180
Dims getOutputDimensions() const noexcept
Get the output dimensions.
Definition: NvInfer.h:4119
ResizeRoundMode getNearestRounding() const noexcept
Get rounding mode for nearest neighbor resize.
Definition: NvInfer.h:4284
bool getExcludeOutside() const noexcept
Get the state for excluding outside pixels.
Definition: NvInfer.h:4339
ResizeCoordinateTransformation getCoordinateTransformation() const noexcept
Get coordinate transformation function.
Definition: NvInfer.h:4235
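A brief sketch of the resize setters above (2x bilinear upsampling); the helper name and scale values are illustrative.

#include "NvInfer.h"
using namespace nvinfer1;

// Sketch: upsample the spatial dimensions of an NCHW tensor by a factor of 2.
IResizeLayer* addUpsample2x(INetworkDefinition& net, ITensor& input)
{
    IResizeLayer* resize = net.addResize(input);
    if (resize == nullptr)
        return nullptr;
    float const scales[] = {1.0F, 1.0F, 2.0F, 2.0F}; // N, C, H, W
    resize->setScales(scales, 4);
    resize->setResizeMode(InterpolationMode::kLINEAR);
    resize->setCoordinateTransformation(ResizeCoordinateTransformation::kHALF_PIXEL);
    return resize;
}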
A ReverseSequence layer in a network definition.
Definition: NvInfer.h:6323
void setSequenceAxis(int32_t sequenceAxis) noexcept
Set the sequence axis. Default is 0.
Definition: NvInfer.h:6356
int32_t getBatchAxis() const noexcept
Return the batch axis. Return 1 if no batch axis was set.
Definition: NvInfer.h:6343
apiv::VReverseSequenceLayer * mImpl
Definition: NvInfer.h:6372
int32_t getSequenceAxis() const noexcept
Return the sequence axis. Return 0 if no sequence axis was set.
Definition: NvInfer.h:6366
void setBatchAxis(int32_t batchAxis) noexcept
Set the batch axis. Default is 1.
Definition: NvInfer.h:6333
virtual ~IReverseSequenceLayer() noexcept=default
Layer that implements Rotary Position Embedding (RoPE) (https://arxiv.org/abs/2104....
Definition: NvInfer.h:7171
TRT_NODISCARD int32_t getRotaryEmbeddingDim() const noexcept
Get the number of hidden dimensions participating in RoPE. The default value is 0,...
Definition: NvInfer.h:7211
virtual ~IRotaryEmbeddingLayer() noexcept=default
void setInterleaved(bool interleaved) noexcept
Set whether the input is in interleaved format, i.e., whether the 2-d vectors rotated are taken from ...
Definition: NvInfer.h:7178
TRT_NODISCARD bool setRotaryEmbeddingDim(int32_t rotaryEmbeddingDim) noexcept
Set the number of hidden dimensions participating in RoPE. The default value is 0,...
Definition: NvInfer.h:7200
apiv::VRotaryEmbeddingLayer * mImpl
Definition: NvInfer.h:7234
TRT_NODISCARD bool getInterleaved() const noexcept
Get whether the input is in interleaved format. The default value is false.
Definition: NvInfer.h:7189
A Scale layer in a network definition.
Definition: NvInfer.h:1778
Weights getScale() const noexcept
Get the scale value.
Definition: NvInfer.h:1835
Weights getPower() const noexcept
Get the power value.
Definition: NvInfer.h:1855
void setScale(Weights scale) noexcept
Set the scale value.
Definition: NvInfer.h:1825
void setPower(Weights power) noexcept
Set the power value.
Definition: NvInfer.h:1845
ScaleMode getMode() const noexcept
Get the scale mode.
Definition: NvInfer.h:1795
void setShift(Weights shift) noexcept
Set the shift value.
Definition: NvInfer.h:1805
void setChannelAxis(int32_t channelAxis) noexcept
Set the channel axis.
Definition: NvInfer.h:1891
Weights getShift() const noexcept
Get the shift value.
Definition: NvInfer.h:1815
virtual ~IScaleLayer() noexcept=default
void setMode(ScaleMode mode) noexcept
Set the scale mode.
Definition: NvInfer.h:1785
int32_t getChannelAxis() const noexcept
Get the channel axis.
Definition: NvInfer.h:1870
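A brief sketch of the scale-layer setters above (per-channel y = (x * scale + shift)^power); the weight buffers are caller-owned placeholders.

#include "NvInfer.h"
using namespace nvinfer1;

// Sketch: per-channel affine transform on an NCHW tensor.
IScaleLayer* addChannelScale(INetworkDefinition& net, ITensor& input,
                             Weights shift, Weights scale, Weights power)
{
    IScaleLayer* layer = net.addScale(input, ScaleMode::kCHANNEL, shift, scale, power);
    if (layer != nullptr)
        layer->setChannelAxis(1); // channel dimension of an NCHW tensor
    return layer;
}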
A scatter layer in a network definition. Supports several kinds of scattering.
Definition: NvInfer.h:5945
void setMode(ScatterMode mode) noexcept
Set the scatter mode.
Definition: NvInfer.h:5952
apiv::VScatterLayer * mImpl
Definition: NvInfer.h:5986
void setAxis(int32_t axis) noexcept
Set the axis used by ScatterMode::kELEMENTS.
Definition: NvInfer.h:5972
int32_t getAxis() const noexcept
Get the axis.
Definition: NvInfer.h:5980
ScatterMode getMode() const noexcept
Get the scatter mode.
Definition: NvInfer.h:5962
virtual ~IScatterLayer() noexcept=default
Select elements from two data tensors based on a condition tensor.
Definition: NvInfer.h:4914
virtual ~ISelectLayer() noexcept=default
Layer type for getting shape of a tensor.
Definition: NvInfer.h:3407
virtual ~IShapeLayer() noexcept=default
apiv::VShapeLayer * mImpl
Definition: NvInfer.h:3409
Layer type for shuffling data.
Definition: NvInfer.h:2970
apiv::VShuffleLayer * mImpl
Definition: NvInfer.h:3128
void setFirstTranspose(Permutation permutation) noexcept
Set the permutation applied by the first transpose operation.
Definition: NvInfer.h:2981
void setSecondTranspose(Permutation permutation) noexcept
Set the permutation applied by the second transpose operation.
Definition: NvInfer.h:3081
Dims getReshapeDimensions() const noexcept
Get the reshaped dimensions.
Definition: NvInfer.h:3034
void setReshapeDimensions(Dims const &dimensions) noexcept
Set the reshaped dimensions.
Definition: NvInfer.h:3021
Permutation getFirstTranspose() const noexcept
Get the permutation applied by the first transpose operation.
Definition: NvInfer.h:2993
virtual ~IShuffleLayer() noexcept=default
Permutation getSecondTranspose() const noexcept
Get the permutation applied by the second transpose operation.
Definition: NvInfer.h:3093
bool getZeroIsPlaceholder() const noexcept
Get meaning of 0 in reshape dimensions.
Definition: NvInfer.h:3122
void setZeroIsPlaceholder(bool zeroIsPlaceholder) noexcept
Set meaning of 0 in reshape dimensions.
Definition: NvInfer.h:3109
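A brief sketch of the shuffle setters above (NCHW to NHWC transpose followed by a flattening reshape); the helper name is illustrative.

#include "NvInfer.h"
using namespace nvinfer1;

// Sketch: transpose NCHW -> NHWC, then flatten each image to a single row.
IShuffleLayer* addNchwToFlatNhwc(INetworkDefinition& net, ITensor& input)
{
    IShuffleLayer* shuffle = net.addShuffle(input);
    if (shuffle == nullptr)
        return nullptr;

    Permutation perm{};
    perm.order[0] = 0; perm.order[1] = 2; perm.order[2] = 3; perm.order[3] = 1;
    shuffle->setFirstTranspose(perm);

    // 0 keeps the batch dimension, -1 infers the flattened extent.
    shuffle->setReshapeDimensions(Dims{2, {0, -1}});
    shuffle->setZeroIsPlaceholder(true);
    return shuffle;
}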
Slices an input tensor into an output tensor based on the offset and strides.
Definition: NvInfer.h:3222
void setStride(Dims const &stride) noexcept
Set the stride for computing the output slice data.
Definition: NvInfer.h:3291
apiv::VSliceLayer * mImpl
Definition: NvInfer.h:3390
virtual ~ISliceLayer() noexcept=default
void setSize(Dims const &size) noexcept
Set the dimensions of the output slice.
Definition: NvInfer.h:3262
void setAxes(Dims const &axes) noexcept
Set the axes for this ISliceLayer.
Definition: NvInfer.h:3369
void setStart(Dims const &start) noexcept
Set the start offset that the slice layer uses to create the output slice.
Definition: NvInfer.h:3233
Dims getStart() const noexcept
Get the start offset for the slice layer.
Definition: NvInfer.h:3248
void setMode(SampleMode mode) noexcept
Set the slice mode.
Definition: NvInfer.h:3316
Dims getSize() const noexcept
Get dimensions of the output slice.
Definition: NvInfer.h:3277
SampleMode getMode() const noexcept
Get the slice mode.
Definition: NvInfer.h:3326
Dims getStride() const noexcept
Get the stride for the output slice.
Definition: NvInfer.h:3306
Dims getAxes() const noexcept
Get the axes for this ISliceLayer.
Definition: NvInfer.h:3384
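A brief sketch of the slice setters above (a fixed 224x224 crop of an NCHW tensor); offsets and sizes are illustrative.

#include "NvInfer.h"
using namespace nvinfer1;

// Sketch: static crop with unit stride, clamping any out-of-bounds reads.
ISliceLayer* addCrop(INetworkDefinition& net, ITensor& input)
{
    Dims const start{4, {0, 0, 16, 16}};
    Dims const size{4, {1, 3, 224, 224}};
    Dims const stride{4, {1, 1, 1, 1}};
    ISliceLayer* slice = net.addSlice(input, start, size, stride);
    if (slice != nullptr)
        slice->setMode(SampleMode::kCLAMP);
    return slice;
}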
A Softmax layer in a network definition.
Definition: NvInfer.h:1922
void setAxes(uint32_t axes) noexcept
Set the axis along which softmax is computed. Currently, only one axis can be set.
Definition: NvInfer.h:1944
uint32_t getAxes() const noexcept
Get the axis along which softmax occurs.
Definition: NvInfer.h:1954
virtual ~ISoftMaxLayer() noexcept=default
Layer that represents a squeeze operation, removing unit dimensions of the first input tensor on a se...
Definition: NvInfer.h:6531
virtual ~ISqueezeLayer() noexcept=default
apiv::VSqueezeLayer * mImpl
Definition: NvInfer.h:6548
A tensor in a network definition.
Definition: NvInfer.h:187
void setAllowedFormats(TensorFormats formats) noexcept
Set allowed formats for an input or output tensor. By default all formats are allowed....
Definition: NvInfer.h:338
void setDimensions(Dims const &dimensions) noexcept
Set the dimensions of a tensor.
Definition: NvInfer.h:235
void setName(char const *name) noexcept
Set the tensor name.
Definition: NvInfer.h:204
bool isExecutionTensor() const noexcept
Whether the tensor is an execution tensor.
Definition: NvInfer.h:403
char const * getName() const noexcept
Get the tensor name.
Definition: NvInfer.h:216
bool isShapeTensor() const noexcept
Whether the tensor is a shape tensor.
Definition: NvInfer.h:382
bool isNetworkInput() const noexcept
Whether the tensor is a network input.
Definition: NvInfer.h:308
TRT_DEPRECATED void setType(DataType type) noexcept
Set the data type of a tensor.
Definition: NvInfer.h:285
bool isNetworkOutput() const noexcept
Whether the tensor is a network output.
Definition: NvInfer.h:316
DataType getType() const noexcept
Get the data type of a tensor.
Definition: NvInfer.h:300
apiv::VTensor * mImpl
Definition: NvInfer.h:450
virtual ~ITensor() noexcept=default
void setDimensionName(int32_t index, char const *name) noexcept
Name a dimension of an input tensor.
Definition: NvInfer.h:429
char const * getDimensionName(int32_t index) const noexcept
Get the name of an input dimension.
Definition: NvInfer.h:444
Dims getDimensions() const noexcept
Get the dimensions of a tensor.
Definition: NvInfer.h:249
TensorFormats getAllowedFormats() const noexcept
Get a bitmask of TensorFormat values that the tensor supports. For a shape tensor,...
Definition: NvInfer.h:351
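A brief sketch of the ITensor methods above on a freshly declared network input; it assumes INetworkDefinition::addInput, documented elsewhere in this header, and uses illustrative names and shapes.

#include "NvInfer.h"
using namespace nvinfer1;

// Sketch: declare an input with a dynamic batch dimension and name that dimension.
ITensor* addDynamicBatchInput(INetworkDefinition& net)
{
    ITensor* input = net.addInput("images", DataType::kFLOAT, Dims4{-1, 3, 224, 224});
    if (input == nullptr)
        return nullptr;
    input->setDimensionName(0, "batch");
    // Inputs and outputs report their role and type through the query methods above.
    bool const ok = input->isNetworkInput() && input->getType() == DataType::kFLOAT;
    return ok ? input : nullptr;
}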
Class to handle tactic timing info collected from builder.
Definition: NvInfer.h:9258
int64_t queryKeys(TimingCacheKey *keyBuffer, int64_t capacity) const noexcept
Query cache keys from Timing Cache.
Definition: NvInfer.h:9324
bool combine(ITimingCache const &inputCache, bool ignoreMismatch) noexcept
Combine input timing cache into local instance.
Definition: NvInfer.h:9295
TimingCacheValue query(TimingCacheKey const &key) const noexcept
Query value in a cache entry.
Definition: NvInfer.h:9341
virtual ~ITimingCache() noexcept=default
bool update(TimingCacheKey const &key, TimingCacheValue const &value) noexcept
Update values in a cache entry.
Definition: NvInfer.h:9363
apiv::VTimingCache * mImpl
Definition: NvInfer.h:9369
bool reset() noexcept
Empty the timing cache.
Definition: NvInfer.h:9305
Layer that represents a TopK reduction.
Definition: NvInfer.h:3447
void setK(int32_t k) noexcept
Set the static k value for the layer.
Definition: NvInfer.h:3478
void setReduceAxes(uint32_t reduceAxes) noexcept
Set which axes to reduce for the layer.
Definition: NvInfer.h:3502
TopKOperation getOperation() const noexcept
Get the operation for the layer.
Definition: NvInfer.h:3464
apiv::VTopKLayer * mImpl
Definition: NvInfer.h:3561
void setOperation(TopKOperation op) noexcept
Set the operation for the layer.
Definition: NvInfer.h:3454
bool setIndicesType(DataType type) noexcept
Set the indices type for the layer.
Definition: NvInfer.h:3543
int32_t getK() const noexcept
Get the k value for the layer.
Definition: NvInfer.h:3492
uint32_t getReduceAxes() const noexcept
Get the axes to reduce for the layer.
Definition: NvInfer.h:3512
virtual ~ITopKLayer() noexcept=default
DataType getIndicesType() const noexcept
Return the TopK layer indices type.
Definition: NvInfer.h:3555
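A brief sketch of addTopK using the reduce-axes bitmask (top-5 over the class axis of an [N, C] tensor); the helper name is illustrative.

#include "NvInfer.h"
using namespace nvinfer1;

// Sketch: top-5 values and their INT32 indices along dimension 1.
ITopKLayer* addTop5(INetworkDefinition& net, ITensor& logits)
{
    uint32_t const classAxis = 1U << 1; // reduce over dimension 1
    ITopKLayer* topk = net.addTopK(logits, TopKOperation::kMAX, 5, classAxis, DataType::kINT32);
    // Output 0 holds the top values, output 1 holds their indices.
    return topk;
}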
A layer that represents a trip-count limiter.
Definition: NvInfer.h:4727
TripLimit getTripLimit() const noexcept
Get a trip limiter type.
Definition: NvInfer.h:4732
virtual ~ITripLimitLayer() noexcept=default
Layer that represents an unary operation.
Definition: NvInfer.h:2727
void setOperation(UnaryOperation op) noexcept
Set the unary operation for the layer.
Definition: NvInfer.h:2736
apiv::VUnaryLayer * mImpl
Definition: NvInfer.h:2752
UnaryOperation getOperation() const noexcept
Get the unary operation for the layer.
Definition: NvInfer.h:2746
virtual ~IUnaryLayer() noexcept=default
Layer that represents an unsqueeze operation, which reshapes the first input tensor by inserting unit...
Definition: NvInfer.h:6561
virtual ~IUnsqueezeLayer() noexcept=default
apiv::VUnsqueezeLayer * mImpl
Definition: NvInfer.h:6579
An interface class for version control.
Definition: NvInferRuntimeBase.h:278
Version information associated with a TRT interface.
Definition: NvInferRuntimeBase.h:243
An array of weights used as a layer parameter.
Definition: NvInferRuntime.h:124
Definition: NvInferRuntimeBase.h:415
Definition: NvInferRuntime.h:1656
Definition: NvInferPluginBase.h:206
Definition: NvInfer.h:9607
virtual bool stepComplete(char const *phaseName, int32_t step) noexcept=0
Signal that a step of an optimizer phase has finished.
virtual ~IProgressMonitor() noexcept=default
virtual void phaseFinish(char const *phaseName) noexcept=0
Signal that a phase of the optimizer has finished.
virtual void phaseStart(char const *phaseName, char const *parentPhase, int32_t nbSteps) noexcept=0
Signal that a phase of the optimizer has started.
Definition: NvInferRuntime.h:666
IBuilder * createInferBuilder(ILogger &logger) noexcept
Create an instance of an IBuilder class.
Definition: NvInfer.h:10871
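A minimal sketch of the entry point above; the ConsoleLogger class is an illustrative ILogger implementation, not part of the API.

#include "NvInfer.h"
#include <cstdio>
using namespace nvinfer1;

// Illustrative ILogger: print warnings and errors emitted by the builder.
class ConsoleLogger : public ILogger
{
public:
    void log(Severity severity, char const* msg) noexcept override
    {
        if (severity <= Severity::kWARNING)
            std::printf("[TRT] %s\n", msg);
    }
};

int main()
{
    ConsoleLogger logger;
    IBuilder* builder = createInferBuilder(logger);
    if (builder == nullptr)
        return 1;
    // ... create a network, configure the builder, build an engine ...
    delete builder; // TensorRT objects are released with delete in recent releases
    return 0;
}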
The TensorRT API version 1 namespace.
Definition: NvInferPluginBase.h:29
uint32_t TacticSources
Represents a collection of one or more TacticSource values combined using bitwise-OR operations.
Definition: NvInferRuntime.h:2958
ResizeSelector
The coordinate selector when resize to single pixel output.
Definition: NvInfer.h:3994
@ kFORMULA
Use formula to map the original index.
@ kUPPER
Select the upper left pixel.
EngineCapability
List of supported engine capability flows.
Definition: NvInferRuntime.h:76
MemoryPoolType
The type for memory pools used by TensorRT.
Definition: NvInfer.h:9380
ScaleMode
Controls how shift, scale and power are applied in a Scale layer.
Definition: NvInfer.h:1735
@ kUNIFORM
Identical coefficients across all elements of the tensor.
@ kCHANNEL
Per-channel coefficients.
RuntimePlatform
Describes the intended runtime platform (operating system and CPU architecture) for the execution of the TensorRT engine.
Definition: NvInfer.h:8957
@ kNONE
Tensor is not an input or output.
HardwareCompatibilityLevel
Describes requirements of compatibility with GPU architectures other than that of the GPU on which the engine was built.
Definition: NvInfer.h:9499
CumulativeOperation
Enumerates the cumulative operations that may be performed by a Cumulative layer.
Definition: NvInfer.h:6595
BoundingBoxFormat
Representation of bounding box data used for the Boxes input tensor in INMSLayer.
Definition: NvInfer.h:6137
@ kCENTER_SIZES
(x_center, y_center, width, height) where (x_center, y_center) is the center point of the box
@ kCORNER_PAIRS
(x1, y1, x2, y2) where (x1, y1) and (x2, y2) are any pair of diagonal corners
constexpr int32_t EnumMax< BuilderFlag >() noexcept
Definition: NvInfer.h:9192
constexpr int32_t EnumMax< LayerType >() noexcept
Definition: NvInfer.h:121
ComputeCapability
Describes compute capability that an engine will be built for.
Definition: NvInfer.h:9548
@ kSM120
Target NVIDIA Blackwell GPU architecture (SM 12.0).
@ kSM75
Target NVIDIA Turing GPU architecture (SM 7.5).
@ kSM80
Target NVIDIA Ampere GPU architecture (SM 8.0).
@ kCURRENT
Use the compute capability of the current GPU in the environment.
@ kSM89
Target NVIDIA Ada Lovelace GPU architecture (SM 8.9).
@ kSM86
Target NVIDIA Ampere GPU architecture (SM 8.6).
@ kFP4
FP4 field type.
@ kINT8
INT8 field type.
@ kFP8
FP8 field type.
@ kBF16
BF16 field type.
@ kINT4
INT4 field type.
UnaryOperation
Enumerates the unary operations that may be performed by a Unary layer.
Definition: NvInfer.h:2680
@ kISINF
Return true if input value equals +/- infinity for floating-point data type.
@ kCOSH
Hyperbolic cosine.
@ kACOSH
Inverse hyperbolic cosine.
@ kERF
Gauss error function.
@ kISNAN
Return true if input value is a NaN for floating-point data type.
@ kACOS
Inverse cosine.
@ kABS
Absolute value.
@ kSINH
Hyperbolic sine.
@ kROUND
Round to nearest even for floating-point data type.
@ kATANH
Inverse hyperbolic tangent.
@ kASINH
Inverse hyperbolic sine.
@ kSIGN
Sign: if input > 0, output 1; if input < 0, output -1; if input == 0, output 0.
@ kEXP
Exponentiation.
@ kATAN
Inverse tangent.
constexpr int32_t EnumMax< ReduceOperation >() noexcept
Definition: NvInfer.h:2789
constexpr int32_t EnumMax< TripLimit >() noexcept
Definition: NvInfer.h:4395
ActivationType
Enumerates the types of activation to perform in an activation layer.
Definition: NvInfer.h:140
@ kSELU
Selu activation: x>0 ? beta * x : beta * (alpha*exp(x) - alpha)
@ kTANH
TanH activation.
@ kSCALED_TANH
Scaled tanh activation: alpha*tanh(beta*x)
@ kRELU
Rectified linear activation.
@ kELU
Elu activation: x>=0 ? x : alpha * (exp(x) - 1).
@ kLEAKY_RELU
LeakyRelu activation: x>=0 ? x : alpha * x.
@ kSOFTSIGN
Softsign activation: x / (1+|x|)
@ kHARD_SIGMOID
Hard sigmoid activation: max(0, min(1, alpha*x+beta))
@ kTHRESHOLDED_RELU
Thresholded ReLU activation: x>alpha ? x : 0.
@ kSIGMOID
Sigmoid activation.
@ kCLIP
Clip activation: max(alpha, min(beta, x))
@ kGELU_TANH
GELU tanh activation: 0.5 * x * (1 + tanh(sqrt(2/pi) * (0.044715F * pow(x, 3) + x)))
@ kGELU_ERF
GELU erf activation: 0.5 * x * (1 + erf(sqrt(0.5) * x))
@ kSOFTPLUS
Parametric softplus activation: alpha*log(exp(beta*x)+1)
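A sketch using kCLIP to build a ReLU6-style activation; it assumes the setAlpha/setBeta setters on IActivationLayer, which are documented elsewhere in this header, and the helper name is illustrative.

#include "NvInfer.h"
using namespace nvinfer1;

// Sketch: clip activation max(alpha, min(beta, x)) with alpha=0 and beta=6.
IActivationLayer* addRelu6(INetworkDefinition& net, ITensor& input)
{
    IActivationLayer* clip = net.addActivation(input, ActivationType::kCLIP);
    if (clip != nullptr)
    {
        clip->setAlpha(0.0F); // lower bound (assumed setter, see IActivationLayer)
        clip->setBeta(6.0F);  // upper bound
    }
    return clip;
}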
FillOperation
Enumerates the tensor fill operations that may be performed by a fill layer.
Definition: NvInfer.h:4975
@ kRANDOM_UNIFORM
Randomly draw values from a uniform distribution.
@ kRANDOM_NORMAL
Randomly draw values from a normal distribution.
ResizeRoundMode
The rounding mode for nearest neighbor resize.
Definition: NvInfer.h:4024
@ kHALF_UP
Round half up.
@ kHALF_DOWN
Round half down.
PaddingMode
Enumerates the modes of padding to perform in convolution, deconvolution and pooling layer,...
Definition: NvInfer.h:913
@ kSAME_LOWER
Use SAME padding, with prePadding >= postPadding.
@ kEXPLICIT_ROUND_DOWN
Use explicit padding, rounding output size down.
@ kEXPLICIT_ROUND_UP
Use explicit padding, rounding output size up.
@ kSAME_UPPER
Use SAME padding, with prePadding <= postPadding.
TripLimit
Enum that describes kinds of trip limits.
Definition: NvInfer.h:4383
@ kWHILE
Tensor is a scalar of type kBOOL. Loop terminates when value is false.
@ kCOUNT
Tensor is a scalar of type kINT32 or kINT64 that contains the trip count.
uint32_t NetworkDefinitionCreationFlags
Represents one or more NetworkDefinitionCreationFlag flags using binary OR operations....
Definition: NvInfer.h:10551
PreviewFeature
Define preview features.
Definition: NvInfer.h:9455
TilingOptimizationLevel
Define the optimization levels for Tiling.
Definition: NvInfer.h:9574
@ kFAST
Use a fast algorithm and heuristic based strategy. Slightly increases engine build time.
@ kFULL
Increase search space even wider. Significantly increases engine build time.
constexpr int32_t EnumMax< GatherMode >() noexcept
Definition: NvInfer.h:2439
DataType
The type of weights and tensors. The datatypes other than kBOOL, kINT32, and kINT64 are "activation d...
Definition: NvInferRuntimeBase.h:145
uint32_t BuilderFlags
Represents one or more BuilderFlag values using binary OR operations, e.g., 1U << BuilderFlag::kFP16 ...
Definition: NvInfer.h:8989
DeviceType
The device that this layer/network will execute on.
Definition: NvInferRuntime.h:1350
constexpr int32_t EnumMax< ScaleMode >() noexcept
Definition: NvInfer.h:1747
LayerType
The type values of layer classes.
Definition: NvInfer.h:57
@ kGRID_SAMPLE
Grid sample layer.
@ kRAGGED_SOFTMAX
Ragged softmax layer.
@ kDECONVOLUTION
Deconvolution layer.
@ kREDUCE
Reduce layer.
@ kASSERTION
Assertion layer.
@ kTOPK
TopK layer.
@ kRESIZE
Resize Layer.
@ kCAST
Cast layer.
@ kPADDING
Padding layer.
@ kSQUEEZE
Squeeze Layer.
@ kATTENTION_INPUT
Attention Input.
@ kMATRIX_MULTIPLY
Matrix multiply layer.
@ kCONDITION
Condition layer.
@ kCUMULATIVE
Cumulative layer.
@ kCONDITIONAL_INPUT
Conditional Input layer.
@ kIDENTITY
Identity layer.
@ kNORMALIZATION
Normalization layer.
@ kQUANTIZE
Quantize layer.
@ kSCATTER
Scatter layer.
@ kCONVOLUTION
Convolution layer.
@ kPARAMETRIC_RELU
Parametric ReLU layer.
@ kATTENTION_OUTPUT
Attention Output.
@ kUNSQUEEZE
Unsqueeze Layer.
@ kCONCATENATION
Concatenation layer.
@ kONE_HOT
OneHot layer.
@ kREVERSE_SEQUENCE
Reverse sequence layer.
@ kSLICE
Slice layer.
@ kEINSUM
Einsum layer.
@ kSOFTMAX
SoftMax layer.
@ kSHAPE
Shape layer.
@ kROTARY_EMBEDDING
Rotary Embedding layer.
@ kRECURRENCE
Loop Recurrence layer.
@ kDEQUANTIZE
Dequantize layer.
@ kSHUFFLE
Shuffle layer.
@ kPLUGIN_V3
PluginV3 layer.
@ kITERATOR
Loop Iterator layer.
@ kPOOLING
Pooling layer.
@ kTRIP_LIMIT
Loop Trip limit layer.
@ kSCALE
Scale layer.
@ kDYNAMIC_QUANTIZE
Dynamic Quantize layer.
@ kGATHER
Gather layer.
@ kUNARY
UnaryOp operation Layer.
@ kACTIVATION
Activation layer.
@ kELEMENTWISE
Elementwise layer.
@ kSELECT
Select layer.
@ kPLUGIN_V2
PluginV2 layer.
@ kLOOP_OUTPUT
Loop output layer.
@ kCONDITIONAL_OUTPUT
Conditional Output layer.
@ kCONSTANT
Constant layer.
@ kNON_ZERO
NonZero layer.
@ kFILL
Fill layer.
@ kKVCACHE_UPDATE
KV Cache Update layer.
@ kPLUGIN
Plugin layer.
SampleMode
Controls how ISliceLayer and IGridSampleLayer handle out-of-bounds coordinates.
Definition: NvInfer.h:3138
@ kCLAMP
Out of bounds indices are clamped to bounds.
@ kSTRICT_BOUNDS
Fail with error when the coordinates are out of bounds.
@ kWRAP
Coordinates wrap around periodically.
GatherMode
Control form of IGatherLayer.
Definition: NvInfer.h:2427
@ kDEFAULT
Similar to ONNX Gather.
@ kELEMENT
Similar to ONNX GatherElements.
@ kND
Similar to ONNX GatherND.
uint32_t TensorFormats
It is capable of representing one or more TensorFormat by binary OR operations, e....
Definition: NvInfer.h:132
ProfilingVerbosity
List of verbosity levels of layer information exposed in NVTX annotations and in IEngineInspector.
Definition: NvInferRuntime.h:2970
NetworkDefinitionCreationFlag
List of immutable network properties expressed at network creation time. NetworkDefinitionCreationFla...
Definition: NvInfer.h:10562
ElementWiseOperation
Enumerates the binary operations that may be performed by an ElementWise layer.
Definition: NvInfer.h:2337
@ kSUB
Subtract the second element from the first.
@ kSUM
Sum of the two elements.
@ kPROD
Product of the two elements.
@ kFLOOR_DIV
Floor division of the first element by the second.
@ kEQUAL
Check if two elements are equal.
@ kAND
Logical AND of two elements.
@ kOR
Logical OR of two elements.
@ kMIN
Minimum of the two elements.
@ kPOW
The first element to the power of the second element.
@ kLESS
Check if element in first tensor is less than corresponding element in second tensor.
@ kGREATER
Check if element in first tensor is greater than corresponding element in second tensor.
@ kXOR
Logical XOR of two elements.
@ kDIV
Divide the first element by the second.
constexpr int32_t EnumMax< SampleMode >() noexcept
Definition: NvInfer.h:3154
InterpolationMode
Enumerates various modes of interpolation.
Definition: NvInfer.h:3912
@ kNEAREST
ND (0 < N <= 8) nearest neighbor resizing.
@ kCUBIC
Supports bicubic (2D) interpolation.
@ kLINEAR
Supports linear (1D), bilinear (2D), and trilinear (3D) interpolation.
BuilderFlag
List of valid modes that the builder can enable when creating an engine from a network definition.
Definition: NvInfer.h:8999
@ kWEIGHT_STREAMING
Enable weight streaming for the current engine.
@ kDEBUG
Enable debugging of layers via synchronizing after every layer.
@ kGPU_FALLBACK
Enable layers marked to execute on GPU if layer cannot execute on DLA.
@ kSPARSE_WEIGHTS
Allow the builder to examine weights and use optimized functions when weights have suitable sparsity.
@ kEDITABLE_TIMING_CACHE
Enable editable timing cache.
@ kSTRIP_PLAN
Strip the refittable weights from the engine plan file.
@ kMONITOR_MEMORY
Enable memory monitor during build time.
@ kDISABLE_TIMING_CACHE
Disable reuse of timing information across identical layers.
@ kREFIT
Enable building a refittable engine.
constexpr int32_t EnumMax< TopKOperation >() noexcept
Definition: NvInfer.h:3430
TENSORRTAPI nvinfer1::IPluginRegistry * getBuilderPluginRegistry(nvinfer1::EngineCapability capability) noexcept
Return the plugin registry for building a Standard engine, or nullptr if no registry exists.
constexpr int32_t EnumMax< MemoryPoolType >() noexcept
Definition: NvInfer.h:9441
TopKOperation
Enumerates the operations that may be performed by a TopK layer.
Definition: NvInfer.h:3419
ReduceOperation
Enumerates the reduce operations that may be performed by a Reduce layer.
Definition: NvInfer.h:2775
constexpr int32_t EnumMax< LoopOutput >() noexcept
Definition: NvInfer.h:4372
constexpr int32_t EnumMax< NetworkDefinitionCreationFlag >() noexcept
Definition: NvInfer.h:10581
ScatterMode
Control form of IScatterLayer.
Definition: NvInfer.h:5871
MatrixOperation
Enumerates the operations that may be performed on a tensor by IMatrixMultiplyLayer before multiplication.
Definition: NvInfer.h:3572
@ kTRANSPOSE
Like kNONE, but transpose the matrix dimensions.
ResizeCoordinateTransformation
The resize coordinate transformation function.
Definition: NvInfer.h:3940
constexpr int32_t EnumMax< UnaryOperation >() noexcept
Definition: NvInfer.h:2714
LoopOutput
Enum that describes kinds of loop outputs.
Definition: NvInfer.h:4355
@ kLAST_VALUE
Output value is value of tensor for last iteration.
@ kCONCATENATE
Output value is concatenation of values of tensor for each iteration, in forward order.
@ kREVERSE
Output value is concatenation of values of tensor for each iteration, in reverse order.
constexpr int32_t EnumMax< BoundingBoxFormat >() noexcept
Definition: NvInfer.h:6150
constexpr int32_t EnumMax< MatrixOperation >() noexcept
Definition: NvInfer.h:3600
KVCacheMode
Enumerates the KVCache modes that may be performed by a KVCacheUpdate layer.
Definition: NvInfer.h:7244
PoolingType
The type of pooling to perform in a pooling layer.
Definition: NvInfer.h:1349
@ kAVERAGE
Average over elements. If the tensor is padded, the count includes the padding.
@ kMAX
Maximum over elements.
@ kMAX_AVERAGE_BLEND
Blending between max and average pooling: (1-blendFactor)*maxPool + blendFactor*avgPool.
v_1_0::IProgressMonitor IProgressMonitor
Definition: NvInfer.h:9690
constexpr int32_t EnumMax< FillOperation >() noexcept
Definition: NvInfer.h:5006
AttentionNormalizationOp
Enumerates the operations that may be performed by the normalization in the attention subgraph.
Definition: NvInfer.h:6730
constexpr int32_t EnumMax< ScatterMode >() noexcept
Definition: NvInfer.h:5882
Represents a permutation of dimensions.
Definition: NvInfer.h:2947
Declaration of EnumMaxImpl struct to store maximum number of elements in an enumeration type.
Definition: NvInferRuntimeBase.h:128
The key to retrieve timing cache entries.
Definition: NvInfer.h:9218
Definition: NvInfer.h:9232
uint64_t tacticHash
Hash of the selected tactic.
Definition: NvInfer.h:9234
float timingMSec
Timing of this tactic in milliseconds. Negative numbers and NaN are invalid values.
Definition: NvInfer.h:9236
