TensorRT for RTX 1.4.0
NvInfer.h
Go to the documentation of this file.
1/*
2 * SPDX-FileCopyrightText: Copyright (c) 1993-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
3 * SPDX-License-Identifier: Apache-2.0
4 *
5 * Licensed under the Apache License, Version 2.0 (the "License");
6 * you may not use this file except in compliance with the License.
7 * You may obtain a copy of the License at
8 *
9 * http://www.apache.org/licenses/LICENSE-2.0
10 *
11 * Unless required by applicable law or agreed to in writing, software
12 * distributed under the License is distributed on an "AS IS" BASIS,
13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 * See the License for the specific language governing permissions and
15 * limitations under the License.
16 */
17
18#ifndef NV_INFER_H
19#define NV_INFER_H
20
21#include "NvInferLegacyDims.h"
22#include "NvInferRuntime.h" // IWYU pragma: export
23
35
41
47namespace nvinfer1
48{
49
//! Identifies the kind of a network layer.
//!
//! Every enumerator carries an explicit value. The values are contiguous from 0 to 56,
//! which matches the count of 57 reported by EnumMax<LayerType>() in this file.
enum class LayerType : int32_t
{
    kCONVOLUTION = 0,
    kCAST = 1,
    kACTIVATION = 2,
    kPOOLING = 3,
    kLRN = 4,
    kSCALE = 5,
    kSOFTMAX = 6,
    kDECONVOLUTION = 7,
    kCONCATENATION = 8,
    kELEMENTWISE = 9,
    kPLUGIN = 10,
    kUNARY = 11,
    kPADDING = 12,
    kSHUFFLE = 13,
    kREDUCE = 14,
    kTOPK = 15,
    kGATHER = 16,
    kMATRIX_MULTIPLY = 17,
    kRAGGED_SOFTMAX = 18,
    kCONSTANT = 19,
    kIDENTITY = 20,
    kPLUGIN_V2 = 21,
    kSLICE = 22,
    kSHAPE = 23,
    kPARAMETRIC_RELU = 24,
    kRESIZE = 25,
    kTRIP_LIMIT = 26,
    kRECURRENCE = 27,
    kITERATOR = 28,
    kLOOP_OUTPUT = 29,
    kSELECT = 30,
    kFILL = 31,
    kQUANTIZE = 32,
    kDEQUANTIZE = 33,
    kCONDITION = 34,
    // NOTE(review): values 35 and 36 were absent from the listing this file was taken from;
    // the names below follow the upstream TensorRT header - confirm against it.
    kCONDITIONAL_INPUT = 35,
    kCONDITIONAL_OUTPUT = 36,
    kSCATTER = 37,
    kEINSUM = 38,
    kASSERTION = 39,
    kONE_HOT = 40,
    kNON_ZERO = 41,
    kGRID_SAMPLE = 42,
    kNMS = 43,
    kREVERSE_SEQUENCE = 44,
    kNORMALIZATION = 45,
    kPLUGIN_V3 = 46,
    kSQUEEZE = 47,
    kUNSQUEEZE = 48,
    kCUMULATIVE = 49,
    kDYNAMIC_QUANTIZE = 50,
    kATTENTION_INPUT = 51,
    kATTENTION_OUTPUT = 52,
    kROTARY_EMBEDDING = 53,
    kKVCACHE_UPDATE = 54,
    kMOE = 55,
    kDIST_COLLECTIVE = 56,
};
117
123template <>
124constexpr inline int32_t EnumMax<LayerType>() noexcept
125{
126 return 57;
127}
128
//! 32-bit unsigned type used for the `formats` argument of ITensor::setAllowedFormats().
//! NOTE(review): presumably a bit mask with one bit per tensor format - confirm.
using TensorFormats = uint32_t;
136
//! Enumerates the activation functions selectable on an activation layer.
//! Values are explicit and contiguous from 0 to 13.
enum class ActivationType : int32_t
{
    kRELU = 0,
    kSIGMOID = 1,
    kTANH = 2,
    kLEAKY_RELU = 3,
    kELU = 4,
    kSELU = 5,
    kSOFTSIGN = 6,
    kSOFTPLUS = 7,
    kCLIP = 8,
    kHARD_SIGMOID = 9,
    kSCALED_TANH = 10,
    kTHRESHOLDED_RELU = 11,
    kGELU_ERF = 12,
    kGELU_TANH = 13
};
159
160namespace impl
161{
167template <>
169{
170 static constexpr int32_t kVALUE = 14;
171};
172} // namespace impl
173
188class ITensor : public INoCopy
189{
190public:
206 void setName(char const* name) noexcept
207 {
208 mImpl->setName(name);
209 }
210
218 char const* getName() const noexcept
219 {
220 return mImpl->getName();
221 }
222
237 void setDimensions(Dims const& dimensions) noexcept
238 {
239 mImpl->setDimensions(dimensions);
240 }
241
251 Dims getDimensions() const noexcept
252 {
253 return mImpl->getDimensions();
254 }
255
287 TRT_DEPRECATED void setType(DataType type) noexcept
288 {
289 mImpl->setType(type);
290 }
291
302 DataType getType() const noexcept
303 {
304 return mImpl->getType();
305 }
306
310 bool isNetworkInput() const noexcept
311 {
312 return mImpl->isNetworkInput();
313 }
314
318 bool isNetworkOutput() const noexcept
319 {
320 return mImpl->isNetworkOutput();
321 }
322
340 void setAllowedFormats(TensorFormats formats) noexcept
341 {
342 mImpl->setAllowedFormats(formats);
343 }
344
354 {
355 return mImpl->getAllowedFormats();
356 }
357
384 bool isShapeTensor() const noexcept
385 {
386 return mImpl->isShapeTensor();
387 }
388
405 bool isExecutionTensor() const noexcept
406 {
407 return mImpl->isExecutionTensor();
408 }
409
431 void setDimensionName(int32_t index, char const* name) noexcept
432 {
433 mImpl->setDimensionName(index, name);
434 }
435
446 char const* getDimensionName(int32_t index) const noexcept
447 {
448 return mImpl->getDimensionName(index);
449 }
450
451protected:
452 apiv::VTensor* mImpl;
453 virtual ~ITensor() noexcept = default;
454};
455
463class ILayer : public INoCopy
464{
465public:
471 LayerType getType() const noexcept
472 {
473 return mLayer->getType();
474 }
475
485 void setName(char const* name) noexcept
486 {
487 mLayer->setName(name);
488 }
489
495 char const* getName() const noexcept
496 {
497 return mLayer->getName();
498 }
499
503 int32_t getNbInputs() const noexcept
504 {
505 return mLayer->getNbInputs();
506 }
507
516 ITensor* getInput(int32_t index) const noexcept
517 {
518 return mLayer->getInput(index);
519 }
520
524 int32_t getNbOutputs() const noexcept
525 {
526 return mLayer->getNbOutputs();
527 }
528
534 ITensor* getOutput(int32_t index) const noexcept
535 {
536 return mLayer->getOutput(index);
537 }
538
551 void setInput(int32_t index, ITensor& tensor) noexcept
552 {
553 return mLayer->setInput(index, tensor);
554 }
555
584 TRT_DEPRECATED void setPrecision(DataType dataType) noexcept
585 {
586 mLayer->setPrecision(dataType);
587 }
588
596 DataType getPrecision() const noexcept
597 {
598 return mLayer->getPrecision();
599 }
600
610 TRT_DEPRECATED bool precisionIsSet() const noexcept
611 {
612 return mLayer->precisionIsSet();
613 }
614
623 {
624 mLayer->resetPrecision();
625 }
626
672 TRT_DEPRECATED void setOutputType(int32_t index, DataType dataType) noexcept
673 {
674 mLayer->setOutputType(index, dataType);
675 }
676
687 DataType getOutputType(int32_t index) const noexcept
688 {
689 return mLayer->getOutputType(index);
690 }
691
703 TRT_DEPRECATED bool outputTypeIsSet(int32_t index) const noexcept
704 {
705 return mLayer->outputTypeIsSet(index);
706 }
707
717 TRT_DEPRECATED void resetOutputType(int32_t index) noexcept
718 {
719 return mLayer->resetOutputType(index);
720 }
721
735 void setMetadata(char const* metadata) noexcept
736 {
737 mLayer->setMetadata(metadata);
738 }
739
748 char const* getMetadata() const noexcept
749 {
750 return mLayer->getMetadata();
751 }
752
769 bool setNbRanks(int32_t nbRanks) noexcept
770 {
771 return mLayer->setNbRanks(nbRanks);
772 }
773
781 int32_t getNbRanks() const noexcept
782 {
783 return mLayer->getNbRanks();
784 }
785
786protected:
787 virtual ~ILayer() noexcept = default;
788 apiv::VLayer* mLayer;
789};
790
//! Enumerates the padding modes accepted by the convolution, deconvolution and pooling
//! layers' setPaddingMode().
enum class PaddingMode : int32_t
{
    // NOTE(review): values 0 and 1 were absent from the listing although the matching
    // EnumMaxImpl reports 4 enumerators; names follow the upstream TensorRT header - confirm.
    kEXPLICIT_ROUND_DOWN = 0,
    kEXPLICIT_ROUND_UP = 1,
    kSAME_UPPER = 2,
    kSAME_LOWER = 3,
};
954
955namespace impl
956{
962template <>
964{
965 static constexpr int32_t kVALUE = 4;
966};
967} // namespace impl
968
982{
983public:
991 void setNbOutputMaps(int64_t nbOutputMaps) noexcept
992 {
993 mImpl->setNbOutputMaps(nbOutputMaps);
994 }
995
1001 int64_t getNbOutputMaps() const noexcept
1002 {
1003 return mImpl->getNbOutputMaps();
1004 }
1005
1021 void setNbGroups(int64_t nbGroups) noexcept
1022 {
1023 mImpl->setNbGroups(nbGroups);
1024 }
1025
1031 int64_t getNbGroups() const noexcept
1032 {
1033 return mImpl->getNbGroups();
1034 }
1035
1045 void setKernelWeights(Weights weights) noexcept
1046 {
1047 mImpl->setKernelWeights(weights);
1048 }
1049
1055 Weights getKernelWeights() const noexcept
1056 {
1057 return mImpl->getKernelWeights();
1058 }
1059
1070 void setBiasWeights(Weights weights) noexcept
1071 {
1072 mImpl->setBiasWeights(weights);
1073 }
1074
1080 Weights getBiasWeights() const noexcept
1081 {
1082 return mImpl->getBiasWeights();
1083 }
1084
1097 void setPrePadding(Dims const& padding) noexcept
1098 {
1099 mImpl->setPrePadding(padding);
1100 }
1101
1107 Dims getPrePadding() const noexcept
1108 {
1109 return mImpl->getPrePadding();
1110 }
1111
1124 void setPostPadding(Dims const& padding) noexcept
1125 {
1126 mImpl->setPostPadding(padding);
1127 }
1128
1134 Dims getPostPadding() const noexcept
1135 {
1136 return mImpl->getPostPadding();
1137 }
1138
1148 void setPaddingMode(PaddingMode paddingMode) noexcept
1149 {
1150 mImpl->setPaddingMode(paddingMode);
1151 }
1152
1161 {
1162 return mImpl->getPaddingMode();
1163 }
1164
1173 void setKernelSizeNd(Dims const& kernelSize) noexcept
1174 {
1175 mImpl->setKernelSizeNd(kernelSize);
1176 }
1177
1183 Dims getKernelSizeNd() const noexcept
1184 {
1185 return mImpl->getKernelSizeNd();
1186 }
1187
1198 void setStrideNd(Dims const& stride) noexcept
1199 {
1200 mImpl->setStrideNd(stride);
1201 }
1202
1208 Dims getStrideNd() const noexcept
1209 {
1210 return mImpl->getStrideNd();
1211 }
1212
1226 void setPaddingNd(Dims const& padding) noexcept
1227 {
1228 mImpl->setPaddingNd(padding);
1229 }
1230
1238 Dims getPaddingNd() const noexcept
1239 {
1240 return mImpl->getPaddingNd();
1241 }
1242
1252 void setDilationNd(Dims const& dilation) noexcept
1253 {
1254 mImpl->setDilationNd(dilation);
1255 }
1256
1262 Dims getDilationNd() const noexcept
1263 {
1264 return mImpl->getDilationNd();
1265 }
1266
1281 using ILayer::setInput;
1282
1283protected:
1284 virtual ~IConvolutionLayer() noexcept = default;
1285 apiv::VConvolutionLayer* mImpl;
1286};
1287
1302{
1303public:
1312 {
1313 mImpl->setActivationType(type);
1314 }
1315
1322 {
1323 return mImpl->getActivationType();
1324 }
1325
1336 void setAlpha(float alpha) noexcept
1337 {
1338 mImpl->setAlpha(alpha);
1339 }
1340
1350 void setBeta(float beta) noexcept
1351 {
1352 mImpl->setBeta(beta);
1353 }
1354
1359 float getAlpha() const noexcept
1360 {
1361 return mImpl->getAlpha();
1362 }
1363
1368 float getBeta() const noexcept
1369 {
1370 return mImpl->getBeta();
1371 }
1372
1373protected:
1374 virtual ~IActivationLayer() noexcept = default;
1375 apiv::VActivationLayer* mImpl;
1376};
1377
//! Enumerates the pooling kinds accepted by IPoolingLayer::setPoolingType().
enum class PoolingType : int32_t
{
    kMAX = 0,
    kAVERAGE = 1,
    // NOTE(review): value 2 was absent from the listing although the matching EnumMaxImpl
    // reports 3 enumerators; the name follows the upstream TensorRT header - confirm.
    kMAX_AVERAGE_BLEND = 2
};
1389
1390namespace impl
1391{
1397template <>
1399{
1400 static constexpr int32_t kVALUE = 3;
1401};
1402} // namespace impl
1403
1415class IPoolingLayer : public ILayer
1416{
1417public:
1425 void setPoolingType(PoolingType type) noexcept
1426 {
1427 mImpl->setPoolingType(type);
1428 }
1429
1436 {
1437 return mImpl->getPoolingType();
1438 }
1439
1450 void setBlendFactor(float blendFactor) noexcept
1451 {
1452 mImpl->setBlendFactor(blendFactor);
1453 }
1454
1463 float getBlendFactor() const noexcept
1464 {
1465 return mImpl->getBlendFactor();
1466 }
1467
1477 void setAverageCountExcludesPadding(bool exclusive) noexcept
1478 {
1479 mImpl->setAverageCountExcludesPadding(exclusive);
1480 }
1481
1489 {
1490 return mImpl->getAverageCountExcludesPadding();
1491 }
1492
1506 void setPrePadding(Dims const& padding) noexcept
1507 {
1508 mImpl->setPrePadding(padding);
1509 }
1510
1516 Dims getPrePadding() const noexcept
1517 {
1518 return mImpl->getPrePadding();
1519 }
1520
1534 void setPostPadding(Dims const& padding) noexcept
1535 {
1536 mImpl->setPostPadding(padding);
1537 }
1538
1544 Dims getPostPadding() const noexcept
1545 {
1546 return mImpl->getPostPadding();
1547 }
1548
1557 void setPaddingMode(PaddingMode paddingMode) noexcept
1558 {
1559 mImpl->setPaddingMode(paddingMode);
1560 }
1561
1569 {
1570 return mImpl->getPaddingMode();
1571 }
1572
1581 void setWindowSizeNd(Dims const& windowSize) noexcept
1582 {
1583 mImpl->setWindowSizeNd(windowSize);
1584 }
1585
1591 Dims getWindowSizeNd() const noexcept
1592 {
1593 return mImpl->getWindowSizeNd();
1594 }
1595
1606 void setStrideNd(Dims const& stride) noexcept
1607 {
1608 mImpl->setStrideNd(stride);
1609 }
1610
1616 Dims getStrideNd() const noexcept
1617 {
1618 return mImpl->getStrideNd();
1619 }
1620
1635 void setPaddingNd(Dims const& padding) noexcept
1636 {
1637 mImpl->setPaddingNd(padding);
1638 }
1639
1647 Dims getPaddingNd() const noexcept
1648 {
1649 return mImpl->getPaddingNd();
1650 }
1651
1652protected:
1653 virtual ~IPoolingLayer() noexcept = default;
1654 apiv::VPoolingLayer* mImpl;
1655};
1656
1666class ILRNLayer : public ILayer
1667{
1668public:
1678 void setWindowSize(int64_t windowSize) noexcept
1679 {
1680 mImpl->setWindowSize(windowSize);
1681 }
1682
1688 int64_t getWindowSize() const noexcept
1689 {
1690 return mImpl->getWindowSize();
1691 }
1692
1700 void setAlpha(float alpha) noexcept
1701 {
1702 mImpl->setAlpha(alpha);
1703 }
1704
1710 float getAlpha() const noexcept
1711 {
1712 return mImpl->getAlpha();
1713 }
1714
1722 void setBeta(float beta) noexcept
1723 {
1724 mImpl->setBeta(beta);
1725 }
1726
1732 float getBeta() const noexcept
1733 {
1734 return mImpl->getBeta();
1735 }
1736
1744 void setK(float k) noexcept
1745 {
1746 mImpl->setK(k);
1747 }
1748
1754 float getK() const noexcept
1755 {
1756 return mImpl->getK();
1757 }
1758
1759protected:
1760 virtual ~ILRNLayer() noexcept = default;
1761 apiv::VLRNLayer* mImpl;
1762};
1763
//! Enumerates the modes accepted by IScaleLayer::setMode().
enum class ScaleMode : int32_t
{
    kUNIFORM = 0,
    kCHANNEL = 1,
    kELEMENTWISE = 2
};
1775
1781template <>
1782constexpr inline int32_t EnumMax<ScaleMode>() noexcept
1783{
1784 return 3;
1785}
1786
1812class IScaleLayer : public ILayer
1813{
1814public:
1820 void setMode(ScaleMode mode) noexcept
1821 {
1822 mImpl->setMode(mode);
1823 }
1824
1830 ScaleMode getMode() const noexcept
1831 {
1832 return mImpl->getMode();
1833 }
1834
1840 void setShift(Weights shift) noexcept
1841 {
1842 mImpl->setShift(shift);
1843 }
1844
1850 Weights getShift() const noexcept
1851 {
1852 return mImpl->getShift();
1853 }
1854
1860 void setScale(Weights scale) noexcept
1861 {
1862 mImpl->setScale(scale);
1863 }
1864
1870 Weights getScale() const noexcept
1871 {
1872 return mImpl->getScale();
1873 }
1874
1880 void setPower(Weights power) noexcept
1881 {
1882 mImpl->setPower(power);
1883 }
1884
1890 Weights getPower() const noexcept
1891 {
1892 return mImpl->getPower();
1893 }
1894
1905 int32_t getChannelAxis() const noexcept
1906 {
1907 return mImpl->getChannelAxis();
1908 }
1909
1926 void setChannelAxis(int32_t channelAxis) noexcept
1927 {
1928 mImpl->setChannelAxis(channelAxis);
1929 }
1930
1931protected:
1932 virtual ~IScaleLayer() noexcept = default;
1933 apiv::VScaleLayer* mImpl;
1934};
1935
1956class ISoftMaxLayer : public ILayer
1957{
1958public:
1979 void setAxes(uint32_t axes) noexcept
1980 {
1981 mImpl->setAxes(axes);
1982 }
1983
1989 uint32_t getAxes() const noexcept
1990 {
1991 return mImpl->getAxes();
1992 }
1993
1994protected:
1995 virtual ~ISoftMaxLayer() noexcept = default;
1996 apiv::VSoftMaxLayer* mImpl;
1997};
1998
2012{
2013public:
2025 void setAxis(int32_t axis) noexcept
2026 {
2027 mImpl->setAxis(axis);
2028 }
2029
2035 int32_t getAxis() const noexcept
2036 {
2037 return mImpl->getAxis();
2038 }
2039
2040protected:
2041 virtual ~IConcatenationLayer() noexcept = default;
2042 apiv::VConcatenationLayer* mImpl;
2043};
2044
2053{
2054public:
2062 void setNbOutputMaps(int64_t nbOutputMaps) noexcept
2063 {
2064 mImpl->setNbOutputMaps(nbOutputMaps);
2065 }
2066
2072 int64_t getNbOutputMaps() const noexcept
2073 {
2074 return mImpl->getNbOutputMaps();
2075 }
2076
2092 void setNbGroups(int64_t nbGroups) noexcept
2093 {
2094 mImpl->setNbGroups(nbGroups);
2095 }
2096
2102 int64_t getNbGroups() const noexcept
2103 {
2104 return mImpl->getNbGroups();
2105 }
2106
2116 void setKernelWeights(Weights weights) noexcept
2117 {
2118 mImpl->setKernelWeights(weights);
2119 }
2120
2126 Weights getKernelWeights() const noexcept
2127 {
2128 return mImpl->getKernelWeights();
2129 }
2130
2141 void setBiasWeights(Weights weights) noexcept
2142 {
2143 mImpl->setBiasWeights(weights);
2144 }
2145
2151 Weights getBiasWeights() const noexcept
2152 {
2153 return mImpl->getBiasWeights();
2154 }
2155
2168 void setPrePadding(Dims const& padding) noexcept
2169 {
2170 mImpl->setPrePadding(padding);
2171 }
2172
2178 Dims getPrePadding() const noexcept
2179 {
2180 return mImpl->getPrePadding();
2181 }
2182
2195 void setPostPadding(Dims const& padding) noexcept
2196 {
2197 mImpl->setPostPadding(padding);
2198 }
2199
2205 Dims getPostPadding() const noexcept
2206 {
2207 return mImpl->getPostPadding();
2208 }
2209
2219 void setPaddingMode(PaddingMode paddingMode) noexcept
2220 {
2221 mImpl->setPaddingMode(paddingMode);
2222 }
2223
2232 {
2233 return mImpl->getPaddingMode();
2234 }
2235
2246 void setKernelSizeNd(Dims const& kernelSize) noexcept
2247 {
2248 mImpl->setKernelSizeNd(kernelSize);
2249 }
2250
2256 Dims getKernelSizeNd() const noexcept
2257 {
2258 return mImpl->getKernelSizeNd();
2259 }
2260
2273 void setStrideNd(Dims const& stride) noexcept
2274 {
2275 mImpl->setStrideNd(stride);
2276 }
2277
2283 Dims getStrideNd() const noexcept
2284 {
2285 return mImpl->getStrideNd();
2286 }
2287
2301 void setPaddingNd(Dims const& padding) noexcept
2302 {
2303 mImpl->setPaddingNd(padding);
2304 }
2305
2313 Dims getPaddingNd() const noexcept
2314 {
2315 return mImpl->getPaddingNd();
2316 }
2317
2330 using ILayer::setInput;
2331
2339 void setDilationNd(Dims const& dilation) noexcept
2340 {
2341 mImpl->setDilationNd(dilation);
2342 }
2343
2349 Dims getDilationNd() const noexcept
2350 {
2351 return mImpl->getDilationNd();
2352 }
2353
2354protected:
2355 virtual ~IDeconvolutionLayer() noexcept = default;
2356 apiv::VDeconvolutionLayer* mImpl;
2357};
2358
//! Enumerates the binary operations selectable on an elementwise layer.
//! Values are explicit and contiguous from 0 to 13.
enum class ElementWiseOperation : int32_t
{
    kSUM = 0,
    kPROD = 1,
    kMAX = 2,
    kMIN = 3,
    kSUB = 4,
    kDIV = 5,
    kPOW = 6,
    kFLOOR_DIV = 7,
    kAND = 8,
    kOR = 9,
    kXOR = 10,
    kEQUAL = 11,
    kGREATER = 12,
    kLESS = 13
};
2388
2389namespace impl
2390{
2396template <>
2398{
2399 static constexpr int32_t kVALUE = 14;
2400};
2401} // namespace impl
2402
2423{
2424public:
2435 {
2436 return mImpl->setOperation(op);
2437 }
2438
2447 {
2448 return mImpl->getOperation();
2449 }
2450
2451protected:
2452 apiv::VElementWiseLayer* mImpl;
2453 virtual ~IElementWiseLayer() noexcept = default;
2454};
2455
//! Enumerates the modes accepted by IGatherLayer::setMode().
enum class GatherMode : int32_t
{
    kDEFAULT = 0,
    kELEMENT = 1,
    kND = 2
};
2467
2473template <>
2474constexpr inline int32_t EnumMax<GatherMode>() noexcept
2475{
2476 return 3;
2477}
2478
2555class IGatherLayer : public ILayer
2556{
2557public:
2567 void setGatherAxis(int32_t axis) noexcept
2568 {
2569 mImpl->setGatherAxis(axis);
2570 }
2571
2579 int32_t getGatherAxis() const noexcept
2580 {
2581 return mImpl->getGatherAxis();
2582 }
2583
2602 void setNbElementWiseDims(int32_t elementWiseDims) noexcept
2603 {
2604 mImpl->setNbElementWiseDims(elementWiseDims);
2605 }
2606
2612 int32_t getNbElementWiseDims() const noexcept
2613 {
2614 return mImpl->getNbElementWiseDims();
2615 }
2616
2622 void setMode(GatherMode mode) noexcept
2623 {
2624 mImpl->setMode(mode);
2625 }
2626
2632 GatherMode getMode() const noexcept
2633 {
2634 return mImpl->getMode();
2635 }
2636
2637protected:
2638 apiv::VGatherLayer* mImpl;
2639 virtual ~IGatherLayer() noexcept = default;
2640};
2641
2654{
2655public:
2662 {
2663 return mImpl->getPlugin();
2664 }
2665
2666protected:
2667 apiv::VPluginV2Layer* mImpl;
2668 virtual ~IPluginV2Layer() noexcept = default;
2669};
2670
2681{
2682public:
2689 {
2690 return mImpl->getPlugin();
2691 }
2692
2693protected:
2694 apiv::VPluginV3Layer* mImpl;
2695 virtual ~IPluginV3Layer() noexcept = default;
2696};
2697
//! Enumerates the unary operations selectable on a unary layer.
//! Values are explicit and contiguous from 0 to 24.
enum class UnaryOperation : int32_t
{
    kEXP = 0,
    kLOG = 1,
    kSQRT = 2,
    kRECIP = 3,
    kABS = 4,
    kNEG = 5,
    kSIN = 6,
    kCOS = 7,
    kTAN = 8,
    kSINH = 9,
    kCOSH = 10,
    kASIN = 11,
    kACOS = 12,
    kATAN = 13,
    kASINH = 14,
    kACOSH = 15,
    kATANH = 16,
    kCEIL = 17,
    kFLOOR = 18,
    kERF = 19,
    kNOT = 20,
    kSIGN = 21,
    kROUND = 22,
    kISINF = 23,
    kISNAN = 24,
};
2742
2748template <>
2749constexpr inline int32_t EnumMax<UnaryOperation>() noexcept
2750{
2751 return 25;
2752}
2753
2761class IUnaryLayer : public ILayer
2762{
2763public:
2772 {
2773 mImpl->setOperation(op);
2774 }
2775
2782 {
2783 return mImpl->getOperation();
2784 }
2785
2786protected:
2787 apiv::VUnaryLayer* mImpl;
2788 virtual ~IUnaryLayer() noexcept = default;
2789};
2790
//! Enumerates the reduction operations. Values are explicit and contiguous from 0 to 5.
enum class ReduceOperation : int32_t
{
    kSUM = 0,
    kPROD = 1,
    kMAX = 2,
    kMIN = 3,
    kAVG = 4,
    kNONE = 5,
};
2823
2829template <>
2830constexpr inline int32_t EnumMax<ReduceOperation>() noexcept
2831{
2832 return 6;
2833}
2834
//! Enumerates the collective operations. Values are explicit and contiguous from 0 to 4.
enum class CollectiveOperation : int32_t
{
    kALL_REDUCE = 0,
    kALL_GATHER = 1,
    kBROADCAST = 2,
    kREDUCE = 3,
    kREDUCE_SCATTER = 4,
};
2850
2856template <>
2858{
2859 static constexpr int32_t kVALUE = 5;
2860};
2861
2869class IReduceLayer : public ILayer
2870{
2871public:
2878 {
2879 mImpl->setOperation(op);
2880 }
2881
2888 {
2889 return mImpl->getOperation();
2890 }
2891
2897 void setReduceAxes(uint32_t reduceAxes) noexcept
2898 {
2899 mImpl->setReduceAxes(reduceAxes);
2900 }
2901
2907 uint32_t getReduceAxes() const noexcept
2908 {
2909 return mImpl->getReduceAxes();
2910 }
2911
2917 void setKeepDimensions(bool keepDimensions) noexcept
2918 {
2919 mImpl->setKeepDimensions(keepDimensions);
2920 }
2921
2927 bool getKeepDimensions() const noexcept
2928 {
2929 return mImpl->getKeepDimensions();
2930 }
2931
2932protected:
2933 apiv::VReduceLayer* mImpl;
2934 virtual ~IReduceLayer() noexcept = default;
2935};
2936
2949class IPaddingLayer : public ILayer
2950{
2951public:
2961 void setPrePaddingNd(Dims const& padding) noexcept
2962 {
2963 mImpl->setPrePaddingNd(padding);
2964 }
2965
2973 Dims getPrePaddingNd() const noexcept
2974 {
2975 return mImpl->getPrePaddingNd();
2976 }
2977
2987 void setPostPaddingNd(Dims const& padding) noexcept
2988 {
2989 mImpl->setPostPaddingNd(padding);
2990 }
2991
2999 Dims getPostPaddingNd() const noexcept
3000 {
3001 return mImpl->getPostPaddingNd();
3002 }
3003
3004protected:
3005 apiv::VPaddingLayer* mImpl;
3006 virtual ~IPaddingLayer() noexcept = default;
3007};
3008
3015{
3022 int32_t order[Dims::MAX_DIMS];
3023};
3024
3037class IShuffleLayer : public ILayer
3038{
3039public:
3049 void setFirstTranspose(Permutation permutation) noexcept
3050 {
3051 mImpl->setFirstTranspose(permutation);
3052 }
3053
3062 {
3063 return mImpl->getFirstTranspose();
3064 }
3065
3089 void setReshapeDimensions(Dims const& dimensions) noexcept
3090 {
3091 mImpl->setReshapeDimensions(dimensions);
3092 }
3093
3103 {
3104 return mImpl->getReshapeDimensions();
3105 }
3106
3112 //
3135 using ILayer::setInput;
3136
3149 void setSecondTranspose(Permutation permutation) noexcept
3150 {
3151 mImpl->setSecondTranspose(permutation);
3152 }
3153
3162 {
3163 return mImpl->getSecondTranspose();
3164 }
3165
3177 void setZeroIsPlaceholder(bool zeroIsPlaceholder) noexcept
3178 {
3179 return mImpl->setZeroIsPlaceholder(zeroIsPlaceholder);
3180 }
3181
3190 bool getZeroIsPlaceholder() const noexcept
3191 {
3192 return mImpl->getZeroIsPlaceholder();
3193 }
3194
3195protected:
3196 apiv::VShuffleLayer* mImpl;
3197 virtual ~IShuffleLayer() noexcept = default;
3198};
3199
//! Enumerates the modes accepted by ISliceLayer::setMode().
//! Values are explicit and contiguous from 0 to 4.
enum class SampleMode : int32_t
{
    kSTRICT_BOUNDS = 0,
    kWRAP = 1,
    kCLAMP = 2,
    kFILL = 3,
    kREFLECT = 4,
};
3215
3221template <>
3222constexpr inline int32_t EnumMax<SampleMode>() noexcept
3223{
3224 return 5;
3225}
3226
3289class ISliceLayer : public ILayer
3290{
3291public:
3301 void setStart(Dims const& start) noexcept
3302 {
3303 mImpl->setStart(start);
3304 }
3305
3316 Dims getStart() const noexcept
3317 {
3318 return mImpl->getStart();
3319 }
3320
3330 void setSize(Dims const& size) noexcept
3331 {
3332 return mImpl->setSize(size);
3333 }
3334
3345 Dims getSize() const noexcept
3346 {
3347 return mImpl->getSize();
3348 }
3349
3359 void setStride(Dims const& stride) noexcept
3360 {
3361 mImpl->setStride(stride);
3362 }
3363
3374 Dims getStride() const noexcept
3375 {
3376 return mImpl->getStride();
3377 }
3378
3384 void setMode(SampleMode mode) noexcept
3385 {
3386 mImpl->setMode(mode);
3387 }
3388
3394 SampleMode getMode() const noexcept
3395 {
3396 return mImpl->getMode();
3397 }
3398
3426 using ILayer::setInput;
3427
3437 void setAxes(Dims const& axes) noexcept
3438 {
3439 mImpl->setAxes(axes);
3440 }
3441
3452 Dims getAxes() const noexcept
3453 {
3454 return mImpl->getAxes();
3455 }
3456
3457protected:
3458 apiv::VSliceLayer* mImpl;
3459 virtual ~ISliceLayer() noexcept = default;
3460};
3461
3474class IShapeLayer : public ILayer
3475{
3476protected:
3477 apiv::VShapeLayer* mImpl;
3478 virtual ~IShapeLayer() noexcept = default;
3479};
3480
//! Enumerates the operations accepted by ITopKLayer::setOperation().
enum class TopKOperation : int32_t
{
    kMAX = 0,
    kMIN = 1,
};
3491
3497template <>
3498constexpr inline int32_t EnumMax<TopKOperation>() noexcept
3499{
3500 return 2;
3501}
3502
3514class ITopKLayer : public ILayer
3515{
3516public:
3522 void setOperation(TopKOperation op) noexcept
3523 {
3524 mImpl->setOperation(op);
3525 }
3526
3533 {
3534 return mImpl->getOperation();
3535 }
3536
3546 void setK(int32_t k) noexcept
3547 {
3548 mImpl->setK(k);
3549 }
3550
3560 int32_t getK() const noexcept
3561 {
3562 return mImpl->getK();
3563 }
3564
3570 void setReduceAxes(uint32_t reduceAxes) noexcept
3571 {
3572 mImpl->setReduceAxes(reduceAxes);
3573 }
3574
3580 uint32_t getReduceAxes() const noexcept
3581 {
3582 return mImpl->getReduceAxes();
3583 }
3584
3599 using ILayer::setInput;
3600
3611 bool setIndicesType(DataType type) noexcept
3612 {
3613 return mImpl->setIndicesType(type);
3614 }
3615
3623 DataType getIndicesType() const noexcept
3624 {
3625 return mImpl->getIndicesType();
3626 }
3627
3628protected:
3629 apiv::VTopKLayer* mImpl;
3630 virtual ~ITopKLayer() noexcept = default;
3631};
3632
//! Enumerates the per-operand operations accepted by IMatrixMultiplyLayer::setOperation().
enum class MatrixOperation : int32_t
{
    kNONE = 0,

    kTRANSPOSE = 1,

    kVECTOR = 2,
};
3661
3667template <>
3668constexpr inline int32_t EnumMax<MatrixOperation>() noexcept
3669{
3670 return 3;
3671}
3672
3699{
3700public:
3709 void setOperation(int32_t index, MatrixOperation op) noexcept
3710 {
3711 mImpl->setOperation(index, op);
3712 }
3713
3721 MatrixOperation getOperation(int32_t index) const noexcept
3722 {
3723 return mImpl->getOperation(index);
3724 }
3725
3726protected:
3727 apiv::VMatrixMultiplyLayer* mImpl;
3728 virtual ~IMatrixMultiplyLayer() noexcept = default;
3729};
3730
3752class INonZeroLayer : public ILayer
3753{
3754public:
3765 bool setIndicesType(DataType type) noexcept
3766 {
3767 return mImpl->setIndicesType(type);
3768 }
3769
3777 DataType getIndicesType() const noexcept
3778 {
3779 return mImpl->getIndicesType();
3780 }
3781
3782protected:
3783 virtual ~INonZeroLayer() noexcept = default;
3784 apiv::VNonZeroLayer* mImpl;
3785};
3786
3802{
3803protected:
3804 apiv::VRaggedSoftMaxLayer* mImpl;
3805 virtual ~IRaggedSoftMaxLayer() noexcept = default;
3806};
3807
3852{
3853protected:
3854 apiv::VIdentityLayer* mImpl;
3855 virtual ~IIdentityLayer() noexcept = default;
3856};
3857
3864class ICastLayer : public ILayer
3865{
3866public:
3874 void setToType(DataType toType) noexcept
3875 {
3876 mImpl->setToType(toType);
3877 }
3878
3885 DataType getToType() const noexcept
3886 {
3887 return mImpl->getToType();
3888 }
3889
3890protected:
3891 apiv::VCastLayer* mImpl;
3892 virtual ~ICastLayer() noexcept = default;
3893};
3894
3904{
3905public:
3914 void setWeights(Weights weights) noexcept
3915 {
3916 mImpl->setWeights(weights);
3917 }
3918
3924 Weights getWeights() const noexcept
3925 {
3926 return mImpl->getWeights();
3927 }
3928
3936 void setDimensions(Dims const& dimensions) noexcept
3937 {
3938 mImpl->setDimensions(dimensions);
3939 }
3940
3948 Dims getDimensions() const noexcept
3949 {
3950 return mImpl->getDimensions();
3951 }
3952
3953protected:
3954 apiv::VConstantLayer* mImpl;
3955 virtual ~IConstantLayer() noexcept = default;
3956};
3957
3968{
3969protected:
3970 apiv::VParametricReLULayer* mImpl;
3971 virtual ~IParametricReLULayer() noexcept = default;
3972};
3973
//! Enumerates the interpolation modes accepted by IResizeLayer::setResizeMode().
enum class InterpolationMode : int32_t
{
    kNEAREST = 0,
    kLINEAR = 1,
    kCUBIC = 2
};
3985
3986namespace impl
3987{
3993template <>
3995{
3996 static constexpr int32_t kVALUE = 3;
3997};
3998} // namespace impl
3999
//! Enumerates the coordinate transformations of the resize layer.
// NOTE(review): the enum header line was missing from the listing; the type name and the
// int32_t underlying type follow the upstream TensorRT header - confirm.
enum class ResizeCoordinateTransformation : int32_t
{
    kALIGN_CORNERS = 0,

    kASYMMETRIC = 1,

    kHALF_PIXEL = 2,
};
4039
4040namespace impl
4041{
4047template <>
4049{
4050 static constexpr int32_t kVALUE = 3;
4051};
4052} // namespace impl
4053
//! Enumerates the selectors of the resize layer's single-pixel setting
//! (see the setSelectorForSinglePixel forwarding call in IResizeLayer).
enum class ResizeSelector : int32_t
{
    kFORMULA = 0,

    kUPPER = 1,
};
4069
4070namespace impl
4071{
4077template <>
4079{
4080 static constexpr int32_t kVALUE = 2;
4081};
4082} // namespace impl
4083
//! Enumerates the rounding modes of the resize layer's nearest-rounding setting
//! (see the setNearestRounding forwarding call in IResizeLayer).
enum class ResizeRoundMode : int32_t
{
    kHALF_UP = 0,

    kHALF_DOWN = 1,

    kFLOOR = 2,

    kCEIL = 3,
};
4105
4106namespace impl
4107{
4113template <>
4115{
4116 static constexpr int32_t kVALUE = 4;
4117};
4118} // namespace impl
4119
4156class IResizeLayer : public ILayer
4157{
4158public:
4177 void setOutputDimensions(Dims const& dimensions) noexcept
4178 {
4179 return mImpl->setOutputDimensions(dimensions);
4180 }
4181
4187 Dims getOutputDimensions() const noexcept
4188 {
4189 return mImpl->getOutputDimensions();
4190 }
4191
4217 void setScales(float const* scales, int32_t nbScales) noexcept
4218 {
4219 mImpl->setScales(scales, nbScales);
4220 }
4221
4236 int32_t getScales(int32_t size, float* scales) const noexcept
4237 {
4238 return mImpl->getScales(size, scales);
4239 }
4240
4248 void setResizeMode(InterpolationMode interpolationMode) noexcept
4249 {
4250 mImpl->setResizeMode(interpolationMode);
4251 }
4252
4259 {
4260 return mImpl->getResizeMode();
4261 }
4262
4282 using ILayer::setInput;
4283
4294 {
4295 mImpl->setCoordinateTransformation(coordTransform);
4296 }
4297
4304 {
4305 return mImpl->getCoordinateTransformation();
4306 }
4307
4319 {
4320 mImpl->setSelectorForSinglePixel(selector);
4321 }
4322
4329 {
4330 return mImpl->getSelectorForSinglePixel();
4331 }
4332
4343 {
4344 mImpl->setNearestRounding(value);
4345 }
4346
4353 {
4354 return mImpl->getNearestRounding();
4355 }
4356
4374 void setCubicCoeff(float A) noexcept
4375 {
4376 mImpl->setCubicCoeff(A);
4377 }
4378
4384 float getCubicCoeff() const noexcept
4385 {
4386 return mImpl->getCubicCoeff();
4387 }
4388
4397 void setExcludeOutside(bool excludeFlag) noexcept
4398 {
4399 mImpl->setExcludeOutside(excludeFlag);
4400 }
4401
4407 bool getExcludeOutside() const noexcept
4408 {
4409 return mImpl->getExcludeOutside();
4410 }
4411
4412protected:
4413 virtual ~IResizeLayer() noexcept = default;
4414 apiv::VResizeLayer* mImpl;
4415};
4416
//! Enumerates the loop output kinds reported by ILoopOutputLayer::getLoopOutput().
enum class LoopOutput : int32_t
{
    kLAST_VALUE = 0,

    kCONCATENATE = 1,

    kREVERSE = 2
};
4433
4439template <>
4440constexpr inline int32_t EnumMax<LoopOutput>() noexcept
4441{
4442 return 3;
4443}
4444
//! Enumerates the trip limit kinds reported by ITripLimitLayer::getTripLimit().
enum class TripLimit : int32_t
{
    kCOUNT = 0,
    kWHILE = 1
};
4456
4462template <>
4463constexpr inline int32_t EnumMax<TripLimit>() noexcept
4464{
4465 return 2;
4466}
4467
4468class ILoop;
4469
4484{
4485public:
4489 ILoop* getLoop() const noexcept
4490 {
4491 return mBoundary->getLoop();
4492 }
4493
4494protected:
4495 virtual ~ILoopBoundaryLayer() noexcept = default;
4496 apiv::VLoopBoundaryLayer* mBoundary;
4497};
4498
4507{
4508public:
4513 {
4514 return mBoundary->getConditional();
4515 }
4516
4517protected:
4518 virtual ~IIfConditionalBoundaryLayer() noexcept = default;
4519 apiv::VConditionalBoundaryLayer* mBoundary;
4520};
4521
4528{
4529public:
4530protected:
4531 virtual ~IConditionLayer() noexcept = default;
4532 apiv::VConditionLayer* mImpl;
4533};
4534
4545{
4546public:
4547protected:
4548 virtual ~IIfConditionalOutputLayer() noexcept = default;
4549 apiv::VConditionalOutputLayer* mImpl;
4550};
4551
4558{
4559public:
4560protected:
4561 virtual ~IIfConditionalInputLayer() noexcept = default;
4562 apiv::VConditionalInputLayer* mImpl;
4563};
4564
// NOTE(review): the class-head line is missing from this listing; the destructor below shows
// this body belongs to IIfConditional. Every public member forwards to the implementation
// object mImpl and returns its result unchanged.
4590{
4591public:
// NOTE(review): signature line missing from this listing; the body forwards `condition`
// to mImpl->setCondition() and returns its result.
4602 {
4603 return mImpl->setCondition(condition);
4604 }
4605
// Registers a pair of tensors (one per subgraph, going by the parameter names) and returns
// the layer pointer produced by mImpl->addOutput().
4619 IIfConditionalOutputLayer* addOutput(ITensor& trueSubgraphOutput, ITensor& falseSubgraphOutput) noexcept
4620 {
4621 return mImpl->addOutput(trueSubgraphOutput, falseSubgraphOutput);
4622 }
4623
// NOTE(review): signature line missing from this listing; the body forwards `input`
// to mImpl->addInput() and returns its result.
4632 {
4633 return mImpl->addInput(input);
4634 }
4635
// Forwards the name string to mImpl->setName().
// NOTE(review): whether the string is copied or must outlive the call is not visible here.
4646 void setName(char const* name) noexcept
4647 {
4648 mImpl->setName(name);
4649 }
4650
// Returns the name string reported by mImpl->getName().
4656 char const* getName() const noexcept
4657 {
4658 return mImpl->getName();
4659 }
4660
4661protected:
// Protected destructor: code outside the class hierarchy cannot destroy the object through this type.
4662 virtual ~IIfConditional() noexcept = default;
// Implementation object that all public members forward to.
4663 apiv::VIfConditional* mImpl;
4664};
4665
// NOTE(review): the class-head line is missing from this listing; the destructor below shows
// this body belongs to IRecurrenceLayer.
4674{
4675public:
4681 //
// Re-exports the inherited ILayer::setInput so it is callable through this class.
4694 using ILayer::setInput;
4695
4696protected:
// Protected destructor: code outside the class hierarchy cannot destroy the object through this type.
4697 virtual ~IRecurrenceLayer() noexcept = default;
// Pointer to the implementation object; not used by any member visible in this class body.
4698 apiv::VRecurrenceLayer* mImpl;
4699};
4700
// NOTE(review): the class-head line is missing from this listing; the destructor below shows
// this body belongs to ILoopOutputLayer. Accessors forward to the implementation object mImpl.
4721{
4722public:
// Returns the LoopOutput value reported by mImpl->getLoopOutput().
4726 LoopOutput getLoopOutput() const noexcept
4727 {
4728 return mImpl->getLoopOutput();
4729 }
4730
// Forwards `axis` to mImpl->setAxis().
// NOTE(review): the valid axis range and which LoopOutput kinds use it are not visible here.
4743 void setAxis(int32_t axis) noexcept
4744 {
4745 mImpl->setAxis(axis);
4746 }
4747
// Returns the axis reported by mImpl->getAxis().
4751 int32_t getAxis() const noexcept
4752 {
4753 return mImpl->getAxis();
4754 }
4755
4761 //
// Re-exports the inherited ILayer::setInput so it is callable through this class.
4776 using ILayer::setInput;
4777
4778protected:
// Protected destructor: code outside the class hierarchy cannot destroy the object through this type.
4779 virtual ~ILoopOutputLayer() noexcept = default;
// Implementation object that the accessors above forward to.
4780 apiv::VLoopOutputLayer* mImpl;
4781};
4782
// NOTE(review): the class-head line is missing from this listing; the destructor below shows
// this body belongs to ITripLimitLayer.
4795{
4796public:
// Returns the TripLimit value reported by mImpl->getTripLimit().
4800 TripLimit getTripLimit() const noexcept
4801 {
4802 return mImpl->getTripLimit();
4803 }
4804
4805protected:
// Protected destructor: code outside the class hierarchy cannot destroy the object through this type.
4806 virtual ~ITripLimitLayer() noexcept = default;
// Implementation object that getTripLimit() forwards to.
4807 apiv::VTripLimitLayer* mImpl;
4808};
4809
// NOTE(review): the class-head line is missing from this listing; the destructor below shows
// this body belongs to IIteratorLayer. Accessors forward to the implementation object mImpl.
4821{
4822public:
// Forwards `axis` to mImpl->setAxis().
4826 void setAxis(int32_t axis) noexcept
4827 {
4828 mImpl->setAxis(axis);
4829 }
4830
// Returns the axis reported by mImpl->getAxis().
4834 int32_t getAxis() const noexcept
4835 {
4836 return mImpl->getAxis();
4837 }
4838
// Forwards the reverse flag to mImpl->setReverse().
4848 void setReverse(bool reverse) noexcept
4849 {
4850 mImpl->setReverse(reverse);
4851 }
4852
// Returns the reverse flag reported by mImpl->getReverse().
4858 bool getReverse() const noexcept
4859 {
4860 return mImpl->getReverse();
4861 }
4862
4863protected:
// Protected destructor: code outside the class hierarchy cannot destroy the object through this type.
4864 virtual ~IIteratorLayer() noexcept = default;
// Implementation object that the accessors above forward to.
4865 apiv::VIteratorLayer* mImpl;
4866};
4867
// Loop builder handle. Derives from INoCopy and forwards every public call to the
// implementation object mImpl, returning its result unchanged.
4878class ILoop : public INoCopy
4879{
4880public:
// Returns the recurrence layer pointer produced by mImpl->addRecurrence(initialValue).
4887 IRecurrenceLayer* addRecurrence(ITensor& initialValue) noexcept
4888 {
4889 return mImpl->addRecurrence(initialValue);
4890 }
4891
// NOTE(review): signature line missing from this listing; the body forwards `tensor` and
// `limit` to mImpl->addTripLimit() and returns its result.
4909 {
4910 return mImpl->addTripLimit(tensor, limit);
4911 }
4912
// Returns the iterator layer pointer produced by mImpl->addIterator().
// Defaults visible in the signature: axis = 0, reverse = false.
4921 IIteratorLayer* addIterator(ITensor& tensor, int32_t axis = 0, bool reverse = false) noexcept
4922 {
4923 return mImpl->addIterator(tensor, axis, reverse);
4924 }
4925
// Returns the loop-output layer pointer produced by mImpl->addLoopOutput().
// Default visible in the signature: axis = 0.
4934 ILoopOutputLayer* addLoopOutput(ITensor& tensor, LoopOutput outputKind, int32_t axis = 0) noexcept
4935 {
4936 return mImpl->addLoopOutput(tensor, outputKind, axis);
4937 }
4938
// Forwards the name string to mImpl->setName().
4949 void setName(char const* name) noexcept
4950 {
4951 mImpl->setName(name);
4952 }
4953
// Returns the name string reported by mImpl->getName().
4959 char const* getName() const noexcept
4960 {
4961 return mImpl->getName();
4962 }
4963
4964protected:
// Protected destructor: code outside the class hierarchy cannot destroy the object through this type.
4965 virtual ~ILoop() noexcept = default;
// Implementation object that all public members forward to.
4966 apiv::VLoop* mImpl;
4967};
4968
// Layer type derived from ILayer; it declares no public members of its own in this listing.
4981class ISelectLayer : public ILayer
4982{
4983protected:
// Protected destructor: code outside the class hierarchy cannot destroy the object through this type.
4984 virtual ~ISelectLayer() noexcept = default;
// Pointer to the implementation object; not used by any member visible in this class body.
4985 apiv::VSelectLayer* mImpl;
4986};
4987
// NOTE(review): the class-head line is missing from this listing; the destructor below shows
// this body belongs to IAssertionLayer. Accessors forward to the implementation object mImpl.
5004{
5005public:
// Forwards the message string to mImpl->setMessage().
// NOTE(review): whether the string is copied or must outlive the call is not visible here.
5014 void setMessage(char const* message) noexcept
5015 {
5016 mImpl->setMessage(message);
5017 }
5018
// Returns the message string reported by mImpl->getMessage().
5024 char const* getMessage() const noexcept
5025 {
5026 return mImpl->getMessage();
5027 }
5028
5029protected:
// Protected destructor: code outside the class hierarchy cannot destroy the object through this type.
5030 virtual ~IAssertionLayer() noexcept = default;
5031
// Implementation object that the accessors above forward to.
5032 apiv::VAssertionLayer* mImpl;
5033};
5034
// Scoped enum with an int32_t underlying type; three enumerators with consecutive values 0..2.
// It is the parameter type of IFillLayer::setOperation() and INetworkDefinition::addFill().
5042enum class FillOperation : int32_t
5043{
5060 kLINSPACE = 0,
5061
5064 kRANDOM_UNIFORM = 1,
5065
5068 kRANDOM_NORMAL = 2
5069};
5070
// Explicit specialization of EnumMax for FillOperation.
// Returns 3, i.e. one past the largest FillOperation value (kRANDOM_NORMAL = 2).
5076template <>
5077constexpr inline int32_t EnumMax<FillOperation>() noexcept
5078{
5079 return 3;
5080}
5081
// Fill layer handle derived from ILayer. Every accessor forwards to the implementation object
// mImpl. The alpha/beta parameters exist in two flavours: double (setAlpha/setBeta) and
// int64_t (setAlphaInt64/setBetaInt64); isAlphaBetaInt64() reports which one mImpl considers active.
// NOTE(review): what alpha and beta mean for each FillOperation is not visible in this listing.
5117class IFillLayer : public ILayer
5118{
5119public:
5128 //
// Forwards the output dimensions to mImpl->setDimensions().
5129 void setDimensions(Dims const& dimensions) noexcept
5130 {
5131 mImpl->setDimensions(dimensions);
5132 }
5133
// Returns the dimensions reported by mImpl->getDimensions().
5144 Dims getDimensions() const noexcept
5145 {
5146 return mImpl->getDimensions();
5147 }
5148
// Forwards the fill operation to mImpl->setOperation().
5154 void setOperation(FillOperation op) noexcept
5155 {
5156 mImpl->setOperation(op);
5157 }
5158
// NOTE(review): signature line missing from this listing; the body returns mImpl->getOperation().
5165 {
5166 return mImpl->getOperation();
5167 }
5168
5182 //
// Forwards the double-valued alpha to mImpl->setAlpha().
5183 void setAlpha(double alpha) noexcept
5184 {
5185 mImpl->setAlpha(alpha);
5186 }
5187
// Returns the double-valued alpha reported by mImpl->getAlpha().
5198 double getAlpha() const noexcept
5199 {
5200 return mImpl->getAlpha();
5201 }
5202
// Forwards the double-valued beta to mImpl->setBeta().
5217 void setBeta(double beta) noexcept
5218 {
5219 mImpl->setBeta(beta);
5220 }
5221
// Returns the double-valued beta reported by mImpl->getBeta().
5232 double getBeta() const noexcept
5233 {
5234 return mImpl->getBeta();
5235 }
5236
// Re-exports the inherited ILayer::setInput so it is callable through this class.
5277 using ILayer::setInput;
5278
5292 //
// Forwards the 64-bit integer alpha to mImpl->setAlphaInt64().
5293 void setAlphaInt64(int64_t alpha) noexcept
5294 {
5295 mImpl->setAlphaInt64(alpha);
5296 }
5297
// Returns the 64-bit integer alpha reported by mImpl->getAlphaInt64().
5308 int64_t getAlphaInt64() const noexcept
5309 {
5310 return mImpl->getAlphaInt64();
5311 }
5312
// Forwards the 64-bit integer beta to mImpl->setBetaInt64().
5327 void setBetaInt64(int64_t beta) noexcept
5328 {
5329 mImpl->setBetaInt64(beta);
5330 }
5331
// Returns the 64-bit integer beta reported by mImpl->getBetaInt64().
5342 int64_t getBetaInt64() const noexcept
5343 {
5344 return mImpl->getBetaInt64();
5345 }
5346
// Returns the flag reported by mImpl->isAlphaBetaInt64().
5350 bool isAlphaBetaInt64() const noexcept
5351 {
5352 return mImpl->isAlphaBetaInt64();
5353 }
5354
// Forwards the requested output data type to mImpl->setToType().
5368 void setToType(DataType toType) noexcept
5369 {
5370 mImpl->setToType(toType);
5371 }
5372
// Returns the data type reported by mImpl->getToType().
5380 DataType getToType() const noexcept
5381 {
5382 return mImpl->getToType();
5383 }
5384
5385protected:
// Protected destructor: code outside the class hierarchy cannot destroy the object through this type.
5386 virtual ~IFillLayer() noexcept = default;
// Implementation object that all public members forward to.
5387 apiv::VFillLayer* mImpl;
5388};
5389
// NOTE(review): the class-head line is missing from this listing; the destructor below shows
// this body belongs to IQuantizeLayer. Accessors forward to the implementation object mImpl.
5465{
5466public:
// Returns the axis reported by mImpl->getAxis().
5475 int32_t getAxis() const noexcept
5476 {
5477 return mImpl->getAxis();
5478 }
// Forwards `axis` to mImpl->setAxis().
5486 void setAxis(int32_t axis) noexcept
5487 {
5488 mImpl->setAxis(axis);
5489 }
5490
// Forwards the block shape to mImpl->setBlockShape() and returns its bool result.
// NOTE(review): presumably true means the shape was accepted — confirm against the API docs.
5499 bool setBlockShape(Dims const& blockShape) noexcept
5500 {
5501 return mImpl->setBlockShape(blockShape);
5502 }
5503
// NOTE(review): signature line missing from this listing; the body returns mImpl->getBlockShape().
5511 {
5512 return mImpl->getBlockShape();
5513 }
5514
// Forwards the requested output data type to mImpl->setToType().
5526 void setToType(DataType toType) noexcept
5527 {
5528 mImpl->setToType(toType);
5529 }
5530
// Returns the data type reported by mImpl->getToType().
5538 DataType getToType() const noexcept
5539 {
5540 return mImpl->getToType();
5541 }
5542
5543protected:
// Protected destructor: code outside the class hierarchy cannot destroy the object through this type.
5544 virtual ~IQuantizeLayer() noexcept = default;
// Implementation object that all public members forward to.
5545 apiv::VQuantizeLayer* mImpl;
5546};
5547
// NOTE(review): the class-head line is missing from this listing; the destructor below shows
// this body belongs to IDequantizeLayer. Accessors forward to the implementation object mImpl.
5617{
5618public:
// Returns the axis reported by mImpl->getAxis().
5627 int32_t getAxis() const noexcept
5628 {
5629 return mImpl->getAxis();
5630 }
// Forwards `axis` to mImpl->setAxis().
5638 void setAxis(int32_t axis) noexcept
5639 {
5640 mImpl->setAxis(axis);
5641 }
5642
// Forwards the block shape to mImpl->setBlockShape() and returns its bool result.
// NOTE(review): presumably true means the shape was accepted — confirm against the API docs.
5655 bool setBlockShape(Dims const& blockShape) noexcept
5656 {
5657 return mImpl->setBlockShape(blockShape);
5658 }
5659
// NOTE(review): signature line missing from this listing; the body returns mImpl->getBlockShape().
5667 {
5668 return mImpl->getBlockShape();
5669 }
5670
// Forwards the requested output data type to mImpl->setToType().
5682 void setToType(DataType toType) noexcept
5683 {
5684 mImpl->setToType(toType);
5685 }
5686
// Returns the data type reported by mImpl->getToType().
5694 DataType getToType() const noexcept
5695 {
5696 return mImpl->getToType();
5697 }
5698
5699protected:
// Protected destructor: code outside the class hierarchy cannot destroy the object through this type.
5700 virtual ~IDequantizeLayer() noexcept = default;
// Implementation object that all public members forward to.
5701 apiv::VDequantizeLayer* mImpl;
5702};
5703
// NOTE(review): the class-head line is missing from this listing; the destructor below shows
// this body belongs to IDynamicQuantizeLayer. Accessors forward to the implementation object
// mImpl. The axis/block-size accessors carry TRT_DEPRECATED; the block-shape accessors do not.
5722{
5723public:
// Re-exports the inherited ILayer::setInput so it is callable through this class.
5735 using ILayer::setInput;
5736
// Forwards the requested output data type to mImpl->setToType().
5749 void setToType(DataType toType) noexcept
5750 {
5751 mImpl->setToType(toType);
5752 }
5753
// Returns the data type reported by mImpl->getToType().
5762 DataType getToType() const noexcept
5763 {
5764 return mImpl->getToType();
5765 }
5766
// Forwards the requested scale data type to mImpl->setScaleType().
5775 void setScaleType(DataType scaleType) noexcept
5776 {
5777 mImpl->setScaleType(scaleType);
5778 }
5779
// Returns the scale data type reported by mImpl->getScaleType().
5788 DataType getScaleType() const noexcept
5789 {
5790 return mImpl->getScaleType();
5791 }
5792
// Deprecated (TRT_DEPRECATED). Forwards `axis` to mImpl->setAxis().
5801 TRT_DEPRECATED void setAxis(int32_t axis) noexcept
5802 {
5803 mImpl->setAxis(axis);
5804 }
5805
// Deprecated (TRT_DEPRECATED). Returns the axis reported by mImpl->getAxis().
5811 TRT_DEPRECATED int32_t getAxis() const noexcept
5812 {
5813 return mImpl->getAxis();
5814 }
5815
// Deprecated (TRT_DEPRECATED). Forwards `size` to mImpl->setBlockSize().
5824 TRT_DEPRECATED void setBlockSize(int32_t size) noexcept
5825 {
5826 mImpl->setBlockSize(size);
5827 }
5828
// Deprecated (TRT_DEPRECATED). Returns the block size reported by mImpl->getBlockSize().
5834 TRT_DEPRECATED int32_t getBlockSize() const noexcept
5835 {
5836 return mImpl->getBlockSize();
5837 }
5838
// Forwards the block shape to mImpl->setBlockShape(). Unlike the same-named member of
// IQuantizeLayer/IDequantizeLayer in this file, this one returns void.
5847 void setBlockShape(Dims const& blockShape) noexcept
5848 {
5849 mImpl->setBlockShape(blockShape);
5850 }
5851
// Returns the block shape reported by mImpl->getBlockShape().
5859 Dims getBlockShape() const noexcept
5860 {
5861 return mImpl->getBlockShape();
5862 }
5863
5864protected:
// Protected destructor: code outside the class hierarchy cannot destroy the object through this type.
5865 virtual ~IDynamicQuantizeLayer() noexcept = default;
// Implementation object that all public members forward to.
5866 apiv::VDynamicQuantizeLayer* mImpl;
5867};
5868
// Einsum layer handle derived from ILayer; both accessors forward to the implementation object mImpl.
5903class IEinsumLayer : public ILayer
5904{
5905public:
// Forwards the equation string to mImpl->setEquation() and returns its bool result.
// NOTE(review): presumably true means the equation was accepted — confirm against the API docs.
5915 bool setEquation(char const* equation) noexcept
5916 {
5917 return mImpl->setEquation(equation);
5918 }
5919
// Returns the equation string reported by mImpl->getEquation().
5925 char const* getEquation() const noexcept
5926 {
5927 return mImpl->getEquation();
5928 }
5929
5930protected:
// Protected destructor: code outside the class hierarchy cannot destroy the object through this type.
5931 virtual ~IEinsumLayer() noexcept = default;
// Implementation object that the accessors above forward to.
5932 apiv::VEinsumLayer* mImpl;
5933};
5934
// Scoped enum with an int32_t underlying type; two enumerators (0 and 1).
// It is the mode type used by IScatterLayer and INetworkDefinition::addScatter().
5942enum class ScatterMode : int32_t
5943{
5944 kELEMENT = 0,
5945 kND = 1,
5946};
5947
// Explicit specialization of EnumMax for ScatterMode.
// Returns 2, i.e. one past the largest ScatterMode value (kND = 1).
5953template <>
5954constexpr inline int32_t EnumMax<ScatterMode>() noexcept
5955{
5956 return 2;
5957}
5958
// Scatter layer handle derived from ILayer; all accessors forward to the implementation object mImpl.
6016class IScatterLayer : public ILayer
6017{
6018public:
// Forwards the scatter mode to mImpl->setMode().
6024 void setMode(ScatterMode mode) noexcept
6025 {
6026 mImpl->setMode(mode);
6027 }
6028
// Returns the scatter mode reported by mImpl->getMode().
6034 ScatterMode getMode() const noexcept
6035 {
6036 return mImpl->getMode();
6037 }
6038
// Forwards `axis` to mImpl->setAxis().
6044 void setAxis(int32_t axis) noexcept
6045 {
6046 mImpl->setAxis(axis);
6047 }
6048
// Returns the axis reported by mImpl->getAxis().
6052 int32_t getAxis() const noexcept
6053 {
6054 return mImpl->getAxis();
6055 }
6056
6057protected:
// Implementation object that the accessors above forward to.
6058 apiv::VScatterLayer* mImpl;
// Protected destructor: code outside the class hierarchy cannot destroy the object through this type.
6059 virtual ~IScatterLayer() noexcept = default;
6060}; // class IScatterLayer
6061
// One-hot layer handle derived from ILayer; both accessors forward to the implementation object mImpl.
6088class IOneHotLayer : public ILayer
6089{
6090public:
// Forwards `axis` to mImpl->setAxis().
6096 void setAxis(int32_t axis) noexcept
6097 {
6098 mImpl->setAxis(axis);
6099 }
6100
// Returns the axis reported by mImpl->getAxis().
6104 int32_t getAxis() const noexcept
6105 {
6106 return mImpl->getAxis();
6107 }
6108
6109protected:
// Implementation object that the accessors above forward to.
6110 apiv::VOneHotLayer* mImpl;
// Protected destructor: code outside the class hierarchy cannot destroy the object through this type.
6111 virtual ~IOneHotLayer() noexcept = default;
6112};
6113
// NOTE(review): the class-head line is missing from this listing; the destructor and the
// trailing comment below show this body belongs to IGridSampleLayer. Accessors forward to
// the implementation object mImpl.
6126{
6127public:
// NOTE(review): signature line missing from this listing; the body forwards `mode` to
// mImpl->setInterpolationMode() and discards any result.
6134 {
6135 mImpl->setInterpolationMode(mode);
6136 }
6137
// NOTE(review): signature line missing from this listing; the body returns
// mImpl->getInterpolationMode().
6146 {
6147 return mImpl->getInterpolationMode();
6148 }
6149
// Forwards the align-corners flag to mImpl->setAlignCorners().
6155 void setAlignCorners(bool alignCorners) noexcept
6156 {
6157 mImpl->setAlignCorners(alignCorners);
6158 }
6159
// Returns the align-corners flag reported by mImpl->getAlignCorners().
6167 bool getAlignCorners() const noexcept
6168 {
6169 return mImpl->getAlignCorners();
6170 }
6171
// Forwards the sample mode to mImpl->setSampleMode() and returns its bool result.
// NOTE(review): presumably true means the mode was accepted — confirm against the API docs.
6179 bool setSampleMode(SampleMode mode) noexcept
6180 {
6181 return mImpl->setSampleMode(mode);
6182 }
6183
// Returns the sample mode reported by mImpl->getSampleMode().
6191 SampleMode getSampleMode() const noexcept
6192 {
6193 return mImpl->getSampleMode();
6194 }
6195
6196protected:
// Implementation object that all public members forward to.
6197 apiv::VGridSampleLayer* mImpl;
// Protected destructor: code outside the class hierarchy cannot destroy the object through this type.
6198 virtual ~IGridSampleLayer() noexcept = default;
6199}; // class IGridSampleLayer
6200
// Scoped enum with an int32_t underlying type; two enumerators (0 and 1).
// NOTE(review): going by the names, these select between corner-pair and center/size box
// encodings — confirm the exact coordinate layout against the API docs.
6208enum class BoundingBoxFormat : int32_t
6209{
6211 kCORNER_PAIRS = 0,
6213 kCENTER_SIZES = 1
6214};
6215
// Explicit specialization of EnumMax for BoundingBoxFormat.
// Returns 2, i.e. one past the largest BoundingBoxFormat value (kCENTER_SIZES = 1).
6221template <>
6222constexpr inline int32_t EnumMax<BoundingBoxFormat>() noexcept
6223{
6224 return 2;
6225}
6226
// NMS layer handle derived from ILayer; all accessors forward to the implementation object mImpl.
6277class INMSLayer : public ILayer
6278{
6279public:
// NOTE(review): signature line missing from this listing; the body forwards `fmt` to
// mImpl->setBoundingBoxFormat() and discards any result.
6290 {
6291 mImpl->setBoundingBoxFormat(fmt);
6292 }
6293
// NOTE(review): signature line missing from this listing; the body returns
// mImpl->getBoundingBoxFormat().
6302 {
6303 return mImpl->getBoundingBoxFormat();
6304 }
6305
// Forwards the top-K box limit to mImpl->setTopKBoxLimit().
// NOTE(review): the permitted range for `limit` is not visible in this listing.
6315 void setTopKBoxLimit(int32_t limit) noexcept
6316 {
6317 mImpl->setTopKBoxLimit(limit);
6318 }
6319
// Returns the top-K box limit reported by mImpl->getTopKBoxLimit().
6325 int32_t getTopKBoxLimit() const noexcept
6326 {
6327 return mImpl->getTopKBoxLimit();
6328 }
6329
// Re-exports the inherited ILayer::setInput so it is callable through this class.
6348 using ILayer::setInput;
6349
// Forwards the indices data type to mImpl->setIndicesType() and returns its bool result.
// NOTE(review): presumably true means the type was accepted — confirm against the API docs.
6360 bool setIndicesType(DataType type) noexcept
6361 {
6362 return mImpl->setIndicesType(type);
6363 }
6364
// Returns the indices data type reported by mImpl->getIndicesType().
6372 DataType getIndicesType() const noexcept
6373 {
6374 return mImpl->getIndicesType();
6375 }
6376
6377protected:
// Implementation object that all public members forward to.
6378 apiv::VNMSLayer* mImpl;
// Protected destructor: code outside the class hierarchy cannot destroy the object through this type.
6379 virtual ~INMSLayer() noexcept = default;
6380}; // class INMSLayer
6381
// NOTE(review): the class-head line is missing from this listing; the destructor and the
// trailing comment below show this body belongs to IReverseSequenceLayer. Accessors forward
// to the implementation object mImpl.
6395{
6396public:
// Forwards the batch axis to mImpl->setBatchAxis().
6405 void setBatchAxis(int32_t batchAxis) noexcept
6406 {
6407 mImpl->setBatchAxis(batchAxis);
6408 }
6409
// Returns the batch axis reported by mImpl->getBatchAxis().
6415 int32_t getBatchAxis() const noexcept
6416 {
6417 return mImpl->getBatchAxis();
6418 }
6419
// Forwards the sequence axis to mImpl->setSequenceAxis().
6428 void setSequenceAxis(int32_t sequenceAxis) noexcept
6429 {
6430 mImpl->setSequenceAxis(sequenceAxis);
6431 }
6432
// Returns the sequence axis reported by mImpl->getSequenceAxis().
6438 int32_t getSequenceAxis() const noexcept
6439 {
6440 return mImpl->getSequenceAxis();
6441 }
6442
6443protected:
// Implementation object that all public members forward to.
6444 apiv::VReverseSequenceLayer* mImpl;
// Protected destructor: code outside the class hierarchy cannot destroy the object through this type.
6445 virtual ~IReverseSequenceLayer() noexcept = default;
6446}; // class IReverseSequenceLayer
6447
// NOTE(review): the class-head line is missing from this listing; the destructor below shows
// this body belongs to INormalizationLayer. Accessors forward to the implementation object mImpl.
// The void setters here are written as `return mImpl->...;`. That compiles only because the
// forwarded call itself has type void, so no value is actually returned.
6467{
6468public:
// Forwards epsilon to mImpl->setEpsilon().
6476 void setEpsilon(float eps) noexcept
6477 {
6478 return mImpl->setEpsilon(eps);
6479 }
6480
// Returns the epsilon reported by mImpl->getEpsilon().
6486 float getEpsilon() const noexcept
6487 {
6488 return mImpl->getEpsilon();
6489 }
6490
// Forwards the axes value to mImpl->setAxes().
// NOTE(review): per the parameter name this is a bitmask of axes — confirm the bit-to-axis mapping.
6496 void setAxes(uint32_t axesMask) noexcept
6497 {
6498 return mImpl->setAxes(axesMask);
6499 }
6500
// Returns the axes value reported by mImpl->getAxes().
6506 uint32_t getAxes() const noexcept
6507 {
6508 return mImpl->getAxes();
6509 }
6510
// Forwards the group count to mImpl->setNbGroups().
6527 void setNbGroups(int64_t nbGroups) noexcept
6528 {
6529 return mImpl->setNbGroups(nbGroups);
6530 }
6531
// Returns the group count reported by mImpl->getNbGroups().
6537 int64_t getNbGroups() const noexcept
6538 {
6539 return mImpl->getNbGroups();
6540 }
6541
// NOTE(review): signature line missing from this listing; the body forwards `type` to
// mImpl->setComputePrecision() and returns its result.
6566 {
6567 return mImpl->setComputePrecision(type);
6568 }
6569
// NOTE(review): signature line missing from this listing; the body returns
// mImpl->getComputePrecision().
6578 {
6579 return mImpl->getComputePrecision();
6580 }
6581
// Returns the flag reported by mImpl->isV2(); TRT_NODISCARD marks the result as not to be ignored.
// NOTE(review): presumably distinguishes layers created by addNormalizationV2() from
// those created by addNormalization() — confirm.
6587 TRT_NODISCARD bool isV2() const noexcept
6588 {
6589 return mImpl->isV2();
6590 }
6591
6592protected:
// Implementation object that all public members forward to.
6593 apiv::VNormalizationLayer* mImpl;
// Protected destructor: code outside the class hierarchy cannot destroy the object through this type.
6594 virtual ~INormalizationLayer() noexcept = default;
6595};
6596
6597
// Squeeze layer handle derived from ILayer. Its only public declaration re-exports setInput.
6606class ISqueezeLayer : public ILayer
6607{
6608public:
// Re-exports the inherited ILayer::setInput so it is callable through this class.
6621 using ILayer::setInput;
6622
6623protected:
// Pointer to the implementation object; not used by any member visible in this class body.
6624 apiv::VSqueezeLayer* mImpl;
// Protected destructor: code outside the class hierarchy cannot destroy the object through this type.
6625 virtual ~ISqueezeLayer() noexcept = default;
6626};
6627
// NOTE(review): the class-head line is missing from this listing; the destructor below shows
// this body belongs to IUnsqueezeLayer. Its only public declaration re-exports setInput.
6637{
6638public:
// Re-exports the inherited ILayer::setInput so it is callable through this class.
6652 using ILayer::setInput;
6653
6654protected:
// Pointer to the implementation object; not used by any member visible in this class body.
6655 apiv::VUnsqueezeLayer* mImpl;
// Protected destructor: code outside the class hierarchy cannot destroy the object through this type.
6656 virtual ~IUnsqueezeLayer() noexcept = default;
6657};
6658
// Scoped enum with an int32_t underlying type; currently a single enumerator, kSUM = 0.
// It is the operation type passed to INetworkDefinition::addCumulative().
6670enum class CumulativeOperation : int32_t
6671{
6672 kSUM = 0,
6673};
6674
6675namespace impl
6676{
6677
// NOTE(review): the struct-head line that should follow `template <>` is missing from this
// listing (presumably the enum-max helper specialized for CumulativeOperation — confirm).
// kVALUE = 1 is one past the only CumulativeOperation value visible in this file (kSUM = 0).
6683template <>
6685{
6686 static constexpr int32_t kVALUE = 1;
6687};
6688
6689} // namespace impl
6690
// NOTE(review): the class-head line is missing from this listing; the destructor below shows
// this body belongs to ICumulativeLayer. Accessors forward to the implementation object mImpl.
6719{
6720public:
// NOTE(review): signature line missing from this listing; the body forwards `op` to
// mImpl->setOperation() and returns its result.
6731 {
6732 return mImpl->setOperation(op);
6733 }
6734
// NOTE(review): signature line missing from this listing; the body returns mImpl->getOperation().
6743 {
6744 return mImpl->getOperation();
6745 }
6746
// Forwards the exclusive flag to mImpl->setExclusive().
6754 void setExclusive(bool exclusive) noexcept
6755 {
6756 mImpl->setExclusive(exclusive);
6757 }
6758
// Returns the exclusive flag reported by mImpl->getExclusive().
6766 bool getExclusive() const noexcept
6767 {
6768 return mImpl->getExclusive();
6769 }
6770
// Forwards the reverse flag to mImpl->setReverse().
6778 void setReverse(bool reverse) noexcept
6779 {
6780 mImpl->setReverse(reverse);
6781 }
6782
// Returns the reverse flag reported by mImpl->getReverse().
6790 bool getReverse() const noexcept
6791 {
6792 return mImpl->getReverse();
6793 }
6794
6795protected:
// Implementation object that all public members forward to.
6796 apiv::VCumulativeLayer* mImpl;
// Protected destructor: code outside the class hierarchy cannot destroy the object through this type.
6797 virtual ~ICumulativeLayer() noexcept = default;
6798};
6799
// Scoped enum with an int32_t underlying type; two enumerators (kNONE = 0, kSOFTMAX = 1).
// It is the normOp type passed to INetworkDefinition::addAttention().
6805enum class AttentionNormalizationOp : int32_t
6806{
6807 kNONE
6808 = 0,
6809 kSOFTMAX = 1,
6810};
6811
6812namespace impl
6813{
// NOTE(review): the struct-head line that should follow `template <>` is missing from this
// listing (presumably the enum-max helper specialized for AttentionNormalizationOp — confirm).
// kVALUE = 2 is one past the largest AttentionNormalizationOp value in this file (kSOFTMAX = 1).
6819template <>
6821{
6822 static constexpr int32_t kVALUE = 2;
6823};
6824
6825} // namespace impl
6826
// NOTE(review): the class-head line is missing from this listing; the destructor below shows
// this body belongs to IAttentionBoundaryLayer.
6837{
6838public:
// Returns the IAttention pointer reported by the boundary implementation object (mBoundary).
6842 IAttention* getAttention() const noexcept
6843 {
6844 return mBoundary->getAttention();
6845 }
6846
6847protected:
// Protected destructor: code outside the class hierarchy cannot destroy the object through this type.
6848 virtual ~IAttentionBoundaryLayer() noexcept = default;
// Implementation object that getAttention() forwards to.
6849 apiv::VAttentionBoundaryLayer* mBoundary;
6850};
6851
// NOTE(review): the class-head line is missing from this listing; the destructor below shows
// this body belongs to IAttentionInputLayer. Its only public declaration re-exports setInput.
6863{
6864public:
// Re-exports the inherited ILayer::setInput so it is callable through this class.
6880 using ILayer::setInput;
6881
6882protected:
// Protected destructor: code outside the class hierarchy cannot destroy the object through this type.
6883 virtual ~IAttentionInputLayer() noexcept = default;
// Pointer to the implementation object; not used by any member visible in this class body.
6884 apiv::VAttentionInputLayer* mImpl;
6885};
6886
// NOTE(review): the class-head line is missing from this listing; the destructor below shows
// this body belongs to IAttentionOutputLayer. The class adds no public members of its own here.
6898{
6899public:
6900protected:
// Protected destructor: code outside the class hierarchy cannot destroy the object through this type.
6901 virtual ~IAttentionOutputLayer() noexcept = default;
// Pointer to the implementation object; not used by any member visible in this class body.
6902 apiv::VAttentionOutputLayer* mImpl;
6903};
6904
// Attention handle. Derives from INoCopy (not ILayer) and forwards every public call to the
// implementation object mImpl, returning its result unchanged. Most setters return bool.
// NOTE(review): presumably true means the value was accepted — confirm against the API docs.
6954class IAttention : public INoCopy
6955{
6956public:
// NOTE(review): signature line missing from this listing; the body forwards `op` to
// mImpl->setNormalizationOperation() and returns its result.
6965 {
6966 return mImpl->setNormalizationOperation(op);
6967 }
6968
// NOTE(review): signature line missing from this listing; the body returns
// mImpl->getNormalizationOperation().
6977 {
6978 return mImpl->getNormalizationOperation();
6979 }
6980
// Forwards the mask tensor to mImpl->setMask() and returns its bool result.
6993 bool setMask(ITensor& mask) noexcept
6994 {
6995 return mImpl->setMask(mask);
6996 }
6997
// Returns the tensor pointer reported by mImpl->getMask().
// Unlike the other getters in this class, this one is not const-qualified.
7005 ITensor* getMask() noexcept
7006 {
7007 return mImpl->getMask();
7008 }
7009
// Forwards the causal flag to mImpl->setCausal() and returns its bool result.
7018 bool setCausal(bool isCausal) noexcept
7019 {
7020 return mImpl->setCausal(isCausal);
7021 }
7022
// Returns the causal flag reported by mImpl->getCausal().
7030 bool getCausal() const noexcept
7031 {
7032 return mImpl->getCausal();
7033 }
7034
// Forwards the decomposable flag to mImpl->setDecomposable() and returns its bool result.
7042 bool setDecomposable(bool decomposable) noexcept
7043 {
7044 return mImpl->setDecomposable(decomposable);
7045 }
7046
// Returns the decomposable flag reported by mImpl->getDecomposable().
7055 bool getDecomposable() const noexcept
7056 {
7057 return mImpl->getDecomposable();
7058 }
7059
// Forwards (index, input) to mImpl->setInput() and returns its bool result.
// NOTE(review): which index maps to which attention operand is not visible in this listing.
7074 bool setInput(int32_t index, ITensor& input) noexcept
7075 {
7076 return mImpl->setInput(index, input);
7077 }
7078
// Returns the input count reported by mImpl->getNbInputs().
7083 int32_t getNbInputs() const noexcept
7084 {
7085 return mImpl->getNbInputs();
7086 }
7087
// Returns the tensor pointer reported by mImpl->getInput(index).
7095 ITensor* getInput(int32_t index) const noexcept
7096 {
7097 return mImpl->getInput(index);
7098 }
7099
// Returns the output count reported by mImpl->getNbOutputs().
7103 int32_t getNbOutputs() const noexcept
7104 {
7105 return mImpl->getNbOutputs();
7106 }
7107
// Returns the tensor pointer reported by mImpl->getOutput(index).
7115 ITensor* getOutput(int32_t index) const noexcept
7116 {
7117 return mImpl->getOutput(index);
7118 }
7119
// Forwards the name string to mImpl->setName() and returns its bool result.
7132 bool setName(char const* name) noexcept
7133 {
7134 return mImpl->setName(name);
7135 }
7136
// Returns the name string reported by mImpl->getName().
7144 char const* getName() const noexcept
7145 {
7146 return mImpl->getName();
7147 }
7148
// NOTE(review): signature line missing from this listing; the body forwards `tensor` to
// mImpl->setNormalizationQuantizeScale() and returns its result.
7161 {
7162 return mImpl->setNormalizationQuantizeScale(tensor);
7163 }
7164
// NOTE(review): signature line missing from this listing; the body returns
// mImpl->getNormalizationQuantizeScale().
7172 {
7173 return mImpl->getNormalizationQuantizeScale();
7174 }
7175
// NOTE(review): signature line missing from this listing; the body forwards `type` to
// mImpl->setNormalizationQuantizeToType() and returns its result.
7185 {
7186 return mImpl->setNormalizationQuantizeToType(type);
7187 }
7188
// NOTE(review): signature line missing from this listing; the body returns
// mImpl->getNormalizationQuantizeToType().
7197 {
7198 return mImpl->getNormalizationQuantizeToType();
7199 }
7200
// Forwards the metadata string to mImpl->setMetadata() and returns its bool result.
7216 bool setMetadata(char const* metadata) noexcept
7217 {
7218 return mImpl->setMetadata(metadata);
7219 }
7220
// Returns the metadata string reported by mImpl->getMetadata().
7229 char const* getMetadata() const noexcept
7230 {
7231 return mImpl->getMetadata();
7232 }
7233
// Forwards the rank count to mImpl->setNbRanks() and returns its bool result.
7245 bool setNbRanks(int32_t nbRanks) noexcept
7246 {
7247 return mImpl->setNbRanks(nbRanks);
7248 }
7249
// Returns the rank count reported by mImpl->getNbRanks().
7257 int32_t getNbRanks() const noexcept
7258 {
7259 return mImpl->getNbRanks();
7260 }
7261
7262protected:
// Implementation object that all public members forward to.
7263 apiv::VAttention* mImpl;
// Protected destructor: code outside the class hierarchy cannot destroy the object through this type.
7264 virtual ~IAttention() noexcept = default;
7265};
7266
// NOTE(review): the class-head line is missing from this listing; the destructor below shows
// this body belongs to IRotaryEmbeddingLayer. Accessors forward to the implementation object
// mImpl; those marked TRT_NODISCARD return values that callers should not ignore.
7274{
7275public:
// Forwards the interleaved flag to mImpl->setInterleaved().
7281 void setInterleaved(bool interleaved) noexcept
7282 {
7283 mImpl->setInterleaved(interleaved);
7284 }
7285
7286
// Returns the interleaved flag reported by mImpl->getInterleaved().
7292 TRT_NODISCARD bool getInterleaved() const noexcept
7293 {
7294 return mImpl->getInterleaved();
7295 }
7296
7297
// Forwards the rotary embedding dimension to mImpl->setRotaryEmbeddingDim() and returns its
// bool result.
// NOTE(review): presumably true means the value was accepted — confirm against the API docs.
7303 TRT_NODISCARD bool setRotaryEmbeddingDim(int32_t rotaryEmbeddingDim) noexcept
7304 {
7305 return mImpl->setRotaryEmbeddingDim(rotaryEmbeddingDim);
7306 }
7307
7308
// Returns the rotary embedding dimension reported by mImpl->getRotaryEmbeddingDim().
7314 TRT_NODISCARD int32_t getRotaryEmbeddingDim() const noexcept
7315 {
7316 return mImpl->getRotaryEmbeddingDim();
7317 }
7318
7319
// Re-exports the inherited ILayer::setInput so it is callable through this class.
7333 using ILayer::setInput;
7334
7335
7336protected:
// Implementation object that all public members forward to.
7337 apiv::VRotaryEmbeddingLayer* mImpl;
// Protected destructor: code outside the class hierarchy cannot destroy the object through this type.
7338 virtual ~IRotaryEmbeddingLayer() noexcept = default;
7339};
7340
// Scoped enum with an int32_t underlying type; currently a single enumerator, kLINEAR = 0.
// It is the cacheMode type used by IKVCacheUpdateLayer and INetworkDefinition::addKVCacheUpdate().
7346enum class KVCacheMode : int32_t
7347{
7348 kLINEAR = 0,
7349};
7350
7351namespace impl
7352{
// NOTE(review): the struct-head line that should follow `template <>` is missing from this
// listing (presumably the enum-max helper specialized for KVCacheMode — confirm).
// kVALUE = 1 is one past the only KVCacheMode value visible in this file (kLINEAR = 0).
7358template <>
7360{
7361 static constexpr int32_t kVALUE = 1;
7362};
7363
7364} // namespace impl
7365
// NOTE(review): the class-head line is missing from this listing; the destructor below shows
// this body belongs to IKVCacheUpdateLayer. Accessors forward to the implementation object mImpl.
7386{
7387public:
// Re-exports the inherited ILayer::setInput so it is callable through this class.
7400 using ILayer::setInput;
7401
// Forwards the cache mode to mImpl->setCacheMode() and returns its bool result.
// NOTE(review): presumably true means the mode was accepted — confirm against the API docs.
7409 bool setCacheMode(KVCacheMode cacheMode) noexcept
7410 {
7411 return mImpl->setCacheMode(cacheMode);
7412 }
7413
// Returns the cache mode reported by mImpl->getCacheMode().
7419 KVCacheMode getCacheMode() const noexcept
7420 {
7421 return mImpl->getCacheMode();
7422 }
7423
7424protected:
// Implementation object that the accessors above forward to.
7425 apiv::VKVCacheUpdateLayer* mImpl;
// Protected destructor: code outside the class hierarchy cannot destroy the object through this type.
7426 virtual ~IKVCacheUpdateLayer() noexcept = default;
7427};
7428
// Scoped enum with an int32_t underlying type; two enumerators (kNONE = 0, kSILU = 1).
// It is the activation type used by IMoELayer::setGatedWeights() and setActivationType().
7434enum class MoEActType : int32_t
7435{
7436 kNONE = 0,
7437 kSILU = 1,
7438};
7439
7440namespace impl
7441{
7442
// NOTE(review): the struct-head line that should follow `template <>` is missing from this
// listing (presumably the enum-max helper specialized for MoEActType — confirm).
// kVALUE = 2 is one past the largest MoEActType value visible in this file (kSILU = 1).
7448template <>
7450{
7451 static constexpr int32_t kVALUE = 2;
7452};
7453
7454} // namespace impl
7455
// Mixture-of-experts layer handle derived from ILayer. Every public member forwards its
// arguments unchanged to the implementation object mImpl.
// NOTE(review): tensor shapes, valid data types and the exact SwiGLU formula are not visible
// in this listing — take them from the API documentation, not from parameter names.
7567class IMoELayer : public ILayer
7568{
7569public:
// Forwards the three weight tensors and the activation type to mImpl->setGatedWeights().
7581 void setGatedWeights(ITensor& fcGateWeights, ITensor& fcUpWeights, ITensor& fcDownWeights, MoEActType activationType) noexcept
7582 {
7583 mImpl->setGatedWeights(fcGateWeights, fcUpWeights, fcDownWeights, activationType);
7584 }
7585
// Forwards the three bias tensors to mImpl->setGatedBiases().
7593 void setGatedBiases(ITensor& fcGateBiases, ITensor& fcUpBiases, ITensor& fcDownBiases) noexcept
7594 {
7595 mImpl->setGatedBiases(fcGateBiases, fcUpBiases, fcDownBiases);
7596 }
7597
// Forwards the activation type to mImpl->setActivationType().
7605 void setActivationType(MoEActType activationType) noexcept
7606 {
7607 mImpl->setActivationType(activationType);
7608 }
7609
// NOTE(review): signature line missing from this listing; the body returns
// mImpl->getActivationType().
7618 {
7619 return mImpl->getActivationType();
7620 }
7621
// Forwards the scale tensor and data type to mImpl->setQuantizationStatic().
7640 void setQuantizationStatic(ITensor& fcDownActivationScale, DataType dataType) noexcept
7641 {
7642 mImpl->setQuantizationStatic(fcDownActivationScale, dataType);
7643 }
7644
// Forwards the scale tensor, data type, block shape and output scale type to
// mImpl->setQuantizationDynamicDblQ().
7669 void setQuantizationDynamicDblQ(ITensor& fcDownActivationDblQScale, DataType dataType, Dims const& blockShape, DataType dynQOutputScaleType) noexcept
7670 {
7671 mImpl->setQuantizationDynamicDblQ(fcDownActivationDblQScale, dataType, blockShape, dynQOutputScaleType);
7672 }
7673
// NOTE(review): signature line missing from this listing; the body forwards `type` to
// mImpl->setQuantizationToType() and discards any result.
7685 {
7686 mImpl->setQuantizationToType(type);
7687 }
7688
// NOTE(review): signature line missing from this listing; the body returns
// mImpl->getQuantizationToType().
7697 {
7698 return mImpl->getQuantizationToType();
7699 }
7700
// Forwards the block shape to mImpl->setQuantizationBlockShape().
7712 void setQuantizationBlockShape(Dims const& blockShape) noexcept
7713 {
7714 mImpl->setQuantizationBlockShape(blockShape);
7715 }
7716
// NOTE(review): signature line missing from this listing; the body returns
// mImpl->getQuantizationBlockShape().
7725 {
7726 return mImpl->getQuantizationBlockShape();
7727 }
7728
// NOTE(review): signature line missing from this listing; the body forwards `type` to
// mImpl->setDynQOutputScaleType() and discards any result.
7737 {
7738 mImpl->setDynQOutputScaleType(type);
7739 }
7740
// NOTE(review): signature line missing from this listing; the body returns
// mImpl->getDynQOutputScaleType().
7749 {
7750 return mImpl->getDynQOutputScaleType();
7751 }
7752
// Forwards all three SwiGLU parameters in one call to mImpl->setSwigluParams().
7769 void setSwigluParams(float limit, float alpha, float beta) noexcept
7770 {
7771 mImpl->setSwigluParams(limit, alpha, beta);
7772 }
7773
// Forwards the SwiGLU limit to mImpl->setSwigluParamLimit().
7783 void setSwigluParamLimit(float limit) noexcept
7784 {
7785 mImpl->setSwigluParamLimit(limit);
7786 }
7787
// Returns the SwiGLU limit reported by mImpl->getSwigluParamLimit().
7795 float getSwigluParamLimit() const noexcept
7796 {
7797 return mImpl->getSwigluParamLimit();
7798 }
7799
// Forwards the SwiGLU alpha to mImpl->setSwigluParamAlpha().
7809 void setSwigluParamAlpha(float alpha) noexcept
7810 {
7811 mImpl->setSwigluParamAlpha(alpha);
7812 }
7813
// Returns the SwiGLU alpha reported by mImpl->getSwigluParamAlpha().
7821 float getSwigluParamAlpha() const noexcept
7822 {
7823 return mImpl->getSwigluParamAlpha();
7824 }
7825
// Forwards the SwiGLU beta to mImpl->setSwigluParamBeta().
7835 void setSwigluParamBeta(float beta) noexcept
7836 {
7837 mImpl->setSwigluParamBeta(beta);
7838 }
7839
// Returns the SwiGLU beta reported by mImpl->getSwigluParamBeta().
7847 float getSwigluParamBeta() const noexcept
7848 {
7849 return mImpl->getSwigluParamBeta();
7850 }
7851
// Class-specific setInput: forwards (index, tensor) to this layer's own mImpl->setInput().
7864 void setInput(int32_t index, ITensor& tensor) noexcept
7865 {
7866 mImpl->setInput(index, tensor);
7867 }
7868
// Also brings the inherited ILayer::setInput overloads into scope alongside the member above.
7869 using ILayer::setInput;
7870
7871protected:
// Protected destructor: code outside the class hierarchy cannot destroy the object through this type.
7872 virtual ~IMoELayer() noexcept = default;
// Implementation object that all public members forward to.
7873 apiv::VMoELayer* mImpl;
7874};
7875
// NOTE(review): the class-head line is missing from this listing; the destructor and the
// trailing comment below show this body belongs to IDistCollectiveLayer. The class declares
// no public members of its own here.
7884{
7885protected:
// Protected destructor: code outside the class hierarchy cannot destroy the object through this type.
7886 virtual ~IDistCollectiveLayer() noexcept = default;
// Pointer to the implementation object; not used by any member visible in this class body.
7887 apiv::VDistCollectiveLayer* mImpl;
7888}; // class IDistCollectiveLayer
7889
7908{
7909public:
7910 virtual ~INetworkDefinition() noexcept = default;
7911
7947 ITensor* addInput(char const* name, DataType type, Dims const& dimensions) noexcept
7948 {
7949 return mImpl->addInput(name, type, dimensions);
7950 }
7951
7961 void markOutput(ITensor& tensor) noexcept
7962 {
7963 mImpl->markOutput(tensor);
7964 }
7965
7979 bool markDebug(ITensor& tensor) noexcept
7980 {
7981 return mImpl->markDebug(tensor);
7982 }
7983
7995 bool unmarkDebug(ITensor& tensor) noexcept
7996 {
7997 return mImpl->unmarkDebug(tensor);
7998 }
7999
8005 bool isDebugTensor(ITensor const& tensor) const noexcept
8006 {
8007 return mImpl->isDebugTensor(tensor);
8008 }
8009
8028 {
8029 return mImpl->markUnfusedTensorsAsDebugTensors();
8030 }
8031
8042 {
8043 return mImpl->unmarkUnfusedTensorsAsDebugTensors();
8044 }
8045
8062 {
8063 return mImpl->addActivation(input, type);
8064 }
8065
8080 ILRNLayer* addLRN(ITensor& input, int64_t window, float alpha, float beta, float k) noexcept
8081 {
8082 return mImpl->addLRN(input, window, alpha, beta, k);
8083 }
8084
8106 IScaleLayer* addScale(ITensor& input, ScaleMode mode, Weights shift, Weights scale, Weights power) noexcept
8107 {
8108 return mImpl->addScale(input, mode, shift, scale, power);
8109 }
8110
8120 {
8121 return mImpl->addSoftMax(input);
8122 }
8123
8136 IConcatenationLayer* addConcatenation(ITensor* const* inputs, int32_t nbInputs) noexcept
8137 {
8138 return mImpl->addConcatenation(inputs, nbInputs);
8139 }
8140
8164 {
8165 return mImpl->addElementWise(input1, input2, op);
8166 }
8167
8185 IUnaryLayer* addUnary(ITensor& input, UnaryOperation operation) noexcept
8186 {
8187 return mImpl->addUnary(input, operation);
8188 }
8189
8200 {
8201 return mImpl->addShuffle(input);
8202 }
8203
8216 IOneHotLayer* addOneHot(ITensor& indices, ITensor& values, ITensor& depth, int32_t axis) noexcept
8217 {
8218 return mImpl->addOneHot(indices, values, depth, axis);
8219 }
8220
8228 int32_t getNbLayers() const noexcept
8229 {
8230 return mImpl->getNbLayers();
8231 }
8232
8242 ILayer* getLayer(int32_t index) const noexcept
8243 {
8244 return mImpl->getLayer(index);
8245 }
8246
8254 int32_t getNbInputs() const noexcept
8255 {
8256 return mImpl->getNbInputs();
8257 }
8258
8270 ITensor* getInput(int32_t index) const noexcept
8271 {
8272 return mImpl->getInput(index);
8273 }
8274
8284 int32_t getNbOutputs() const noexcept
8285 {
8286 return mImpl->getNbOutputs();
8287 }
8288
8300 ITensor* getOutput(int32_t index) const noexcept
8301 {
8302 return mImpl->getOutput(index);
8303 }
8304
8327 ITensor& input, ReduceOperation operation, uint32_t reduceAxes, bool keepDimensions) noexcept
8328 {
8329 return mImpl->addReduce(input, operation, reduceAxes, keepDimensions);
8330 }
8331
8362 TRT_DEPRECATED ITopKLayer* addTopK(ITensor& input, TopKOperation op, int32_t k, uint32_t reduceAxes) noexcept
8363 {
8364 return mImpl->addTopK(input, op, k, reduceAxes);
8365 }
8366
8395 ITopKLayer* addTopK(ITensor& input, TopKOperation op, int32_t k, uint32_t reduceAxes, DataType indicesType) noexcept
8396 {
8397 return mImpl->addTopKV2(input, op, k, reduceAxes, indicesType);
8398 }
8399
8411 IGatherLayer* addGather(ITensor& data, ITensor& indices, int32_t axis) noexcept
8412 {
8413 return mImpl->addGather(data, indices, axis);
8414 }
8415
8427 IGatherLayer* addGatherV2(ITensor& data, ITensor& indices, GatherMode mode) noexcept
8428 {
8429 return mImpl->addGatherV2(data, indices, mode);
8430 }
8431
8447 {
8448 return mImpl->addRaggedSoftMax(input, bounds);
8449 }
8450
8468 ITensor& input0, MatrixOperation op0, ITensor& input1, MatrixOperation op1) noexcept
8469 {
8470 return mImpl->addMatrixMultiply(input0, op0, input1, op1);
8471 }
8472
8487 {
8488 return mImpl->addNonZero(input);
8489 }
8490
8502 INonZeroLayer* addNonZero(ITensor& input, DataType indicesType) noexcept
8503 {
8504 return mImpl->addNonZeroV2(input, indicesType);
8505 }
8506
8526 IConstantLayer* addConstant(Dims const& dimensions, Weights weights) noexcept
8527 {
8528 return mImpl->addConstant(dimensions, weights);
8529 }
8530
8541 {
8542 return mImpl->addIdentity(input);
8543 }
8544
8555 ICastLayer* addCast(ITensor& input, DataType toType) noexcept
8556 {
8557 return mImpl->addCast(input, toType);
8558 }
8559
8570 void removeTensor(ITensor& tensor) noexcept
8571 {
8572 mImpl->removeTensor(tensor);
8573 }
8574
8582 void unmarkOutput(ITensor& tensor) noexcept
8583 {
8584 mImpl->unmarkOutput(tensor);
8585 }
8586
8601 ISliceLayer* addSlice(ITensor& input, Dims const& start, Dims const& size, Dims const& stride) noexcept
8602 {
8603 return mImpl->addSlice(input, start, size, stride);
8604 }
8605
8625 void setName(char const* name) noexcept
8626 {
8627 mImpl->setName(name);
8628 }
8629
8639 char const* getName() const noexcept
8640 {
8641 return mImpl->getName();
8642 }
8643
8655 IShapeLayer* addShape(ITensor& input) noexcept
8656 {
8657 return mImpl->addShape(input);
8658 }
8659
8666 {
8667 return mImpl->getFlags();
8668 }
8669
8677 bool getFlag(NetworkDefinitionCreationFlag networkDefinitionCreationFlag) const noexcept
8678 {
8679 return mImpl->getFlag(networkDefinitionCreationFlag);
8680 }
8681
8694 bool markOutputForShapes(ITensor& tensor) noexcept
8695 {
8696 return mImpl->markOutputForShapes(tensor);
8697 }
8698
8706 bool unmarkOutputForShapes(ITensor& tensor) noexcept
8707 {
8708 return mImpl->unmarkOutputForShapes(tensor);
8709 }
8710
8725 {
8726 return mImpl->addParametricReLU(input, slope);
8727 }
8728
8747 ITensor& input, int64_t nbOutputMaps, Dims const& kernelSize, Weights kernelWeights, Weights biasWeights) noexcept
8748 {
8749 return mImpl->addConvolutionNd(input, nbOutputMaps, kernelSize, kernelWeights, biasWeights);
8750 }
8751
8766 IPoolingLayer* addPoolingNd(ITensor& input, PoolingType type, Dims const& windowSize) noexcept
8767 {
8768 return mImpl->addPoolingNd(input, type, windowSize);
8769 }
8770
8785 //
8789 ITensor& input, int64_t nbOutputMaps, Dims kernelSize, Weights kernelWeights, Weights biasWeights) noexcept
8790 {
8791 return mImpl->addDeconvolutionNd(input, nbOutputMaps, kernelSize, kernelWeights, biasWeights);
8792 }
8793
8826 ITensor& input, ScaleMode mode, Weights shift, Weights scale, Weights power, int32_t channelAxis) noexcept
8827 {
8828 return mImpl->addScaleNd(input, mode, shift, scale, power, channelAxis);
8829 }
8830
8843 {
8844 return mImpl->addResize(input);
8845 }
8846
8856 ILoop* addLoop() noexcept
8857 {
8858 return mImpl->addLoop();
8859 }
8860
8872 {
8873 return mImpl->addIfConditional();
8874 }
8875
8910 ISelectLayer* addSelect(ITensor& condition, ITensor& thenInput, ITensor& elseInput) noexcept
8911 {
8912 return mImpl->addSelect(condition, thenInput, elseInput);
8913 }
8914
8927 IAssertionLayer* addAssertion(ITensor& condition, char const* message) noexcept
8928 {
8929 return mImpl->addAssertion(condition, message);
8930 }
8931
8953 IFillLayer* addFill(Dims const& dimensions, FillOperation op, DataType outputType) noexcept
8954 {
8955 return mImpl->addFillV2(dimensions, op, outputType);
8956 }
8957
8969 IPaddingLayer* addPaddingNd(ITensor& input, Dims const& prePadding, Dims const& postPadding) noexcept
8970 {
8971 return mImpl->addPaddingNd(input, prePadding, postPadding);
8972 }
8973
8993 bool setWeightsName(Weights weights, char const* name) noexcept
8994 {
8995 return mImpl->setWeightsName(weights, name);
8996 }
8997
9009 //
9012 void setErrorRecorder(IErrorRecorder* recorder) noexcept
9013 {
9014 mImpl->setErrorRecorder(recorder);
9015 }
9016
9028 {
9029 return mImpl->getErrorRecorder();
9030 }
9031
9050 IDequantizeLayer* addDequantize(ITensor& input, ITensor& scale, DataType outputType) noexcept
9051 {
9052 return mImpl->addDequantizeV2(input, scale, outputType);
9053 }
9054
9070 IScatterLayer* addScatter(ITensor& data, ITensor& indices, ITensor& updates, ScatterMode mode) noexcept
9071 {
9072 return mImpl->addScatter(data, indices, updates, mode);
9073 }
9074
9094 IQuantizeLayer* addQuantize(ITensor& input, ITensor& scale, DataType outputType) noexcept
9095 {
9096 return mImpl->addQuantizeV2(input, scale, outputType);
9097 }
9098
9122 ITensor& input, int32_t axis, int32_t blockSize, DataType outputType, DataType scaleType) noexcept
9123 {
9124 return mImpl->addDynamicQuantize(input, axis, blockSize, outputType, scaleType);
9125 }
9126
9146 ITensor& input, Dims const& blockShape, DataType outputType, DataType scaleType) noexcept
9147 {
9148 return mImpl->addDynamicQuantizeV2(input, blockShape, outputType, scaleType);
9149 }
9150
9161 IEinsumLayer* addEinsum(ITensor* const* inputs, int32_t nbInputs, char const* equation) noexcept
9162 {
9163 return mImpl->addEinsum(inputs, nbInputs, equation);
9164 }
9165
9180 {
9181 return mImpl->addGridSample(input, grid);
9182 }
9183
9201 TRT_DEPRECATED INMSLayer* addNMS(ITensor& boxes, ITensor& scores, ITensor& maxOutputBoxesPerClass) noexcept
9202 {
9203 return mImpl->addNMS(boxes, scores, maxOutputBoxesPerClass);
9204 }
9205
9221 INMSLayer* addNMS(ITensor& boxes, ITensor& scores, ITensor& maxOutputBoxesPerClass, DataType indicesType) noexcept
9222 {
9223 return mImpl->addNMSV2(boxes, scores, maxOutputBoxesPerClass, indicesType);
9224 }
9225
9239 {
9240 return mImpl->addReverseSequence(input, sequenceLens);
9241 }
9242
9270 TRT_DEPRECATED INormalizationLayer* addNormalization(ITensor& input, ITensor& scale, ITensor& bias, uint32_t axesMask) noexcept
9271 {
9272 return mImpl->addNormalization(input, scale, bias, axesMask);
9273 }
9274
9292 ICumulativeLayer* addCumulative(ITensor& input, ITensor& axis, CumulativeOperation operation, bool exclusive, bool reverse) noexcept
9293 {
9294 return mImpl->addCumulative(input, axis, operation, exclusive, reverse);
9295 }
9296
9320 ITensor& query, ITensor& key, ITensor& value, AttentionNormalizationOp normOp, bool causal) noexcept
9321 {
9322 return mImpl->addAttention(query, key, value, normOp, causal);
9323 }
9324
9344 IRotaryEmbeddingLayer* addRotaryEmbedding(ITensor& input, ITensor& cosCache, ITensor& sinCache, bool interleaved, int32_t rotaryEmbeddingDim) noexcept
9345 {
9346 return mImpl->addRotaryEmbedding(input, cosCache, sinCache, interleaved, rotaryEmbeddingDim);
9347 }
9348
9379 ITensor& cache, ITensor& update, ITensor& writeIndices, KVCacheMode cacheMode) noexcept
9380 {
9381 return mImpl->addKVCacheUpdate(cache, update, writeIndices, cacheMode);
9382 }
9383
9398 IMoELayer* addMoE(ITensor& hiddenStates, ITensor& selectedExpertsForTokens, ITensor& scoresForSelectedExperts) noexcept
9399 {
9400 return mImpl->addMoE(hiddenStates, selectedExpertsForTokens, scoresForSelectedExperts);
9401 }
9402
9427 ReduceOperation reduceOp, int64_t root, int64_t* groups, int64_t groupSize) noexcept
9428 {
9429 return mImpl->addDistCollective(input, distCollectiveOp, reduceOp, root, groups, groupSize);
9430 }
9431
9438 virtual IBuilder& getBuilder() const noexcept
9439 {
9440 return mImpl->getBuilder();
9441 }
9442
9451 bool markWeightsRefittable(char const* name) noexcept
9452 {
9453 return mImpl->markWeightsRefittable(name);
9454 }
9455
9463 bool unmarkWeightsRefittable(char const* name) noexcept
9464 {
9465 return mImpl->unmarkWeightsRefittable(name);
9466 }
9467
9476 bool areWeightsMarkedRefittable(char const* name) const noexcept
9477 {
9478 return mImpl->areWeightsMarkedRefittable(name);
9479 }
9480
9495 ISqueezeLayer* addSqueeze(ITensor& input, ITensor& axes) noexcept
9496 {
9497 return mImpl->addSqueeze(input, axes);
9498 }
9499
//! NOTE(review): the declaration line for this body (the one just before listing line 9517) is missing from
//! this extract — restore it from the upstream header. Presumably it mirrors addSqueeze(input, axes); confirm.
//! The body forwards (input, axes) to mImpl->addUnsqueeze() and returns its result.
9517 {
9518 return mImpl->addUnsqueeze(input, axes);
9519 }
9520
9542 TRT_NODISCARD INormalizationLayer* addNormalizationV2(ITensor& input, ITensor& scale, ITensor& bias, uint32_t axesMask) noexcept
9543 {
9544 return mImpl->addNormalizationV2(input, scale, bias, axesMask);
9545 }
9546
9547protected:
9548 apiv::VNetworkDefinition* mImpl;
9549};
9550
9568enum class RuntimePlatform : int32_t
9569{
9572 kSAME_AS_BUILD = 0,
9573
9576 kWINDOWS_AMD64 = 1,
9577
9578
9579};
9580
9581namespace impl
9582{
//! Number of elements in the RuntimePlatform enum (kSAME_AS_BUILD and kWINDOWS_AMD64).
9588template <>
9589struct EnumMaxImpl<RuntimePlatform>
9590{
9591 static constexpr int32_t kVALUE = 2;
9592};
9593} // namespace impl
9594
9601using BuilderFlags = uint32_t;
9602
9610enum class BuilderFlag : int32_t
9611{
9615
9619
9621 kDEBUG = 2,
9622
9624 kGPU_FALLBACK = 3,
9625
9627 kREFIT = 4,
9628
9631
9635 kTF32 = 6,
9636
9638 kSPARSE_WEIGHTS = 7,
9639
9652
9656
9661
9667
9671
9678
9684
9691
9695
9700
9706
9708 kSTRIP_PLAN = 19,
9709
9712
9719 kREFIT_IDENTICAL = 20,
9720
9746 kWEIGHT_STREAMING = 21,
9747
9751
9756 kREFIT_INDIVIDUAL = 23,
9757
9766 kSTRICT_NANS = 24,
9767
9769 kMONITOR_MEMORY = 25,
9770
9774
9777
9789
9790#if ENABLE_FEATURE_DISABLE_RUNTIME_ALLOCATION
9797 kREQUIRE_USER_ALLOCATION = 29,
9798#endif // ENABLE_FEATURE_DISABLE_RUNTIME_ALLOCATION
9799
9800};
9801
9807template <>
9808constexpr inline int32_t EnumMax<BuilderFlag>() noexcept
9809{
9810#if ENABLE_FEATURE_DISABLE_RUNTIME_ALLOCATION
9811 return 30;
9812#else
9813 return 29;
9814#endif // ENABLE_FEATURE_DISABLE_RUNTIME_ALLOCATION
9815}
9816
9817namespace v_1_0
9818{
9834{
9835 uint8_t data[16];
9836};
9837
9848{
9850 uint64_t tacticHash;
9854 static constexpr uint64_t kINVALID_TACTIC_HASH = UINT64_MAX;
9855};
9856} // namespace v_1_0
9857
9874{
9875public:
9876 virtual ~ITimingCache() noexcept = default;
9877
9887 nvinfer1::IHostMemory* serialize() const noexcept
9888 {
9889 return mImpl->serialize();
9890 }
9891
9911 bool combine(ITimingCache const& inputCache, bool ignoreMismatch) noexcept
9912 {
9913 return mImpl->combine(inputCache, ignoreMismatch);
9914 }
9915
9921 bool reset() noexcept
9922 {
9923 return mImpl->reset();
9924 }
9925
9940 int64_t queryKeys(TimingCacheKey* keyBuffer, int64_t capacity) const noexcept
9941 {
9942 return mImpl->queryKeys(keyBuffer, capacity);
9943 }
9944
9957 TimingCacheValue query(TimingCacheKey const& key) const noexcept
9958 {
9959 return mImpl->query(key);
9960 }
9961
9979 bool update(TimingCacheKey const& key, TimingCacheValue const& value) noexcept
9980 {
9981 return mImpl->update(key, value);
9982 }
9983
9984protected:
9985 apiv::VTimingCache* mImpl;
9986};
9987
9995enum class MemoryPoolType : int32_t
9996{
10003 kWORKSPACE = 0,
10004
10012
10018 kDLA_LOCAL_DRAM = 2,
10019
10025 kDLA_GLOBAL_DRAM = 3,
10026
10034 kTACTIC_DRAM = 4,
10035
10049};
10050
10056template <>
10057constexpr inline int32_t EnumMax<MemoryPoolType>() noexcept
10058{
10059 return 6;
10060}
10061
10070enum class PreviewFeature : int32_t
10071{
10078
10083
10090
10099};
10100
10101namespace impl
10102{
//! Number of elements in the PreviewFeature enum.
//! NOTE(review): the enumerators themselves are not visible in this extract; the count of 4 is taken as-is.
10108template <>
10109struct EnumMaxImpl<PreviewFeature>
10110{
10111 static constexpr int32_t kVALUE = 4;
10112};
10113} // namespace impl
10114
10123enum class HardwareCompatibilityLevel : int32_t
10124{
10127 kNONE = 0,
10128
10140 kAMPERE_PLUS = 1,
10141
10151};
10152
10153namespace impl
10154{
//! Number of elements in the HardwareCompatibilityLevel enum.
//! NOTE(review): only kNONE and kAMPERE_PLUS are visible in this extract; a third enumerator is presumably
//! elided — confirm against the upstream header.
10160template <>
10161struct EnumMaxImpl<HardwareCompatibilityLevel>
10162{
10163 static constexpr int32_t kVALUE = 3;
10164};
10165} // namespace impl
10166
10172enum class ComputeCapability : int32_t
10173{
10175 kNONE = 0,
10177 kCURRENT = 1,
10179 kSM75 = 75,
10181 kSM80 = 80,
10183 kSM86 = 86,
10185 kSM89 = 89,
10187 kSM120 = 120,
10188};
10189
10198enum class TilingOptimizationLevel : int32_t
10199{
10201 kNONE = 0,
10202
10204 kFAST = 1,
10205
10208 kMODERATE = 2,
10209
10211 kFULL = 3
10212
10213};
10214
10215namespace impl
10216{
//! Number of elements in the TilingOptimizationLevel enum (kNONE, kFAST, kMODERATE, kFULL).
10222template <>
10223struct EnumMaxImpl<TilingOptimizationLevel>
10224{
10225 static constexpr int32_t kVALUE = 4;
10226};
10227} // namespace impl
10228
10229namespace v_1_0
10230{
10232{
10233public:
10234 IProgressMonitor() = default;
10235 virtual ~IProgressMonitor() noexcept = default;
10236
10240 InterfaceInfo getInterfaceInfo() const noexcept override
10241 {
10242 return InterfaceInfo{"IProgressMonitor", 1, 0};
10243 }
10244
10264 virtual void phaseStart(char const* phaseName, char const* parentPhase, int32_t nbSteps) noexcept = 0;
10265
10278 virtual bool stepComplete(char const* phaseName, int32_t step) noexcept = 0;
10279
10291 virtual void phaseFinish(char const* phaseName) noexcept = 0;
10292
10293}; // class IProgressMonitor
10294} // namespace v_1_0
10295
10316
10325{
10326public:
10327 virtual ~IBuilderConfig() noexcept = default;
10328
//! \brief Set the number of averaging iterations (counterpart of getAvgTimingIterations()).
//! \param avgTiming Value handed unchanged to the implementation object.
//! NOTE(review): declared virtual, unlike the neighbouring wrappers — confirm this is intentional.
10337 virtual void setAvgTimingIterations(int32_t avgTiming) noexcept
10338 {
10339 mImpl->setAvgTimingIterations(avgTiming);
10340 }
10341
//! \brief Query the number of averaging iterations.
//! \return The value reported by the implementation object.
10349 int32_t getAvgTimingIterations() const noexcept
10350 {
10351 return mImpl->getAvgTimingIterations();
10352 }
10353
//! \brief Configure the builder to target the specified EngineCapability flow.
//! \param capability Capability handed unchanged to the implementation object.
10362 void setEngineCapability(EngineCapability capability) noexcept
10363 {
10364 mImpl->setEngineCapability(capability);
10365 }
10366
//! \brief Query the EngineCapability flow configured for the builder.
//! \return The capability reported by the implementation object.
10374 EngineCapability getEngineCapability() const noexcept
10375 {
10376 return mImpl->getEngineCapability();
10377 }
10378
//! \brief Set the build mode flags to turn on builder options for this network.
//! \param builderFlags Flag word (BuilderFlags is a uint32_t) handed unchanged to the implementation object.
10391 void setFlags(BuilderFlags builderFlags) noexcept
10392 {
10393 mImpl->setFlags(builderFlags);
10394 }
10395
//! \brief Get the build mode flags for this builder config. Defaults to 0.
//! \return The flag word reported by the implementation object.
10403 BuilderFlags getFlags() const noexcept
10404 {
10405 return mImpl->getFlags();
10406 }
10407
//! \brief Clear a single build mode flag.
//! \param builderFlag The flag to clear; forwarded to the implementation object.
10415 void clearFlag(BuilderFlag builderFlag) noexcept
10416 {
10417 mImpl->clearFlag(builderFlag);
10418 }
10419
//! \brief Set a single build mode flag.
//! \param builderFlag The flag to set; forwarded to the implementation object.
10427 void setFlag(BuilderFlag builderFlag) noexcept
10428 {
10429 mImpl->setFlag(builderFlag);
10430 }
10431
//! \brief Returns true if the build mode flag is set.
//! \param builderFlag The flag to query; forwarded to the implementation object.
10439 bool getFlag(BuilderFlag builderFlag) const noexcept
10440 {
10441 return mImpl->getFlag(builderFlag);
10442 }
10443
//! \brief Set the device that this layer must execute on.
//! \param layer      Layer whose device is being assigned.
//! \param deviceType Device to assign; both arguments are forwarded to the implementation object.
10456 void setDeviceType(ILayer const* layer, DeviceType deviceType) noexcept
10457 {
10458 mImpl->setDeviceType(layer, deviceType);
10459 }
10460
//! \brief Get the device that this layer executes on.
//! \param layer Layer to query; forwarded to the implementation object.
10466 DeviceType getDeviceType(ILayer const* layer) const noexcept
10467 {
10468 return mImpl->getDeviceType(layer);
10469 }
10470
//! \brief Report whether the DeviceType has been explicitly set for this layer.
//! \param layer Layer to query; forwarded to the implementation object.
10478 bool isDeviceTypeSet(ILayer const* layer) const noexcept
10479 {
10480 return mImpl->isDeviceTypeSet(layer);
10481 }
10482
//! \brief Reset the DeviceType for this layer.
//! \param layer Layer whose device assignment is reset; forwarded to the implementation object.
10488 void resetDeviceType(ILayer const* layer) noexcept
10489 {
10490 mImpl->resetDeviceType(layer);
10491 }
10492
//! \brief Checks if a layer can run on DLA.
//! \param layer Layer to check; forwarded to the implementation object.
10498 bool canRunOnDLA(ILayer const* layer) const noexcept
10499 {
10500 return mImpl->canRunOnDLA(layer);
10501 }
10502
//! \brief Sets the DLA core used by the network. Defaults to -1.
//! \param dlaCore Core index handed unchanged to the implementation object.
10514 void setDLACore(int32_t dlaCore) noexcept
10515 {
10516 mImpl->setDLACore(dlaCore);
10517 }
10518
//! \brief Get the DLA core that the engine executes on.
//! \return The core index reported by the implementation object.
10524 int32_t getDLACore() const noexcept
10525 {
10526 return mImpl->getDLACore();
10527 }
10528
//! \brief Sets the default DeviceType to be used by the builder.
//! \param deviceType Default device handed unchanged to the implementation object.
10535 void setDefaultDeviceType(DeviceType deviceType) noexcept
10536 {
10537 mImpl->setDefaultDeviceType(deviceType);
10538 }
10539
//! \brief Get the default DeviceType which was set by setDefaultDeviceType.
//! \return The device type reported by the implementation object.
10545 DeviceType getDefaultDeviceType() const noexcept
10546 {
10547 return mImpl->getDefaultDeviceType();
10548 }
10549
//! \brief Resets the builder configuration to defaults.
10555 void reset() noexcept
10556 {
10557 mImpl->reset();
10558 }
10559
//! \brief Set the CUDA stream that is used to profile this network.
//! \param stream Stream handle handed unchanged to the implementation object.
10567 void setProfileStream(const cudaStream_t stream) noexcept
10568 {
10569 mImpl->setProfileStream(stream);
10570 }
10571
//! \brief Get the CUDA stream that is used to profile this network.
//! \return The stream handle reported by the implementation object.
10579 cudaStream_t getProfileStream() const noexcept
10580 {
10581 return mImpl->getProfileStream();
10582 }
10583
//! \brief Add an optimization profile.
//! \param profile Profile to add; forwarded to the implementation object.
//! \return The int32_t produced by the implementation (presumably the new profile's index — confirm).
10596 int32_t addOptimizationProfile(IOptimizationProfile const* profile) noexcept
10597 {
10598 return mImpl->addOptimizationProfile(profile);
10599 }
10600
//! \brief Get number of optimization profiles.
10609 int32_t getNbOptimizationProfiles() const noexcept
10610 {
10611 return mImpl->getNbOptimizationProfiles();
10612 }
10613
//! \brief Set verbosity level of layer information exposed in NVTX annotations and IEngineInspector.
//! \param verbosity Verbosity level handed unchanged to the implementation object.
10621 void setProfilingVerbosity(ProfilingVerbosity verbosity) noexcept
10622 {
10623 mImpl->setProfilingVerbosity(verbosity);
10624 }
10625
//! \brief Get verbosity level of layer information exposed in NVTX annotations and IEngineInspector.
//! \return The verbosity level reported by the implementation object.
10634 ProfilingVerbosity getProfilingVerbosity() const noexcept
10635 {
10636 return mImpl->getProfilingVerbosity();
10637 }
10638
//! \brief Set tactic sources.
//! \param tacticSources Source mask handed unchanged to the implementation object.
//! \return The bool produced by the implementation (presumably success — confirm).
10656 bool setTacticSources(TacticSources tacticSources) noexcept
10657 {
10658 return mImpl->setTacticSources(tacticSources);
10659 }
10660
//! \brief Get tactic sources.
//! \return The tactic-source mask reported by the implementation object.
10671 TacticSources getTacticSources() const noexcept
10672 {
10673 return mImpl->getTacticSources();
10674 }
10675
//! \brief Create timing cache. Marked TRT_DEPRECATED.
//! \param blob Pointer forwarded unchanged to the implementation object.
//! \param size Size forwarded unchanged to the implementation object (presumably the byte length of blob — confirm).
//! \return The ITimingCache pointer produced by the implementation.
10693 TRT_DEPRECATED nvinfer1::ITimingCache* createTimingCache(void const* blob, std::size_t size) const noexcept
10694 {
10695 return mImpl->createTimingCache(blob, size);
10696 }
10697
//! \brief Attach a timing cache to IBuilderConfig. Marked TRT_DEPRECATED.
//! \param cache          Cache to attach.
//! \param ignoreMismatch Forwarded unchanged together with \p cache to the implementation object.
//! \return The bool produced by the implementation (presumably success — confirm).
10718 TRT_DEPRECATED bool setTimingCache(ITimingCache const& cache, bool ignoreMismatch) noexcept
10719 {
10720 return mImpl->setTimingCache(cache, ignoreMismatch);
10721 }
10722
//! \brief Get the pointer to the timing cache from current IBuilderConfig. Marked TRT_DEPRECATED.
//! \return The ITimingCache pointer reported by the implementation object.
10730 TRT_DEPRECATED nvinfer1::ITimingCache const* getTimingCache() const noexcept
10731 {
10732 return mImpl->getTimingCache();
10733 }
10734
//! \brief Set the memory size for the memory pool.
//! \param pool     Pool whose limit is being set.
//! \param poolSize Limit handed unchanged to the implementation object (presumably in bytes — confirm).
10762 void setMemoryPoolLimit(MemoryPoolType pool, std::size_t poolSize) noexcept
10763 {
10764 mImpl->setMemoryPoolLimit(pool, poolSize);
10765 }
10766
//! \brief Get the memory size limit of the memory pool.
//! \param pool Pool to query; forwarded to the implementation object.
10781 std::size_t getMemoryPoolLimit(MemoryPoolType pool) const noexcept
10782 {
10783 return mImpl->getMemoryPoolLimit(pool);
10784 }
10785
//! \brief Enable or disable a specific preview feature.
//! \param feature Feature to toggle.
//! \param enable  Desired state; both arguments are forwarded to the implementation object.
10799 void setPreviewFeature(PreviewFeature feature, bool enable) noexcept
10800 {
10801 mImpl->setPreviewFeature(feature, enable);
10802 }
10803
//! \brief Get status of preview feature.
//! \param feature Feature to query; forwarded to the implementation object.
10813 bool getPreviewFeature(PreviewFeature feature) const noexcept
10814 {
10815 return mImpl->getPreviewFeature(feature);
10816 }
10817
//! \brief Set builder optimization level.
//! \param level Level handed unchanged to the implementation object.
10846 void setBuilderOptimizationLevel(int32_t level) noexcept
10847 {
10848 mImpl->setBuilderOptimizationLevel(level);
10849 }
10850
//! \brief Get builder optimization level.
//! NOTE(review): unlike the other getters this one is not const-qualified; kept exactly as indexed.
10858 int32_t getBuilderOptimizationLevel() noexcept
10859 {
10860 return mImpl->getBuilderOptimizationLevel();
10861 }
10862
//! \brief Set the hardware compatibility level.
//! \param hardwareCompatibilityLevel Level handed unchanged to the implementation object.
10875 void setHardwareCompatibilityLevel(HardwareCompatibilityLevel hardwareCompatibilityLevel) noexcept
10876 {
10877 mImpl->setHardwareCompatibilityLevel(hardwareCompatibilityLevel);
10878 }
10879
//! \brief Get the hardware compatibility level.
//! \return The level reported by the implementation object.
10888 HardwareCompatibilityLevel getHardwareCompatibilityLevel() const noexcept
10889 {
10890 return mImpl->getHardwareCompatibilityLevel();
10891 }
10892
//! \brief Set the plugin libraries to be serialized with version-compatible engines.
//! \param paths   Array of C strings (library paths).
//! \param nbPaths Forwarded with \p paths to the implementation object (presumably the array length — confirm).
10901 void setPluginsToSerialize(char const* const* paths, int32_t nbPaths) noexcept
10902 {
10903 mImpl->setPluginsToSerialize(paths, nbPaths);
10904 }
10905
//! \brief Get the plugin library path to be serialized with version-compatible engines.
//! \param index Index of the path to fetch; forwarded to the implementation object.
10914 char const* getPluginToSerialize(int32_t index) const noexcept
10915 {
10916 return mImpl->getPluginToSerialize(index);
10917 }
10918
//! \brief Get the number of plugin library paths to be serialized with version-compatible engines.
10924 int32_t getNbPluginsToSerialize() const noexcept
10925 {
10926 return mImpl->getNbPluginsToSerialize();
10927 }
10928
//! \brief Set the maximum number of auxiliary streams that TRT is allowed to use.
//! \param nbStreams Limit handed unchanged to the implementation object.
10953 void setMaxAuxStreams(int32_t nbStreams) noexcept
10954 {
10955 mImpl->setMaxAuxStreams(nbStreams);
10956 }
10957
//! \brief Get the maximum number of auxiliary streams that TRT is allowed to use.
10963 int32_t getMaxAuxStreams() const noexcept
10964 {
10965 return mImpl->getMaxAuxStreams();
10966 }
10967
//! \brief Sets the progress monitor for building a network.
//! \param monitor Monitor pointer handed unchanged to the implementation object.
10979 void setProgressMonitor(IProgressMonitor* monitor) noexcept
10980 {
10981 mImpl->setProgressMonitor(monitor);
10982 }
10983
//! \brief Get the progress monitor pointer held by the implementation object
//! (counterpart of setProgressMonitor()).
10989 IProgressMonitor* getProgressMonitor() const noexcept
10990 {
10991 return mImpl->getProgressMonitor();
10992 }
10993
//! \brief Set the target platform for runtime execution.
//! \param runtimePlatform Platform handed unchanged to the implementation object.
11005 void setRuntimePlatform(RuntimePlatform runtimePlatform) noexcept
11006 {
11007 mImpl->setRuntimePlatform(runtimePlatform);
11008 }
11009
//! \brief Get the target platform for runtime execution.
//! \return The platform reported by the implementation object.
11017 RuntimePlatform getRuntimePlatform() const noexcept
11018 {
11019 return mImpl->getRuntimePlatform();
11020 }
11021
//! \brief Set the maximum number of tactics to time when there is a choice of tactics.
//! \param maxNbTactics Limit handed unchanged to the implementation object.
11029 void setMaxNbTactics(int32_t maxNbTactics) noexcept
11030 {
11031 mImpl->setMaxNbTactics(maxNbTactics);
11032 }
11033
//! \brief Query the maximum number of tactics timed when there is a choice.
11041 int32_t getMaxNbTactics() const noexcept
11042 {
11043 return mImpl->getMaxNbTactics();
11044 }
11045
//! \brief Set the Tiling optimization level.
//! \param level Level handed unchanged to the implementation object.
//! \return The bool produced by the implementation (presumably success — confirm).
11057 bool setTilingOptimizationLevel(TilingOptimizationLevel level) noexcept
11058 {
11059 return mImpl->setTilingOptimizationLevel(level);
11060 }
11061
//! \brief Get the Tiling optimization level.
//! \return The level reported by the implementation object.
11069 TilingOptimizationLevel getTilingOptimizationLevel() const noexcept
11070 {
11071 return mImpl->getTilingOptimizationLevel();
11072 }
11073
//! \brief Set the L2 cache usage limit for Tiling optimization.
//! \param size Limit handed unchanged to the implementation object (presumably in bytes — confirm).
//! \return The bool produced by the implementation (presumably success — confirm).
11085 bool setL2LimitForTiling(int64_t size) noexcept
11086 {
11087 return mImpl->setL2LimitForTiling(size);
11088 }
11089
//! \brief Get the L2 cache usage limit for tiling optimization.
11097 int64_t getL2LimitForTiling() const noexcept
11098 {
11099 return mImpl->getL2LimitForTiling();
11100 }
11101
//! \brief Set the number of compute capabilities.
//! \param maxNbComputeCapabilities Count handed unchanged to the implementation object.
//! \return The bool produced by the implementation (presumably success — confirm).
11116 bool setNbComputeCapabilities(int32_t maxNbComputeCapabilities) noexcept
11117 {
11118 return mImpl->setNbComputeCapabilities(maxNbComputeCapabilities);
11119 }
11120
//! \brief Get the number of compute capabilities.
11128 int32_t getNbComputeCapabilities() const noexcept
11129 {
11130 return mImpl->getNbComputeCapabilities();
11131 }
11132
//! \brief Set one compute capability for runtime execution.
//! \param computeCapability Capability to store.
//! \param index             Slot to store it in; both arguments are forwarded to the implementation object.
//! \return The bool produced by the implementation (presumably success — confirm).
11146 bool setComputeCapability(ComputeCapability computeCapability, int32_t index) noexcept
11147 {
11148 return mImpl->setComputeCapability(computeCapability, index);
11149 }
11150
//! \brief Get one compute capability for runtime execution.
//! \param index Slot to read; forwarded to the implementation object.
11160 ComputeCapability getComputeCapability(int32_t index) const noexcept
11161 {
11162 return mImpl->getComputeCapability(index);
11163 }
11164
11165protected:
11166 apiv::VBuilderConfig* mImpl;
11167};
11168
11177
11187{
11192
11197 kSTRONGLY_TYPED = 1,
11198};
11199
11205template <>
11206constexpr inline int32_t EnumMax<NetworkDefinitionCreationFlag>() noexcept
11207{
11208 return 2;
11209}
11210
11218class IBuilder : public INoCopy
11219{
11220public:
11221 virtual ~IBuilder() noexcept = default;
11222
//! \brief Return the maximum DLA batch size as reported by the implementation object.
11230 int32_t getMaxDLABatchSize() const noexcept
11231 {
11232 return mImpl->getMaxDLABatchSize();
11233 }
11234
//! \brief Return the number of DLA engines available to this builder.
11238 int32_t getNbDLACores() const noexcept
11239 {
11240 return mImpl->getNbDLACores();
11241 }
11242
//! \brief Set the GPU allocator.
//! \param allocator Allocator pointer handed unchanged to the implementation object.
11256 void setGpuAllocator(IGpuAllocator* allocator) noexcept
11257 {
11258 mImpl->setGpuAllocator(allocator);
11259 }
11260
//! \brief Create a builder configuration object.
//! \return The IBuilderConfig pointer produced by the implementation object.
11270 nvinfer1::IBuilderConfig* createBuilderConfig() noexcept
11271 {
11272 return mImpl->createBuilderConfig();
11273 }
11274
//! \brief Create a network definition object.
//! \param flags Creation flags handed unchanged to the implementation object.
//! \return The INetworkDefinition pointer produced by the implementation object.
11296 nvinfer1::INetworkDefinition* createNetworkV2(NetworkDefinitionCreationFlags flags) noexcept
11297 {
11298 return mImpl->createNetworkV2(flags);
11299 }
11300
//! \brief Create a new optimization profile.
//! \return The IOptimizationProfile pointer produced by the implementation object.
11311 nvinfer1::IOptimizationProfile* createOptimizationProfile() noexcept
11312 {
11313 return mImpl->createOptimizationProfile();
11314 }
11315
//! \brief Set the ErrorRecorder for this interface.
//! \param recorder Recorder pointer handed unchanged to the implementation object.
11330 void setErrorRecorder(IErrorRecorder* recorder) noexcept
11331 {
11332 mImpl->setErrorRecorder(recorder);
11333 }
11334
//! \brief Get the ErrorRecorder assigned to this interface.
//! \return The recorder pointer reported by the implementation object.
11345 IErrorRecorder* getErrorRecorder() const noexcept
11346 {
11347 return mImpl->getErrorRecorder();
11348 }
11349
//! \brief Resets the builder state to default values.
11353 void reset() noexcept
11354 {
11355 mImpl->reset();
11356 }
11357
//! \brief Builds and serializes a network for the given INetworkDefinition and IBuilderConfig.
//! \param network Network definition to build from.
//! \param config  Builder configuration; both arguments are forwarded to the implementation object.
//! \return The IHostMemory pointer produced by the implementation object.
11372 nvinfer1::IHostMemory* buildSerializedNetwork(INetworkDefinition& network, IBuilderConfig& config) noexcept
11373 {
11374 return mImpl->buildSerializedNetwork(network, config);
11375 }
11376
//! \brief Builds and serializes a network into stream for the given INetworkDefinition and IBuilderConfig.
//! \param network Network definition to build from.
//! \param config  Builder configuration.
//! \param writer  Stream writer; all three arguments are forwarded to the implementation object.
//! \return The bool produced by the implementation (presumably success — confirm).
11393 bool buildSerializedNetworkToStream(
11394 INetworkDefinition& network, IBuilderConfig& config, IStreamWriter& writer) noexcept
11395 {
11396 return mImpl->buildSerializedNetworkToStream(network, config, writer);
11397 }
11398
11399
//! \brief Checks that a network is within the scope of the IBuilderConfig settings.
//! \param network Network definition to check.
//! \param config  Configuration to check against; both arguments are forwarded to the implementation object.
11421 bool isNetworkSupported(INetworkDefinition const& network, IBuilderConfig const& config) const noexcept
11422 {
11423 return mImpl->isNetworkSupported(network, config);
11424 }
11425
//! \brief Get the logger with which the builder was created.
11431 ILogger* getLogger() const noexcept
11432 {
11433 return mImpl->getLogger();
11434 }
11435
//! \brief Set the maximum number of threads.
//! \param maxThreads Limit handed unchanged to the implementation object.
//! \return The bool produced by the implementation (presumably success — confirm).
11447 bool setMaxThreads(int32_t maxThreads) noexcept
11448 {
11449 return mImpl->setMaxThreads(maxThreads);
11450 }
11451
//! \brief Get the maximum number of threads that can be used by the builder.
11461 int32_t getMaxThreads() const noexcept
11462 {
11463 return mImpl->getMaxThreads();
11464 }
11465
//! \brief Get the local plugin registry that can be used by the builder.
//! \return Reference to the registry reported by the implementation object.
11471 IPluginRegistry& getPluginRegistry() noexcept
11472 {
11473 return mImpl->getPluginRegistry();
11474 }
11475
11476protected:
11477 apiv::VBuilder* mImpl;
11478};
11479
11480} // namespace nvinfer1
11481
11486extern "C" TENSORRTAPI void* createInferBuilder_INTERNAL(void* logger, int32_t version) noexcept;
11487
11488namespace nvinfer1
11489{
11490namespace
11491{
11492
//! \brief Create an IBuilder instance.
//!
//! Calls createInferBuilder_INTERNAL() with the address of \p logger and the compile-time
//! NV_TENSORRT_VERSION, then casts the returned void* to IBuilder*.
//! Defined inside an unnamed namespace, so each translation unit that includes this header gets its own copy.
//!
//! \param logger Logger whose address is passed to the internal factory.
//! \return The pointer produced by the internal factory, viewed as IBuilder*.
11500inline IBuilder* createInferBuilder(ILogger& logger) noexcept
11501{
11502 return static_cast<IBuilder*>(createInferBuilder_INTERNAL(&logger, NV_TENSORRT_VERSION));
11503}
11504
11505} // namespace
11506
11519 nvinfer1::EngineCapability capability) noexcept;
11520
11530extern "C" TENSORRTAPI bool setInternalLibraryPath(AsciiChar const* path) noexcept;
11531
11532namespace safe
11533{
11535class IPluginRegistry;
11536} // namespace safe
11537
11538
11539} // namespace nvinfer1
11540
11541#endif // NV_INFER_H
#define TENSORRTAPI
Definition: NvInferRuntimeBase.h:70
#define NV_TENSORRT_VERSION
Definition: NvInferRuntimeBase.h:102
#define TRT_NODISCARD
A stand-in for [[nodiscard]] and [[nodiscard(REASON)]] that works with older compilers.
Definition: NvInferRuntimeBase.h:57
#define TRT_DEPRECATED
Definition: NvInferRuntimeBase.h:42
#define TRT_DEPRECATED_ENUM
Definition: NvInferRuntimeBase.h:43
Definition: NvInferRuntimeBase.h:219
static constexpr int32_t MAX_DIMS
The maximum rank (number of dimensions) supported for a tensor.
Definition: NvInferRuntimeBase.h:222
An Activation layer in a network definition.
Definition: NvInfer.h:1302
void setBeta(float beta) noexcept
Set the beta parameter (must be finite).
Definition: NvInfer.h:1350
void setActivationType(ActivationType type) noexcept
Set the type of activation to be performed.
Definition: NvInfer.h:1311
ActivationType getActivationType() const noexcept
Get the type of activation to be performed.
Definition: NvInfer.h:1321
float getAlpha() const noexcept
Get the alpha parameter.
Definition: NvInfer.h:1359
virtual ~IActivationLayer() noexcept=default
float getBeta() const noexcept
Get the beta parameter.
Definition: NvInfer.h:1368
void setAlpha(float alpha) noexcept
Set the alpha parameter (must be finite).
Definition: NvInfer.h:1336
An assertion layer in a network.
Definition: NvInfer.h:5004
void setMessage(char const *message) noexcept
Set the message to print if the assertion fails.
Definition: NvInfer.h:5014
char const * getMessage() const noexcept
Return the assertion message.
Definition: NvInfer.h:5024
virtual ~IAssertionLayer() noexcept=default
This is a base class for Attention boundary layers.
Definition: NvInfer.h:6837
IAttention * getAttention() const noexcept
Get a pointer to the IAttention associated with this boundary layer.
Definition: NvInfer.h:6842
virtual ~IAttentionBoundaryLayer() noexcept=default
Helper for constructing an attention that consumes query, key and value tensors.
Definition: NvInfer.h:6955
ITensor * getMask() noexcept
Get the optional mask in attention.
Definition: NvInfer.h:7005
bool setMetadata(char const *metadata) noexcept
Set the metadata for IAttention.
Definition: NvInfer.h:7216
bool setDecomposable(bool decomposable) noexcept
Set whether the attention can be decomposed to use multiple kernels if no fused kernel support found.
Definition: NvInfer.h:7042
bool setName(char const *name) noexcept
Set the name of the attention.
Definition: NvInfer.h:7132
bool getDecomposable() const noexcept
Get whether the attention can be decomposed to use multiple kernels if no fused kernel support found.
Definition: NvInfer.h:7055
ITensor * getInput(int32_t index) const noexcept
Get the IAttention input corresponding to the given index.
Definition: NvInfer.h:7095
ITensor * getOutput(int32_t index) const noexcept
Get the IAttention output corresponding to the given index. IAttention has only one output.
Definition: NvInfer.h:7115
int32_t getNbOutputs() const noexcept
Get the number of outputs of a layer. IAttention has one output.
Definition: NvInfer.h:7103
bool setNbRanks(int32_t nbRanks) noexcept
Set the number of ranks for multi-device attention execution.
Definition: NvInfer.h:7245
int32_t getNbInputs() const noexcept
Get the number of inputs of IAttention. IAttention has three inputs.
Definition: NvInfer.h:7083
bool setCausal(bool isCausal) noexcept
Set whether the attention will run a causal inference. Cannot be used together with setMask().
Definition: NvInfer.h:7018
bool setNormalizationOperation(AttentionNormalizationOp op) noexcept
Set the normalization operation for the attention.
Definition: NvInfer.h:6964
char const * getName() const noexcept
Return the name of the attention.
Definition: NvInfer.h:7144
bool setNormalizationQuantizeToType(DataType type) noexcept
Set the datatype the attention normalization is quantized to.
Definition: NvInfer.h:7184
int32_t getNbRanks() const noexcept
Get the number of ranks for multi-device execution.
Definition: NvInfer.h:7257
AttentionNormalizationOp getNormalizationOperation() const noexcept
Get the normalization operation for the attention.
Definition: NvInfer.h:6976
bool setNormalizationQuantizeScale(ITensor &tensor) noexcept
Set the quantization scale for the attention normalization output.
Definition: NvInfer.h:7160
char const * getMetadata() const noexcept
Get the metadata of IAttention.
Definition: NvInfer.h:7229
DataType getNormalizationQuantizeToType() const noexcept
Get the datatype the attention normalization is quantized to.
Definition: NvInfer.h:7196
ITensor * getNormalizationQuantizeScale() const noexcept
Get the quantization scale for the attention normalization output.
Definition: NvInfer.h:7171
bool setInput(int32_t index, ITensor &input) noexcept
Append or replace an input of this layer with a specific tensor.
Definition: NvInfer.h:7074
bool setMask(ITensor &mask) noexcept
Set whether a mask will be used for the normalization operation.
Definition: NvInfer.h:6993
bool getCausal() const noexcept
Get whether the attention will run a causal inference.
Definition: NvInfer.h:7030
apiv::VAttention * mImpl
Definition: NvInfer.h:7263
virtual ~IAttention() noexcept=default
This layer represents an input to an attention subgraph.
Definition: NvInfer.h:6863
virtual ~IAttentionInputLayer() noexcept=default
This layer represents an output of an IAttention.
Definition: NvInfer.h:6898
virtual ~IAttentionOutputLayer() noexcept=default
Holds properties for configuring a builder to produce an engine.
Definition: NvInfer.h:10325
void setMemoryPoolLimit(MemoryPoolType pool, std::size_t poolSize) noexcept
Set the memory size for the memory pool.
Definition: NvInfer.h:10762
bool setComputeCapability(ComputeCapability computeCapability, int32_t index) noexcept
Set one compute capability for runtime execution.
Definition: NvInfer.h:11146
bool setNbComputeCapabilities(int32_t maxNbComputeCapabilities) noexcept
Set the number of compute capabilities.
Definition: NvInfer.h:11116
TRT_DEPRECATED bool setTimingCache(ITimingCache const &cache, bool ignoreMismatch) noexcept
Attach a timing cache to IBuilderConfig.
Definition: NvInfer.h:10718
void setPreviewFeature(PreviewFeature feature, bool enable) noexcept
Enable or disable a specific preview feature.
Definition: NvInfer.h:10799
bool getPreviewFeature(PreviewFeature feature) const noexcept
Get status of preview feature.
Definition: NvInfer.h:10813
int32_t getBuilderOptimizationLevel() noexcept
Get builder optimization level.
Definition: NvInfer.h:10858
bool setTacticSources(TacticSources tacticSources) noexcept
Set tactic sources.
Definition: NvInfer.h:10656
void setPluginsToSerialize(char const *const *paths, int32_t nbPaths) noexcept
Set the plugin libraries to be serialized with version-compatible engines.
Definition: NvInfer.h:10901
bool setTilingOptimizationLevel(TilingOptimizationLevel level) noexcept
Set the Tiling optimization level.
Definition: NvInfer.h:11057
bool setL2LimitForTiling(int64_t size) noexcept
Set the L2 cache usage limit for Tiling optimization.
Definition: NvInfer.h:11085
std::size_t getMemoryPoolLimit(MemoryPoolType pool) const noexcept
Get the memory size limit of the memory pool.
Definition: NvInfer.h:10781
int32_t getDLACore() const noexcept
Get the DLA core that the engine executes on.
Definition: NvInfer.h:10524
int32_t getNbPluginsToSerialize() const noexcept
Get the number of plugin library paths to be serialized with version-compatible engines.
Definition: NvInfer.h:10924
void setDeviceType(ILayer const *layer, DeviceType deviceType) noexcept
Set the device that this layer must execute on.
Definition: NvInfer.h:10456
void setEngineCapability(EngineCapability capability) noexcept
Configure the builder to target specified EngineCapability flow.
Definition: NvInfer.h:10362
int32_t getMaxAuxStreams() const noexcept
Get the maximum number of auxiliary streams that TRT is allowed to use.
Definition: NvInfer.h:10963
bool getFlag(BuilderFlag builderFlag) const noexcept
Returns true if the build mode flag is set.
Definition: NvInfer.h:10439
void setMaxNbTactics(int32_t maxNbTactics) noexcept
Set the maximum number of tactics to time when there is a choice of tactics.
Definition: NvInfer.h:11029
int64_t getL2LimitForTiling() const noexcept
Get the L2 cache usage limit for tiling optimization.
Definition: NvInfer.h:11097
void setProgressMonitor(IProgressMonitor *monitor) noexcept
Sets the progress monitor for building a network.
Definition: NvInfer.h:10979
void setProfilingVerbosity(ProfilingVerbosity verbosity) noexcept
Set verbosity level of layer information exposed in NVTX annotations and IEngineInspector.
Definition: NvInfer.h:10621
int32_t getNbOptimizationProfiles() const noexcept
Get number of optimization profiles.
Definition: NvInfer.h:10609
void reset() noexcept
Resets the builder configuration to defaults.
Definition: NvInfer.h:10555
char const * getPluginToSerialize(int32_t index) const noexcept
Get the plugin library path to be serialized with version-compatible engines.
Definition: NvInfer.h:10914
EngineCapability getEngineCapability() const noexcept
Query EngineCapability flow configured for the builder.
Definition: NvInfer.h:10374
RuntimePlatform getRuntimePlatform() const noexcept
Get the target platform for runtime execution.
Definition: NvInfer.h:11017
DeviceType getDefaultDeviceType() const noexcept
Get the default DeviceType which was set by setDefaultDeviceType.
Definition: NvInfer.h:10545
void setRuntimePlatform(RuntimePlatform runtimePlatform) noexcept
Set the target platform for runtime execution.
Definition: NvInfer.h:11005
int32_t getMaxNbTactics() const noexcept
Query the maximum number of tactics timed when there is a choice.
Definition: NvInfer.h:11041
BuilderFlags getFlags() const noexcept
Get the build mode flags for this builder config. Defaults to 0.
Definition: NvInfer.h:10403
void setFlags(BuilderFlags builderFlags) noexcept
Set the build mode flags to turn on builder options for this network.
Definition: NvInfer.h:10391
TacticSources getTacticSources() const noexcept
Get tactic sources.
Definition: NvInfer.h:10671
void resetDeviceType(ILayer const *layer) noexcept
Reset the DeviceType for this layer.
Definition: NvInfer.h:10488
ComputeCapability getComputeCapability(int32_t index) const noexcept
Get one compute capability for runtime execution.
Definition: NvInfer.h:11160
void setDLACore(int32_t dlaCore) noexcept
Sets the DLA core used by the network. Defaults to -1.
Definition: NvInfer.h:10514
HardwareCompatibilityLevel getHardwareCompatibilityLevel() const noexcept
Get the hardware compatibility level.
Definition: NvInfer.h:10888
int32_t getNbComputeCapabilities() const noexcept
Get the number of compute capabilities.
Definition: NvInfer.h:11128
void clearFlag(BuilderFlag builderFlag) noexcept
Clear a single build mode flag.
Definition: NvInfer.h:10415
int32_t addOptimizationProfile(IOptimizationProfile const *profile) noexcept
Add an optimization profile.
Definition: NvInfer.h:10596
IProgressMonitor * getProgressMonitor() const noexcept
Definition: NvInfer.h:10989
apiv::VBuilderConfig * mImpl
Definition: NvInfer.h:11166
int32_t getAvgTimingIterations() const noexcept
Query the number of averaging iterations.
Definition: NvInfer.h:10349
void setDefaultDeviceType(DeviceType deviceType) noexcept
Sets the default DeviceType to be used by the builder. It ensures that all the layers that can run on...
Definition: NvInfer.h:10535
void setFlag(BuilderFlag builderFlag) noexcept
Set a single build mode flag.
Definition: NvInfer.h:10427
TRT_DEPRECATED nvinfer1::ITimingCache * createTimingCache(void const *blob, std::size_t size) const noexcept
Create timing cache.
Definition: NvInfer.h:10693
virtual ~IBuilderConfig() noexcept=default
DeviceType getDeviceType(ILayer const *layer) const noexcept
Get the device that this layer executes on.
Definition: NvInfer.h:10466
bool canRunOnDLA(ILayer const *layer) const noexcept
Checks if a layer can run on DLA.
Definition: NvInfer.h:10498
cudaStream_t getProfileStream() const noexcept
Get the CUDA stream that is used to profile this network.
Definition: NvInfer.h:10579
void setHardwareCompatibilityLevel(HardwareCompatibilityLevel hardwareCompatibilityLevel) noexcept
Set the hardware compatibility level.
Definition: NvInfer.h:10875
TilingOptimizationLevel getTilingOptimizationLevel() const noexcept
Get the Tiling optimization level.
Definition: NvInfer.h:11069
void setMaxAuxStreams(int32_t nbStreams) noexcept
Set the maximum number of auxiliary streams that TRT is allowed to use.
Definition: NvInfer.h:10953
ProfilingVerbosity getProfilingVerbosity() const noexcept
Get verbosity level of layer information exposed in NVTX annotations and IEngineInspector.
Definition: NvInfer.h:10634
TRT_DEPRECATED nvinfer1::ITimingCache const * getTimingCache() const noexcept
Get the pointer to the timing cache from current IBuilderConfig.
Definition: NvInfer.h:10730
bool isDeviceTypeSet(ILayer const *layer) const noexcept
Whether the DeviceType has been explicitly set for this layer.
Definition: NvInfer.h:10478
void setBuilderOptimizationLevel(int32_t level) noexcept
Set builder optimization level.
Definition: NvInfer.h:10846
void setProfileStream(const cudaStream_t stream) noexcept
Set the CUDA stream that is used to profile this network.
Definition: NvInfer.h:10567
Builds an engine from a network definition.
Definition: NvInfer.h:11219
int32_t getNbDLACores() const noexcept
Return the number of DLA engines available to this builder.
Definition: NvInfer.h:11238
IErrorRecorder * getErrorRecorder() const noexcept
Get the ErrorRecorder assigned to this interface.
Definition: NvInfer.h:11345
apiv::VBuilder * mImpl
Definition: NvInfer.h:11477
ILogger * getLogger() const noexcept
Get the logger with which the builder was created.
Definition: NvInfer.h:11431
bool isNetworkSupported(INetworkDefinition const &network, IBuilderConfig const &config) const noexcept
Checks that a network is within the scope of the IBuilderConfig settings.
Definition: NvInfer.h:11421
int32_t getMaxThreads() const noexcept
Get the maximum number of threads that can be used by the builder.
Definition: NvInfer.h:11461
IPluginRegistry & getPluginRegistry() noexcept
Get the local plugin registry that can be used by the builder.
Definition: NvInfer.h:11471
nvinfer1::IOptimizationProfile * createOptimizationProfile() noexcept
Create a new optimization profile.
Definition: NvInfer.h:11311
void setGpuAllocator(IGpuAllocator *allocator) noexcept
Set the GPU allocator.
Definition: NvInfer.h:11256
nvinfer1::INetworkDefinition * createNetworkV2(NetworkDefinitionCreationFlags flags) noexcept
Create a network definition object.
Definition: NvInfer.h:11296
nvinfer1::IBuilderConfig * createBuilderConfig() noexcept
Create a builder configuration object.
Definition: NvInfer.h:11270
void reset() noexcept
Resets the builder state to default values.
Definition: NvInfer.h:11353
bool setMaxThreads(int32_t maxThreads) noexcept
Set the maximum number of threads.
Definition: NvInfer.h:11447
void setErrorRecorder(IErrorRecorder *recorder) noexcept
Set the ErrorRecorder for this interface.
Definition: NvInfer.h:11330
nvinfer1::IHostMemory * buildSerializedNetwork(INetworkDefinition &network, IBuilderConfig &config) noexcept
Builds and serializes a network for the given INetworkDefinition and IBuilderConfig.
Definition: NvInfer.h:11372
virtual ~IBuilder() noexcept=default
bool buildSerializedNetworkToStream(INetworkDefinition &network, IBuilderConfig &config, IStreamWriter &writer) noexcept
Builds and serializes a network into stream for the given INetworkDefinition and IBuilderConfig.
Definition: NvInfer.h:11393
A cast layer in a network.
Definition: NvInfer.h:3865
virtual ~ICastLayer() noexcept=default
apiv::VCastLayer * mImpl
Definition: NvInfer.h:3891
DataType getToType() const noexcept
Return cast layer output type.
Definition: NvInfer.h:3885
void setToType(DataType toType) noexcept
Set cast layer output type.
Definition: NvInfer.h:3874
A concatenation layer in a network definition.
Definition: NvInfer.h:2012
void setAxis(int32_t axis) noexcept
Set the axis along which concatenation occurs.
Definition: NvInfer.h:2025
int32_t getAxis() const noexcept
Get the axis along which concatenation occurs.
Definition: NvInfer.h:2035
virtual ~IConcatenationLayer() noexcept=default
This layer represents a condition input to an IIfConditional.
Definition: NvInfer.h:4528
virtual ~IConditionLayer() noexcept=default
Layer that represents a constant value.
Definition: NvInfer.h:3904
void setWeights(Weights weights) noexcept
Set the weights for the layer.
Definition: NvInfer.h:3914
Weights getWeights() const noexcept
Get the weights for the layer.
Definition: NvInfer.h:3924
void setDimensions(Dims const &dimensions) noexcept
Set the dimensions for the layer.
Definition: NvInfer.h:3936
apiv::VConstantLayer * mImpl
Definition: NvInfer.h:3954
virtual ~IConstantLayer() noexcept=default
Dims getDimensions() const noexcept
Get the dimensions for the layer.
Definition: NvInfer.h:3948
A convolution layer in a network definition.
Definition: NvInfer.h:982
Dims getPrePadding() const noexcept
Get the pre-padding.
Definition: NvInfer.h:1107
Weights getBiasWeights() const noexcept
Get the bias weights for the convolution.
Definition: NvInfer.h:1080
void setPaddingMode(PaddingMode paddingMode) noexcept
Set the padding mode.
Definition: NvInfer.h:1148
void setDilationNd(Dims const &dilation) noexcept
Set the multi-dimension dilation of the convolution.
Definition: NvInfer.h:1252
Dims getPaddingNd() const noexcept
Get the multi-dimension padding of the convolution.
Definition: NvInfer.h:1238
Dims getStrideNd() const noexcept
Get the multi-dimension stride of the convolution.
Definition: NvInfer.h:1208
Weights getKernelWeights() const noexcept
Get the kernel weights of the convolution.
Definition: NvInfer.h:1055
void setStrideNd(Dims const &stride) noexcept
Set the multi-dimension stride of the convolution.
Definition: NvInfer.h:1198
Dims getDilationNd() const noexcept
Get the multi-dimension dilation of the convolution.
Definition: NvInfer.h:1262
int64_t getNbOutputMaps() const noexcept
Get the number of output maps for the convolution.
Definition: NvInfer.h:1001
void setKernelWeights(Weights weights) noexcept
Set the kernel weights for the convolution.
Definition: NvInfer.h:1045
Dims getPostPadding() const noexcept
Get the post-padding.
Definition: NvInfer.h:1134
int64_t getNbGroups() const noexcept
Get the number of groups of the convolution.
Definition: NvInfer.h:1031
PaddingMode getPaddingMode() const noexcept
Get the padding mode.
Definition: NvInfer.h:1160
virtual ~IConvolutionLayer() noexcept=default
void setNbGroups(int64_t nbGroups) noexcept
Set the number of groups for a convolution.
Definition: NvInfer.h:1021
void setNbOutputMaps(int64_t nbOutputMaps) noexcept
Set the number of output maps for the convolution.
Definition: NvInfer.h:991
void setBiasWeights(Weights weights) noexcept
Set the bias weights for the convolution.
Definition: NvInfer.h:1070
Dims getKernelSizeNd() const noexcept
Get the multi-dimension kernel size of the convolution.
Definition: NvInfer.h:1183
void setPaddingNd(Dims const &padding) noexcept
Set the multi-dimension padding of the convolution.
Definition: NvInfer.h:1226
void setPrePadding(Dims const &padding) noexcept
Set the multi-dimension pre-padding of the convolution.
Definition: NvInfer.h:1097
void setPostPadding(Dims const &padding) noexcept
Set the multi-dimension post-padding of the convolution.
Definition: NvInfer.h:1124
void setKernelSizeNd(Dims const &kernelSize) noexcept
Set the multi-dimension kernel size of the convolution.
Definition: NvInfer.h:1173
Layer that represents a cumulative operation across a tensor.
Definition: NvInfer.h:6719
bool setOperation(CumulativeOperation op) noexcept
Set the cumulative operation for the layer.
Definition: NvInfer.h:6730
void setReverse(bool reverse) noexcept
Specify whether the cumulative operation should be applied backward.
Definition: NvInfer.h:6778
apiv::VCumulativeLayer * mImpl
Definition: NvInfer.h:6796
bool getExclusive() const noexcept
Get whether it is exclusive accumulation or inclusive accumulation.
Definition: NvInfer.h:6766
virtual ~ICumulativeLayer() noexcept=default
bool getReverse() const noexcept
Get the boolean that specifies whether the cumulative operation should be applied backward.
Definition: NvInfer.h:6790
void setExclusive(bool exclusive) noexcept
Set whether it is an exclusive accumulation or inclusive accumulation.
Definition: NvInfer.h:6754
CumulativeOperation getOperation() const noexcept
Get the cumulative operation for the layer.
Definition: NvInfer.h:6742
A deconvolution layer in a network definition.
Definition: NvInfer.h:2053
void setBiasWeights(Weights weights) noexcept
Set the bias weights for the deconvolution.
Definition: NvInfer.h:2141
int64_t getNbGroups() const noexcept
Get the number of groups for a deconvolution.
Definition: NvInfer.h:2102
Weights getKernelWeights() const noexcept
Get the kernel weights for the deconvolution.
Definition: NvInfer.h:2126
void setPrePadding(Dims const &padding) noexcept
Set the multi-dimension pre-padding of the deconvolution.
Definition: NvInfer.h:2168
Dims getStrideNd() const noexcept
Get the multi-dimension stride of the deconvolution.
Definition: NvInfer.h:2283
Dims getDilationNd() const noexcept
Get the multi-dimension dilation of the deconvolution.
Definition: NvInfer.h:2349
Weights getBiasWeights() const noexcept
Get the bias weights for the deconvolution.
Definition: NvInfer.h:2151
void setKernelWeights(Weights weights) noexcept
Set the kernel weights for the deconvolution.
Definition: NvInfer.h:2116
int64_t getNbOutputMaps() const noexcept
Get the number of output feature maps for the deconvolution.
Definition: NvInfer.h:2072
void setStrideNd(Dims const &stride) noexcept
Set the multi-dimension stride of the deconvolution.
Definition: NvInfer.h:2273
Dims getPostPadding() const noexcept
Get the post-padding.
Definition: NvInfer.h:2205
Dims getKernelSizeNd() const noexcept
Get the multi-dimension kernel size of the deconvolution.
Definition: NvInfer.h:2256
void setPostPadding(Dims const &padding) noexcept
Set the multi-dimension post-padding of the deconvolution.
Definition: NvInfer.h:2195
void setKernelSizeNd(Dims const &kernelSize) noexcept
Set the multi-dimension kernel size of the deconvolution.
Definition: NvInfer.h:2246
virtual ~IDeconvolutionLayer() noexcept=default
void setPaddingNd(Dims const &padding) noexcept
Set the multi-dimension padding of the deconvolution.
Definition: NvInfer.h:2301
void setNbOutputMaps(int64_t nbOutputMaps) noexcept
Set the number of output feature maps for the deconvolution.
Definition: NvInfer.h:2062
Dims getPaddingNd() const noexcept
Get the multi-dimension padding of the deconvolution.
Definition: NvInfer.h:2313
void setDilationNd(Dims const &dilation) noexcept
Set the multi-dimension dilation of the deconvolution.
Definition: NvInfer.h:2339
void setPaddingMode(PaddingMode paddingMode) noexcept
Set the padding mode.
Definition: NvInfer.h:2219
void setNbGroups(int64_t nbGroups) noexcept
Set the number of groups for a deconvolution.
Definition: NvInfer.h:2092
Dims getPrePadding() const noexcept
Get the pre-padding.
Definition: NvInfer.h:2178
PaddingMode getPaddingMode() const noexcept
Get the padding mode.
Definition: NvInfer.h:2231
A Dequantize layer in a network definition.
Definition: NvInfer.h:5617
TRT_NODISCARD Dims getBlockShape() const noexcept
Get the shape of the quantization block.
Definition: NvInfer.h:5666
void setToType(DataType toType) noexcept
Set the Dequantize layer output type.
Definition: NvInfer.h:5682
virtual ~IDequantizeLayer() noexcept=default
int32_t getAxis() const noexcept
Get the quantization axis.
Definition: NvInfer.h:5627
bool setBlockShape(Dims const &blockShape) noexcept
Set the shape of the quantization block.
Definition: NvInfer.h:5655
DataType getToType() const noexcept
Return the Dequantize layer output type.
Definition: NvInfer.h:5694
void setAxis(int32_t axis) noexcept
Set the quantization axis.
Definition: NvInfer.h:5638
Definition: NvInfer.h:7884
virtual ~IDistCollectiveLayer() noexcept=default
A network layer to perform dynamic quantization.
Definition: NvInfer.h:5722
DataType getScaleType() const noexcept
Return the scale factors data type.
Definition: NvInfer.h:5788
TRT_DEPRECATED void setAxis(int32_t axis) noexcept
Set the axis along which block quantization occurs.
Definition: NvInfer.h:5801
TRT_DEPRECATED void setBlockSize(int32_t size) noexcept
Set the size of the quantization block.
Definition: NvInfer.h:5824
Dims getBlockShape() const noexcept
Get the shape of the quantization block.
Definition: NvInfer.h:5859
void setScaleType(DataType scaleType) noexcept
Set the data type of the scale factors used to quantize the data.
Definition: NvInfer.h:5775
DataType getToType() const noexcept
Return DynamicQuantizeLayer's quantized output type.
Definition: NvInfer.h:5762
TRT_DEPRECATED int32_t getAxis() const noexcept
Get the axis along which blocking occurs.
Definition: NvInfer.h:5811
virtual ~IDynamicQuantizeLayer() noexcept=default
void setToType(DataType toType) noexcept
Set DynamicQuantizeLayer's quantized output type.
Definition: NvInfer.h:5749
void setBlockShape(Dims const &blockShape) noexcept
Set the shape of the quantization block.
Definition: NvInfer.h:5847
TRT_DEPRECATED int32_t getBlockSize() const noexcept
Get the size of the quantization block.
Definition: NvInfer.h:5834
An Einsum layer in a network.
Definition: NvInfer.h:5904
bool setEquation(char const *equation) noexcept
Set the equation. The equation is a comma-separated list of subscript labels, where each label refers...
Definition: NvInfer.h:5915
virtual ~IEinsumLayer() noexcept=default
char const * getEquation() const noexcept
Return the equation.
Definition: NvInfer.h:5925
An elementwise layer in a network definition.
Definition: NvInfer.h:2423
virtual ~IElementWiseLayer() noexcept=default
apiv::VElementWiseLayer * mImpl
Definition: NvInfer.h:2452
ElementWiseOperation getOperation() const noexcept
Get the binary operation for the layer.
Definition: NvInfer.h:2446
void setOperation(ElementWiseOperation op) noexcept
Set the binary operation for the layer.
Definition: NvInfer.h:2434
Generate a tensor according to a specified mode.
Definition: NvInfer.h:5118
bool isAlphaBetaInt64() const noexcept
Return true if alpha/beta have type int64, false if they have type double.
Definition: NvInfer.h:5350
FillOperation getOperation() const noexcept
Get the fill operation for the layer.
Definition: NvInfer.h:5164
void setOperation(FillOperation op) noexcept
Set the fill operation for the layer.
Definition: NvInfer.h:5154
DataType getToType() const noexcept
Get the fill layer output type.
Definition: NvInfer.h:5380
void setAlphaInt64(int64_t alpha) noexcept
Set the alpha parameter with int64 datatype.
Definition: NvInfer.h:5293
void setBetaInt64(int64_t beta) noexcept
Set the beta parameter with int64 datatype.
Definition: NvInfer.h:5327
void setBeta(double beta) noexcept
Set the beta parameter.
Definition: NvInfer.h:5217
int64_t getAlphaInt64() const noexcept
Get the value of alpha parameter with int64 datatype.
Definition: NvInfer.h:5308
int64_t getBetaInt64() const noexcept
Get the value of beta parameter with int64 datatype.
Definition: NvInfer.h:5342
double getAlpha() const noexcept
Get the value of alpha parameter.
Definition: NvInfer.h:5198
void setDimensions(Dims const &dimensions) noexcept
Set the output tensor's dimensions.
Definition: NvInfer.h:5129
void setAlpha(double alpha) noexcept
Set the alpha parameter.
Definition: NvInfer.h:5183
void setToType(DataType toType) noexcept
Set the fill layer output type.
Definition: NvInfer.h:5368
Dims getDimensions() const noexcept
Get the output tensor's dimensions.
Definition: NvInfer.h:5144
double getBeta() const noexcept
Get the value of beta parameter.
Definition: NvInfer.h:5232
virtual ~IFillLayer() noexcept=default
A Gather layer in a network definition. Supports several kinds of gathering.
Definition: NvInfer.h:2556
void setGatherAxis(int32_t axis) noexcept
Set the axis used by GatherMode::kELEMENTS and GatherMode::kDEFAULT. The axis must be less than the nu...
Definition: NvInfer.h:2567
void setNbElementWiseDims(int32_t elementWiseDims) noexcept
Set the number of leading dimensions of indices tensor to be handled elementwise.
Definition: NvInfer.h:2602
apiv::VGatherLayer * mImpl
Definition: NvInfer.h:2638
int32_t getNbElementWiseDims() const noexcept
Get the number of leading dimensions of indices tensor to be handled elementwise.
Definition: NvInfer.h:2612
void setMode(GatherMode mode) noexcept
Set the gather mode.
Definition: NvInfer.h:2622
int32_t getGatherAxis() const noexcept
Get the axis to gather on.
Definition: NvInfer.h:2579
GatherMode getMode() const noexcept
Get the gather mode.
Definition: NvInfer.h:2632
virtual ~IGatherLayer() noexcept=default
A GridSample layer in a network definition.
Definition: NvInfer.h:6126
void setInterpolationMode(InterpolationMode mode) noexcept
Set the grid sample interpolation mode.
Definition: NvInfer.h:6133
bool setSampleMode(SampleMode mode) noexcept
Set the sample mode.
Definition: NvInfer.h:6179
void setAlignCorners(bool alignCorners) noexcept
Set the align corners mode.
Definition: NvInfer.h:6155
apiv::VGridSampleLayer * mImpl
Definition: NvInfer.h:6197
SampleMode getSampleMode() const noexcept
Get the sample mode.
Definition: NvInfer.h:6191
InterpolationMode getInterpolationMode() const noexcept
Get the grid sample interpolation mode.
Definition: NvInfer.h:6145
bool getAlignCorners() const noexcept
Get the align corners mode.
Definition: NvInfer.h:6167
virtual ~IGridSampleLayer() noexcept=default
Class to handle library allocated memory that is accessible to the user.
Definition: NvInferRuntime.h:142
A layer that represents the identity function.
Definition: NvInfer.h:3852
apiv::VIdentityLayer * mImpl
Definition: NvInfer.h:3854
virtual ~IIdentityLayer() noexcept=default
This is a base class for Conditional boundary layers.
Definition: NvInfer.h:4507
IIfConditional * getConditional() const noexcept
Get a pointer to the IIfConditional associated with this boundary layer.
Definition: NvInfer.h:4512
virtual ~IIfConditionalBoundaryLayer() noexcept=default
Helper for constructing conditionally-executed subgraphs.
Definition: NvInfer.h:4590
IIfConditionalInputLayer * addInput(ITensor &input) noexcept
Add an If-conditional input.
Definition: NvInfer.h:4631
char const * getName() const noexcept
Return the name of the conditional.
Definition: NvInfer.h:4656
virtual ~IIfConditional() noexcept=default
IConditionLayer * setCondition(ITensor &condition) noexcept
Set the condition tensor for this If-Conditional construct.
Definition: NvInfer.h:4601
IIfConditionalOutputLayer * addOutput(ITensor &trueSubgraphOutput, ITensor &falseSubgraphOutput) noexcept
Add an If-conditional output.
Definition: NvInfer.h:4619
void setName(char const *name) noexcept
Set the name of the conditional.
Definition: NvInfer.h:4646
This layer represents an input to an IIfConditional.
Definition: NvInfer.h:4558
virtual ~IIfConditionalInputLayer() noexcept=default
This layer represents an output of an IIfConditional.
Definition: NvInfer.h:4545
virtual ~IIfConditionalOutputLayer() noexcept=default
A layer to do iterations.
Definition: NvInfer.h:4821
virtual ~IIteratorLayer() noexcept=default
void setReverse(bool reverse) noexcept
Set iteration order to be reverse.
Definition: NvInfer.h:4848
bool getReverse() const noexcept
Check if the iteration order is reverse.
Definition: NvInfer.h:4858
int32_t getAxis() const noexcept
Get axis being iterated over.
Definition: NvInfer.h:4834
void setAxis(int32_t axis) noexcept
Set axis to iterate over.
Definition: NvInfer.h:4826
Layer that represents a KVCacheUpdate operation.
Definition: NvInfer.h:7386
bool setCacheMode(KVCacheMode cacheMode) noexcept
Set the mode of the KVCacheUpdate layer.
Definition: NvInfer.h:7409
virtual ~IKVCacheUpdateLayer() noexcept=default
KVCacheMode getCacheMode() const noexcept
Get the mode of the KVCacheUpdate layer.
Definition: NvInfer.h:7419
apiv::VKVCacheUpdateLayer * mImpl
Definition: NvInfer.h:7425
An LRN layer in a network definition.
Definition: NvInfer.h:1667
int64_t getWindowSize() const noexcept
Get the LRN window size.
Definition: NvInfer.h:1688
float getAlpha() const noexcept
Get the LRN alpha value.
Definition: NvInfer.h:1710
void setWindowSize(int64_t windowSize) noexcept
Set the LRN window size.
Definition: NvInfer.h:1678
void setK(float k) noexcept
Set the LRN K value.
Definition: NvInfer.h:1744
void setAlpha(float alpha) noexcept
Set the LRN alpha value.
Definition: NvInfer.h:1700
void setBeta(float beta) noexcept
Set the LRN beta value.
Definition: NvInfer.h:1722
virtual ~ILRNLayer() noexcept=default
float getBeta() const noexcept
Get the LRN beta value.
Definition: NvInfer.h:1732
float getK() const noexcept
Get the LRN K value.
Definition: NvInfer.h:1754
Base class for all layer classes in a network definition.
Definition: NvInfer.h:464
TRT_DEPRECATED void setPrecision(DataType dataType) noexcept
Set the preferred or required computational precision of this layer in a weakly-typed network.
Definition: NvInfer.h:584
TRT_DEPRECATED void setOutputType(int32_t index, DataType dataType) noexcept
Set the output type of this layer in a weakly-typed network.
Definition: NvInfer.h:672
TRT_DEPRECATED bool precisionIsSet() const noexcept
Whether the computational precision has been set for this layer.
Definition: NvInfer.h:610
void setMetadata(char const *metadata) noexcept
Set the metadata for this layer.
Definition: NvInfer.h:735
TRT_DEPRECATED void resetOutputType(int32_t index) noexcept
Reset the output type for this layer.
Definition: NvInfer.h:717
void setName(char const *name) noexcept
Set the name of a layer.
Definition: NvInfer.h:485
int32_t getNbInputs() const noexcept
Get the number of inputs of a layer.
Definition: NvInfer.h:503
int32_t getNbRanks() const noexcept
Get the number of ranks for multi-device execution.
Definition: NvInfer.h:781
char const * getMetadata() const noexcept
Get the metadata of the layer.
Definition: NvInfer.h:748
DataType getOutputType(int32_t index) const noexcept
Get the output type of this layer.
Definition: NvInfer.h:687
DataType getPrecision() const noexcept
Get the computational precision of this layer.
Definition: NvInfer.h:596
TRT_DEPRECATED bool outputTypeIsSet(int32_t index) const noexcept
Whether the output type has been set for this layer.
Definition: NvInfer.h:703
char const * getName() const noexcept
Return the name of a layer.
Definition: NvInfer.h:495
int32_t getNbOutputs() const noexcept
Get the number of outputs of a layer.
Definition: NvInfer.h:524
ITensor * getOutput(int32_t index) const noexcept
Get the layer output corresponding to the given index.
Definition: NvInfer.h:534
void setInput(int32_t index, ITensor &tensor) noexcept
Replace an input of this layer with a specific tensor.
Definition: NvInfer.h:551
ITensor * getInput(int32_t index) const noexcept
Get the layer input corresponding to the given index.
Definition: NvInfer.h:516
bool setNbRanks(int32_t nbRanks) noexcept
Set the number of ranks for multi-device execution.
Definition: NvInfer.h:769
LayerType getType() const noexcept
Return the type of a layer.
Definition: NvInfer.h:471
TRT_DEPRECATED void resetPrecision() noexcept
Reset the computational precision for this layer.
Definition: NvInfer.h:622
virtual ~ILayer() noexcept=default
Application-implemented logging interface for the builder, refitter and runtime.
Definition: NvInferRuntime.h:1588
This is a base class for Loop boundary layers.
Definition: NvInfer.h:4484
virtual ~ILoopBoundaryLayer() noexcept=default
ILoop * getLoop() const noexcept
Get a pointer to ILoop associated with this boundary layer.
Definition: NvInfer.h:4489
Helper for creating a recurrent subgraph.
Definition: NvInfer.h:4879
void setName(char const *name) noexcept
Set the name of the loop.
Definition: NvInfer.h:4949
ITripLimitLayer * addTripLimit(ITensor &tensor, TripLimit limit) noexcept
Add a trip-count limiter, based on the given tensor.
Definition: NvInfer.h:4908
IIteratorLayer * addIterator(ITensor &tensor, int32_t axis=0, bool reverse=false) noexcept
Return layer that subscripts tensor by loop iteration.
Definition: NvInfer.h:4921
ILoopOutputLayer * addLoopOutput(ITensor &tensor, LoopOutput outputKind, int32_t axis=0) noexcept
Make an output for this loop, based on the given tensor.
Definition: NvInfer.h:4934
virtual ~ILoop() noexcept=default
char const * getName() const noexcept
Return the name of the loop.
Definition: NvInfer.h:4959
IRecurrenceLayer * addRecurrence(ITensor &initialValue) noexcept
Create a recurrence layer for this loop with initialValue as its first input.
Definition: NvInfer.h:4887
An ILoopOutputLayer is the sole way to get output from a loop.
Definition: NvInfer.h:4721
virtual ~ILoopOutputLayer() noexcept=default
int32_t getAxis() const noexcept
Get axis being concatenated over.
Definition: NvInfer.h:4751
LoopOutput getLoopOutput() const noexcept
Get the kind of this loop output.
Definition: NvInfer.h:4726
void setAxis(int32_t axis) noexcept
Set where to insert the concatenation axis. Ignored if getLoopOutput() is kLAST_VALUE.
Definition: NvInfer.h:4743
Layer that represents a Matrix Multiplication.
Definition: NvInfer.h:3699
apiv::VMatrixMultiplyLayer * mImpl
Definition: NvInfer.h:3727
virtual ~IMatrixMultiplyLayer() noexcept=default
MatrixOperation getOperation(int32_t index) const noexcept
Get the operation for an input tensor.
Definition: NvInfer.h:3721
void setOperation(int32_t index, MatrixOperation op) noexcept
Set the operation for an input tensor.
Definition: NvInfer.h:3709
A MoE layer in a network definition. Mixture of Experts (MoE) is a collection of experts with each ex...
Definition: NvInfer.h:7568
void setSwigluParamLimit(float limit) noexcept
Set the SwiGLU parameter limit.
Definition: NvInfer.h:7783
void setDynQOutputScaleType(DataType type) noexcept
Set the dynamic quantization output scale type.
Definition: NvInfer.h:7736
MoEActType getActivationType() const noexcept
Get the activation type for the MoE layer.
Definition: NvInfer.h:7617
void setQuantizationToType(DataType type) noexcept
Set the data type the mul output is quantized to.
Definition: NvInfer.h:7684
void setQuantizationDynamicDblQ(ITensor &fcDownActivationDblQScale, DataType dataType, Dims const &blockShape, DataType dynQOutputScaleType) noexcept
Configure dynamic quantization (with double quantization) after the mul op. ┌── fcGate ── activation ...
Definition: NvInfer.h:7669
void setQuantizationStatic(ITensor &fcDownActivationScale, DataType dataType) noexcept
Configure static quantization after the mul op. ┌── fcGate ── activation ───┐ │ │ hiddenStates ───┤ ├...
Definition: NvInfer.h:7640
virtual ~IMoELayer() noexcept=default
float getSwigluParamLimit() const noexcept
Get the SwiGLU parameter limit.
Definition: NvInfer.h:7795
DataType getQuantizationToType() const noexcept
Get the data type the mul in MoE layer is quantized to.
Definition: NvInfer.h:7696
DataType getDynQOutputScaleType() const noexcept
Get the dynamic quantization output scale type.
Definition: NvInfer.h:7748
void setActivationType(MoEActType activationType) noexcept
Set the activation type for the MoE layer.
Definition: NvInfer.h:7605
Dims getQuantizationBlockShape() const noexcept
Get the block shape for the quantization of the Mul output.
Definition: NvInfer.h:7724
void setGatedWeights(ITensor &fcGateWeights, ITensor &fcUpWeights, ITensor &fcDownWeights, MoEActType activationType) noexcept
Set the weights of the experts when each expert is a GLU (gated linear unit). In each GLU,...
Definition: NvInfer.h:7581
float getSwigluParamBeta() const noexcept
Get the SwiGLU parameter beta.
Definition: NvInfer.h:7847
void setSwigluParamBeta(float beta) noexcept
Set the SwiGLU parameter beta.
Definition: NvInfer.h:7835
void setGatedBiases(ITensor &fcGateBiases, ITensor &fcUpBiases, ITensor &fcDownBiases) noexcept
Set the biases of the experts when each expert is a GLU (gated linear unit). In each GLU,...
Definition: NvInfer.h:7593
void setSwigluParams(float limit, float alpha, float beta) noexcept
Set the SwiGLU parameters.
Definition: NvInfer.h:7769
void setQuantizationBlockShape(Dims const &blockShape) noexcept
Set the block shape for the quantization of the Mul output.
Definition: NvInfer.h:7712
void setInput(int32_t index, ITensor &tensor) noexcept
Set the input of the MoE layer.
Definition: NvInfer.h:7864
float getSwigluParamAlpha() const noexcept
Get the SwiGLU parameter alpha.
Definition: NvInfer.h:7821
void setSwigluParamAlpha(float alpha) noexcept
Set the SwiGLU parameter alpha.
Definition: NvInfer.h:7809
A non-maximum suppression layer in a network definition.
Definition: NvInfer.h:6278
virtual ~INMSLayer() noexcept=default
void setTopKBoxLimit(int32_t limit) noexcept
Set the TopK box limit parameter for the layer.
Definition: NvInfer.h:6315
void setBoundingBoxFormat(BoundingBoxFormat fmt) noexcept
Set the bounding box format parameter for the layer.
Definition: NvInfer.h:6289
BoundingBoxFormat getBoundingBoxFormat() const noexcept
Get the bounding box format parameter for the layer.
Definition: NvInfer.h:6301
bool setIndicesType(DataType type) noexcept
Set the indices type for the layer.
Definition: NvInfer.h:6360
apiv::VNMSLayer * mImpl
Definition: NvInfer.h:6378
int32_t getTopKBoxLimit() const noexcept
Get the TopK box limit parameter for the layer.
Definition: NvInfer.h:6325
DataType getIndicesType() const noexcept
Return the NMS layer indices type.
Definition: NvInfer.h:6372
A network definition for input to the builder.
Definition: NvInfer.h:7908
IConcatenationLayer * addConcatenation(ITensor *const *inputs, int32_t nbInputs) noexcept
Add a concatenation layer to the network.
Definition: NvInfer.h:8136
IShuffleLayer * addShuffle(ITensor &input) noexcept
Add a shuffle layer to the network.
Definition: NvInfer.h:8199
void setName(char const *name) noexcept
Sets the name of the network.
Definition: NvInfer.h:8625
ITopKLayer * addTopK(ITensor &input, TopKOperation op, int32_t k, uint32_t reduceAxes, DataType indicesType) noexcept
Add a TopK layer to the network.
Definition: NvInfer.h:8395
bool markDebug(ITensor &tensor) noexcept
Mark a tensor as a debug tensor.
Definition: NvInfer.h:7979
ILRNLayer * addLRN(ITensor &input, int64_t window, float alpha, float beta, float k) noexcept
Add an LRN layer to the network.
Definition: NvInfer.h:8080
ICumulativeLayer * addCumulative(ITensor &input, ITensor &axis, CumulativeOperation operation, bool exclusive, bool reverse) noexcept
Add a cumulative layer to the network.
Definition: NvInfer.h:9292
IAssertionLayer * addAssertion(ITensor &condition, char const *message) noexcept
Add an assertion layer to the network.
Definition: NvInfer.h:8927
TRT_DEPRECATED INonZeroLayer * addNonZero(ITensor &input) noexcept
Add a nonzero layer to the network.
Definition: NvInfer.h:8486
IConvolutionLayer * addConvolutionNd(ITensor &input, int64_t nbOutputMaps, Dims const &kernelSize, Weights kernelWeights, Weights biasWeights) noexcept
Add a multi-dimension convolution layer to the network.
Definition: NvInfer.h:8746
ICastLayer * addCast(ITensor &input, DataType toType) noexcept
Add a cast layer.
Definition: NvInfer.h:8555
IScaleLayer * addScaleNd(ITensor &input, ScaleMode mode, Weights shift, Weights scale, Weights power, int32_t channelAxis) noexcept
Add a multi-dimension scale layer to the network.
Definition: NvInfer.h:8825
char const * getName() const noexcept
Returns the name associated with the network.
Definition: NvInfer.h:8639
IParametricReLULayer * addParametricReLU(ITensor &input, ITensor &slope) noexcept
Add a parametric ReLU layer to the network.
Definition: NvInfer.h:8724
ITensor * getOutput(int32_t index) const noexcept
Get the output tensor specified by the given index.
Definition: NvInfer.h:8300
ITensor * getInput(int32_t index) const noexcept
Get the input tensor specified by the given index.
Definition: NvInfer.h:8270
TRT_DEPRECATED ITopKLayer * addTopK(ITensor &input, TopKOperation op, int32_t k, uint32_t reduceAxes) noexcept
Add a TopK layer to the network.
Definition: NvInfer.h:8362
IDequantizeLayer * addDequantize(ITensor &input, ITensor &scale, DataType outputType) noexcept
Add a dequantization layer to the network.
Definition: NvInfer.h:9050
bool unmarkOutputForShapes(ITensor &tensor) noexcept
Undo markOutputForShapes.
Definition: NvInfer.h:8706
IFillLayer * addFill(Dims const &dimensions, FillOperation op, DataType outputType) noexcept
Add a fill layer to the network.
Definition: NvInfer.h:8953
ILoop * addLoop() noexcept
Add a loop to the network.
Definition: NvInfer.h:8856
bool markUnfusedTensorsAsDebugTensors() noexcept
Mark unfused tensors as debug tensors.
Definition: NvInfer.h:8027
TRT_NODISCARD INormalizationLayer * addNormalizationV2(ITensor &input, ITensor &scale, ITensor &bias, uint32_t axesMask) noexcept
Add a normalization layer to the network.
Definition: NvInfer.h:9542
IActivationLayer * addActivation(ITensor &input, ActivationType type) noexcept
Add an activation layer to the network.
Definition: NvInfer.h:8061
ISliceLayer * addSlice(ITensor &input, Dims const &start, Dims const &size, Dims const &stride) noexcept
Add a slice layer to the network.
Definition: NvInfer.h:8601
virtual ~INetworkDefinition() noexcept=default
virtual IBuilder & getBuilder() const noexcept
Return the builder from which this INetworkDefinition was created.
Definition: NvInfer.h:9438
ILayer * getLayer(int32_t index) const noexcept
Get the layer specified by the given index.
Definition: NvInfer.h:8242
bool isDebugTensor(ITensor const &tensor) const noexcept
Check if a tensor is marked as debug tensor.
Definition: NvInfer.h:8005
bool getFlag(NetworkDefinitionCreationFlag networkDefinitionCreationFlag) const noexcept
Returns true if the network definition creation flag is set.
Definition: NvInfer.h:8677
IIfConditional * addIfConditional() noexcept
Add an if-then-else to the network.
Definition: NvInfer.h:8871
IErrorRecorder * getErrorRecorder() const noexcept
Get the ErrorRecorder assigned to this interface.
Definition: NvInfer.h:9027
ISqueezeLayer * addSqueeze(ITensor &input, ITensor &axes) noexcept
Add a squeeze layer to the network.
Definition: NvInfer.h:9495
TRT_DEPRECATED INMSLayer * addNMS(ITensor &boxes, ITensor &scores, ITensor &maxOutputBoxesPerClass) noexcept
Add a non-maximum suppression layer to the network.
Definition: NvInfer.h:9201
IReverseSequenceLayer * addReverseSequence(ITensor &input, ITensor &sequenceLens) noexcept
Add a ReverseSequence layer to the network.
Definition: NvInfer.h:9238
TRT_DEPRECATED IDynamicQuantizeLayer * addDynamicQuantize(ITensor &input, int32_t axis, int32_t blockSize, DataType outputType, DataType scaleType) noexcept
Add a dynamic quantization layer to the network.
Definition: NvInfer.h:9121
int32_t getNbInputs() const noexcept
Get the number of inputs in the network.
Definition: NvInfer.h:8254
NetworkDefinitionCreationFlags getFlags() const noexcept
Get the network definition creation flags for this network definition object. Defaults to 0.
Definition: NvInfer.h:8665
IQuantizeLayer * addQuantize(ITensor &input, ITensor &scale, DataType outputType) noexcept
Add a quantization layer to the network.
Definition: NvInfer.h:9094
IDynamicQuantizeLayer * addDynamicQuantizeV2(ITensor &input, Dims const &blockShape, DataType outputType, DataType scaleType) noexcept
Add a dynamic quantization layer to the network.
Definition: NvInfer.h:9145
IReduceLayer * addReduce(ITensor &input, ReduceOperation operation, uint32_t reduceAxes, bool keepDimensions) noexcept
Add a reduce layer to the network.
Definition: NvInfer.h:8326
IUnaryLayer * addUnary(ITensor &input, UnaryOperation operation) noexcept
Add a unary layer to the network.
Definition: NvInfer.h:8185
IGridSampleLayer * addGridSample(ITensor &input, ITensor &grid) noexcept
Add a GridSample layer to the network.
Definition: NvInfer.h:9179
void removeTensor(ITensor &tensor) noexcept
Remove a tensor from the network definition.
Definition: NvInfer.h:8570
bool areWeightsMarkedRefittable(char const *name) const noexcept
Whether the weight has been marked as refittable.
Definition: NvInfer.h:9476
ISelectLayer * addSelect(ITensor &condition, ITensor &thenInput, ITensor &elseInput) noexcept
Add a select layer to the network.
Definition: NvInfer.h:8910
IScatterLayer * addScatter(ITensor &data, ITensor &indices, ITensor &updates, ScatterMode mode) noexcept
Add a Scatter layer to the network with specified mode and axis=0.
Definition: NvInfer.h:9070
TRT_DEPRECATED INormalizationLayer * addNormalization(ITensor &input, ITensor &scale, ITensor &bias, uint32_t axesMask) noexcept
Add a normalization layer to the network.
Definition: NvInfer.h:9270
int32_t getNbLayers() const noexcept
Get the number of layers in the network.
Definition: NvInfer.h:8228
apiv::VNetworkDefinition * mImpl
Definition: NvInfer.h:9548
IKVCacheUpdateLayer * addKVCacheUpdate(ITensor &cache, ITensor &update, ITensor &writeIndices, KVCacheMode cacheMode) noexcept
Add a KVCacheUpdate layer to the network.
Definition: NvInfer.h:9378
bool markOutputForShapes(ITensor &tensor) noexcept
Enable tensor's value to be computed by IExecutionContext::getShapeBinding.
Definition: NvInfer.h:8694
IOneHotLayer * addOneHot(ITensor &indices, ITensor &values, ITensor &depth, int32_t axis) noexcept
Add a OneHot layer to the network.
Definition: NvInfer.h:8216
IScaleLayer * addScale(ITensor &input, ScaleMode mode, Weights shift, Weights scale, Weights power) noexcept
Add a Scale layer to the network.
Definition: NvInfer.h:8106
void unmarkOutput(ITensor &tensor) noexcept
Unmark a tensor as a network output.
Definition: NvInfer.h:8582
IIdentityLayer * addIdentity(ITensor &input) noexcept
Add an identity layer.
Definition: NvInfer.h:8540
IGatherLayer * addGatherV2(ITensor &data, ITensor &indices, GatherMode mode) noexcept
Add gather with specified mode, axis=0 and nbElementWiseDims=0.
Definition: NvInfer.h:8427
INonZeroLayer * addNonZero(ITensor &input, DataType indicesType) noexcept
Add a nonzero layer to the network.
Definition: NvInfer.h:8502
IElementWiseLayer * addElementWise(ITensor &input1, ITensor &input2, ElementWiseOperation op) noexcept
Add an elementwise layer to the network.
Definition: NvInfer.h:8163
IConstantLayer * addConstant(Dims const &dimensions, Weights weights) noexcept
Add a constant layer to the network.
Definition: NvInfer.h:8526
void setErrorRecorder(IErrorRecorder *recorder) noexcept
Set the ErrorRecorder for this interface.
Definition: NvInfer.h:9012
IPoolingLayer * addPoolingNd(ITensor &input, PoolingType type, Dims const &windowSize) noexcept
Add a multi-dimension pooling layer to the network.
Definition: NvInfer.h:8766
INMSLayer * addNMS(ITensor &boxes, ITensor &scores, ITensor &maxOutputBoxesPerClass, DataType indicesType) noexcept
Add a non-maximum suppression layer to the network.
Definition: NvInfer.h:9221
IRaggedSoftMaxLayer * addRaggedSoftMax(ITensor &input, ITensor &bounds) noexcept
Add a RaggedSoftMax layer to the network.
Definition: NvInfer.h:8446
IShapeLayer * addShape(ITensor &input) noexcept
Add a shape layer to the network.
Definition: NvInfer.h:8655
IGatherLayer * addGather(ITensor &data, ITensor &indices, int32_t axis) noexcept
Add gather with mode GatherMode::kDEFAULT and specified axis and nbElementWiseDims=0.
Definition: NvInfer.h:8411
IAttention * addAttention(ITensor &query, ITensor &key, ITensor &value, AttentionNormalizationOp normOp, bool causal) noexcept
Add an attention to the network.
Definition: NvInfer.h:9319
bool unmarkWeightsRefittable(char const *name) noexcept
Unmark weights as refittable when the builder flag kREFIT_INDIVIDUAL is set.
Definition: NvInfer.h:9463
bool markWeightsRefittable(char const *name) noexcept
Mark weights as refittable when the builder flag kREFIT_INDIVIDUAL is set.
Definition: NvInfer.h:9451
IRotaryEmbeddingLayer * addRotaryEmbedding(ITensor &input, ITensor &cosCache, ITensor &sinCache, bool interleaved, int32_t rotaryEmbeddingDim) noexcept
Add a Rotary Position Embedding (RoPE) layer to the network.
Definition: NvInfer.h:9344
IDeconvolutionLayer * addDeconvolutionNd(ITensor &input, int64_t nbOutputMaps, Dims kernelSize, Weights kernelWeights, Weights biasWeights) noexcept
Add a multi-dimension deconvolution layer to the network.
Definition: NvInfer.h:8788
IResizeLayer * addResize(ITensor &input) noexcept
Add a resize layer to the network.
Definition: NvInfer.h:8842
IUnsqueezeLayer * addUnsqueeze(ITensor &input, ITensor &axes) noexcept
Add an unsqueeze layer to the network.
Definition: NvInfer.h:9516
IMatrixMultiplyLayer * addMatrixMultiply(ITensor &input0, MatrixOperation op0, ITensor &input1, MatrixOperation op1) noexcept
Add a MatrixMultiply layer to the network.
Definition: NvInfer.h:8467
ISoftMaxLayer * addSoftMax(ITensor &input) noexcept
Add a SoftMax layer to the network.
Definition: NvInfer.h:8119
bool unmarkDebug(ITensor &tensor) noexcept
Unmark a tensor as a debug tensor.
Definition: NvInfer.h:7995
IEinsumLayer * addEinsum(ITensor *const *inputs, int32_t nbInputs, char const *equation) noexcept
Add an Einsum layer to the network.
Definition: NvInfer.h:9161
void markOutput(ITensor &tensor) noexcept
Mark a tensor as a network output.
Definition: NvInfer.h:7961
IPaddingLayer * addPaddingNd(ITensor &input, Dims const &prePadding, Dims const &postPadding) noexcept
Add a padding layer to the network. Only 2D padding is currently supported.
Definition: NvInfer.h:8969
int32_t getNbOutputs() const noexcept
Get the number of outputs in the network.
Definition: NvInfer.h:8284
bool setWeightsName(Weights weights, char const *name) noexcept
Associate a name with all current uses of the given weights.
Definition: NvInfer.h:8993
TRT_NODISCARD IDistCollectiveLayer * addDistCollective(ITensor &input, CollectiveOperation distCollectiveOp, ReduceOperation reduceOp, int64_t root, int64_t *groups, int64_t groupSize) noexcept
Add a DistCollective layer to the network.
Definition: NvInfer.h:9426
IMoELayer * addMoE(ITensor &hiddenStates, ITensor &selectedExpertsForTokens, ITensor &scoresForSelectedExperts) noexcept
Add a MoE (Mixture of Experts) layer to the network.
Definition: NvInfer.h:9398
bool unmarkUnfusedTensorsAsDebugTensors() noexcept
Undo the marking of unfused tensors as debug tensors.
Definition: NvInfer.h:8041
Forward declaration of IEngineInspector for use by other interfaces.
Definition: NvInferRuntime.h:51
Definition: NvInfer.h:3753
DataType getIndicesType() const noexcept
Return the NonZero layer indices type.
Definition: NvInfer.h:3777
bool setIndicesType(DataType type) noexcept
Set the indices type for the layer.
Definition: NvInfer.h:3765
virtual ~INonZeroLayer() noexcept=default
A normalization layer in a network definition.
Definition: NvInfer.h:6467
float getEpsilon() const noexcept
Get the epsilon value used for the normalization calculation.
Definition: NvInfer.h:6486
TRT_DEPRECATED void setComputePrecision(DataType type) noexcept
Set the compute precision of this layer.
Definition: NvInfer.h:6565
uint32_t getAxes() const noexcept
Get the axes value used for the normalization calculation.
Definition: NvInfer.h:6506
virtual ~INormalizationLayer() noexcept=default
void setEpsilon(float eps) noexcept
Set the epsilon value used for the normalization calculation.
Definition: NvInfer.h:6476
TRT_NODISCARD bool isV2() const noexcept
Returns true if this layer was created through addNormalizationV2().
Definition: NvInfer.h:6587
apiv::VNormalizationLayer * mImpl
Definition: NvInfer.h:6593
int64_t getNbGroups() const noexcept
Get the number of groups used to split the channels for the normalization calculation.
Definition: NvInfer.h:6537
void setAxes(uint32_t axesMask) noexcept
Set the reduction axes for the normalization calculation.
Definition: NvInfer.h:6496
void setNbGroups(int64_t nbGroups) noexcept
Set the number of groups used to split the channels in the normalization calculation.
Definition: NvInfer.h:6527
TRT_DEPRECATED DataType getComputePrecision() const noexcept
Get the compute precision of this layer.
Definition: NvInfer.h:6577
A OneHot layer in a network definition.
Definition: NvInfer.h:6089
virtual ~IOneHotLayer() noexcept=default
apiv::VOneHotLayer * mImpl
Definition: NvInfer.h:6110
void setAxis(int32_t axis) noexcept
Set the axis parameter.
Definition: NvInfer.h:6096
int32_t getAxis() const noexcept
Get the value of the axis parameter.
Definition: NvInfer.h:6104
Optimization profile for dynamic input dimensions and shape tensors.
Definition: NvInferRuntime.h:2672
Layer that represents a padding operation.
Definition: NvInfer.h:2950
Dims getPostPaddingNd() const noexcept
Get the padding that is applied at the end of the tensor.
Definition: NvInfer.h:2999
void setPrePaddingNd(Dims const &padding) noexcept
Set the padding that is applied at the start of the tensor.
Definition: NvInfer.h:2961
virtual ~IPaddingLayer() noexcept=default
void setPostPaddingNd(Dims const &padding) noexcept
Set the padding that is applied at the end of the tensor.
Definition: NvInfer.h:2987
Dims getPrePaddingNd() const noexcept
Get the padding that is applied at the start of the tensor.
Definition: NvInfer.h:2973
apiv::VPaddingLayer * mImpl
Definition: NvInfer.h:3005
Layer that represents a parametric ReLU operation.
Definition: NvInfer.h:3968
apiv::VParametricReLULayer * mImpl
Definition: NvInfer.h:3970
virtual ~IParametricReLULayer() noexcept=default
Single registration point for all plugins in an application. It is used to find plugin implementation...
Definition: NvInferRuntimeCommon.h:56
Plugin class for user-implemented layers.
Definition: NvInferRuntimePlugin.h:139
Layer type for pluginV2.
Definition: NvInfer.h:2654
virtual ~IPluginV2Layer() noexcept=default
apiv::VPluginV2Layer * mImpl
Definition: NvInfer.h:2667
IPluginV2 & getPlugin() noexcept
Get the plugin for the layer.
Definition: NvInfer.h:2661
Layer type for V3 plugins.
Definition: NvInfer.h:2681
virtual ~IPluginV3Layer() noexcept=default
IPluginV3 & getPlugin() noexcept
Get the plugin for the layer.
Definition: NvInfer.h:2688
apiv::VPluginV3Layer * mImpl
Definition: NvInfer.h:2694
A Pooling layer in a network definition.
Definition: NvInfer.h:1416
PoolingType getPoolingType() const noexcept
Get the type of pooling to be performed.
Definition: NvInfer.h:1435
PaddingMode getPaddingMode() const noexcept
Get the padding mode.
Definition: NvInfer.h:1568
Dims getPostPadding() const noexcept
Get the post-padding.
Definition: NvInfer.h:1544
bool getAverageCountExcludesPadding() const noexcept
Get whether average pooling uses as a denominator the overlap area between the window and the unpadde...
Definition: NvInfer.h:1488
Dims getPrePadding() const noexcept
Get the pre-padding.
Definition: NvInfer.h:1516
void setPoolingType(PoolingType type) noexcept
Set the type of pooling to be performed.
Definition: NvInfer.h:1425
void setWindowSizeNd(Dims const &windowSize) noexcept
Set the multi-dimension window size for pooling.
Definition: NvInfer.h:1581
void setPaddingMode(PaddingMode paddingMode) noexcept
Set the padding mode.
Definition: NvInfer.h:1557
Dims getWindowSizeNd() const noexcept
Get the multi-dimension window size for pooling.
Definition: NvInfer.h:1591
void setAverageCountExcludesPadding(bool exclusive) noexcept
Set whether average pooling uses as a denominator the overlap area between the window and the unpadde...
Definition: NvInfer.h:1477
void setPaddingNd(Dims const &padding) noexcept
Set the multi-dimension padding for pooling.
Definition: NvInfer.h:1635
float getBlendFactor() const noexcept
Get the blending factor for the max_average_blend mode: max_average_blendPool = (1-blendFactor)*maxPo...
Definition: NvInfer.h:1463
void setStrideNd(Dims const &stride) noexcept
Set the multi-dimension stride for pooling.
Definition: NvInfer.h:1606
Dims getStrideNd() const noexcept
Get the multi-dimension stride for pooling.
Definition: NvInfer.h:1616
virtual ~IPoolingLayer() noexcept=default
Dims getPaddingNd() const noexcept
Get the multi-dimension padding for pooling.
Definition: NvInfer.h:1647
void setPostPadding(Dims const &padding) noexcept
Set the multi-dimension post-padding for pooling.
Definition: NvInfer.h:1534
void setPrePadding(Dims const &padding) noexcept
Set the multi-dimension pre-padding for pooling.
Definition: NvInfer.h:1506
void setBlendFactor(float blendFactor) noexcept
Set the blending factor for the max_average_blend mode: max_average_blendPool = (1-blendFactor)*maxPo...
Definition: NvInfer.h:1450
A Quantize layer in a network definition.
Definition: NvInfer.h:5465
void setToType(DataType toType) noexcept
Set the Quantize layer output type.
Definition: NvInfer.h:5526
bool setBlockShape(Dims const &blockShape) noexcept
Set the shape of the quantization block.
Definition: NvInfer.h:5499
void setAxis(int32_t axis) noexcept
Set the quantization axis.
Definition: NvInfer.h:5486
TRT_NODISCARD Dims getBlockShape() const noexcept
Get the shape of the quantization block.
Definition: NvInfer.h:5510
int32_t getAxis() const noexcept
Get the quantization axis.
Definition: NvInfer.h:5475
virtual ~IQuantizeLayer() noexcept=default
DataType getToType() const noexcept
Return the Quantize layer output type.
Definition: NvInfer.h:5538
A RaggedSoftmax layer in a network definition.
Definition: NvInfer.h:3802
apiv::VRaggedSoftMaxLayer * mImpl
Definition: NvInfer.h:3804
virtual ~IRaggedSoftMaxLayer() noexcept=default
A recurrence layer in a network definition.
Definition: NvInfer.h:4674
virtual ~IRecurrenceLayer() noexcept=default
Layer that represents a reduction across a non-bool tensor.
Definition: NvInfer.h:2870
void setKeepDimensions(bool keepDimensions) noexcept
Set the boolean that specifies whether or not to keep the reduced dimensions for the layer.
Definition: NvInfer.h:2917
void setOperation(ReduceOperation op) noexcept
Set the reduce operation for the layer.
Definition: NvInfer.h:2877
ReduceOperation getOperation() const noexcept
Get the reduce operation for the layer.
Definition: NvInfer.h:2887
virtual ~IReduceLayer() noexcept=default
uint32_t getReduceAxes() const noexcept
Get the axes over which to reduce for the layer.
Definition: NvInfer.h:2907
void setReduceAxes(uint32_t reduceAxes) noexcept
Set the axes over which to reduce.
Definition: NvInfer.h:2897
apiv::VReduceLayer * mImpl
Definition: NvInfer.h:2933
bool getKeepDimensions() const noexcept
Get the boolean that specifies whether or not to keep the reduced dimensions for the layer.
Definition: NvInfer.h:2927
A resize layer in a network definition.
Definition: NvInfer.h:4157
void setSelectorForSinglePixel(ResizeSelector selector) noexcept
Set coordinate selector function when resized to single pixel.
Definition: NvInfer.h:4318
void setNearestRounding(ResizeRoundMode value) noexcept
Set rounding mode for nearest neighbor resize.
Definition: NvInfer.h:4342
virtual ~IResizeLayer() noexcept=default
int32_t getScales(int32_t size, float *scales) const noexcept
Copies resize scales to scales[0, ..., nbScales-1], where nbScales is the number of scales that were ...
Definition: NvInfer.h:4236
void setOutputDimensions(Dims const &dimensions) noexcept
Set the output dimensions.
Definition: NvInfer.h:4177
void setCubicCoeff(float A) noexcept
Set the coefficient 'A' used in cubic interpolation.
Definition: NvInfer.h:4374
void setScales(float const *scales, int32_t nbScales) noexcept
Set the resize scales.
Definition: NvInfer.h:4217
float getCubicCoeff() const noexcept
Get the coefficient 'A' used in cubic interpolation.
Definition: NvInfer.h:4384
ResizeSelector getSelectorForSinglePixel() const noexcept
Get the coordinate selector function when resized to single pixel.
Definition: NvInfer.h:4328
InterpolationMode getResizeMode() const noexcept
Get resize mode for an input tensor.
Definition: NvInfer.h:4258
void setCoordinateTransformation(ResizeCoordinateTransformation coordTransform) noexcept
Set coordinate transformation function.
Definition: NvInfer.h:4293
void setExcludeOutside(bool excludeFlag) noexcept
Set the state for excluding outside pixels.
Definition: NvInfer.h:4397
void setResizeMode(InterpolationMode interpolationMode) noexcept
Set resize mode for an input tensor.
Definition: NvInfer.h:4248
Dims getOutputDimensions() const noexcept
Get the output dimensions.
Definition: NvInfer.h:4187
ResizeRoundMode getNearestRounding() const noexcept
Get rounding mode for nearest neighbor resize.
Definition: NvInfer.h:4352
bool getExcludeOutside() const noexcept
Get the state for excluding outside pixels.
Definition: NvInfer.h:4407
ResizeCoordinateTransformation getCoordinateTransformation() const noexcept
Get coordinate transformation function.
Definition: NvInfer.h:4303
A ReverseSequence layer in a network definition.
Definition: NvInfer.h:6395
void setSequenceAxis(int32_t sequenceAxis) noexcept
Set the sequence axis. Default is 0.
Definition: NvInfer.h:6428
int32_t getBatchAxis() const noexcept
Return the batch axis. Return 1 if no batch axis was set.
Definition: NvInfer.h:6415
apiv::VReverseSequenceLayer * mImpl
Definition: NvInfer.h:6444
int32_t getSequenceAxis() const noexcept
Return the sequence axis. Return 0 if no sequence axis was set.
Definition: NvInfer.h:6438
void setBatchAxis(int32_t batchAxis) noexcept
Set the batch axis. Default is 1.
Definition: NvInfer.h:6405
virtual ~IReverseSequenceLayer() noexcept=default
Layer that implements Rotary Position Embedding (RoPE) (https://arxiv.org/abs/2104....
Definition: NvInfer.h:7274
TRT_NODISCARD int32_t getRotaryEmbeddingDim() const noexcept
Get the number of hidden dimensions participating in RoPE. The default value is 0,...
Definition: NvInfer.h:7314
virtual ~IRotaryEmbeddingLayer() noexcept=default
void setInterleaved(bool interleaved) noexcept
Set whether the input is in interleaved format, i.e., whether the 2-d vectors rotated are taken from ...
Definition: NvInfer.h:7281
TRT_NODISCARD bool setRotaryEmbeddingDim(int32_t rotaryEmbeddingDim) noexcept
Set the number of hidden dimensions participating in RoPE. The default value is 0,...
Definition: NvInfer.h:7303
apiv::VRotaryEmbeddingLayer * mImpl
Definition: NvInfer.h:7337
TRT_NODISCARD bool getInterleaved() const noexcept
Get whether the input is in interleaved format. The default value is false.
Definition: NvInfer.h:7292
A Scale layer in a network definition.
Definition: NvInfer.h:1813
Weights getScale() const noexcept
Get the scale value.
Definition: NvInfer.h:1870
Weights getPower() const noexcept
Get the power value.
Definition: NvInfer.h:1890
void setScale(Weights scale) noexcept
Set the scale value.
Definition: NvInfer.h:1860
void setPower(Weights power) noexcept
Set the power value.
Definition: NvInfer.h:1880
ScaleMode getMode() const noexcept
Get the scale mode.
Definition: NvInfer.h:1830
void setShift(Weights shift) noexcept
Set the shift value.
Definition: NvInfer.h:1840
void setChannelAxis(int32_t channelAxis) noexcept
Set the channel axis.
Definition: NvInfer.h:1926
Weights getShift() const noexcept
Get the shift value.
Definition: NvInfer.h:1850
virtual ~IScaleLayer() noexcept=default
void setMode(ScaleMode mode) noexcept
Set the scale mode.
Definition: NvInfer.h:1820
int32_t getChannelAxis() const noexcept
Get the channel axis.
Definition: NvInfer.h:1905
A scatter layer in a network definition. Supports several kinds of scattering.
Definition: NvInfer.h:6017
void setMode(ScatterMode mode) noexcept
Set the scatter mode.
Definition: NvInfer.h:6024
apiv::VScatterLayer * mImpl
Definition: NvInfer.h:6058
void setAxis(int32_t axis) noexcept
Set the axis used by ScatterMode::kELEMENTS.
Definition: NvInfer.h:6044
int32_t getAxis() const noexcept
Get the axis.
Definition: NvInfer.h:6052
ScatterMode getMode() const noexcept
Get the scatter mode.
Definition: NvInfer.h:6034
virtual ~IScatterLayer() noexcept=default
Select elements from two data tensors based on a condition tensor.
Definition: NvInfer.h:4982
virtual ~ISelectLayer() noexcept=default
Layer type for getting shape of a tensor.
Definition: NvInfer.h:3475
virtual ~IShapeLayer() noexcept=default
apiv::VShapeLayer * mImpl
Definition: NvInfer.h:3477
Layer type for shuffling data.
Definition: NvInfer.h:3038
apiv::VShuffleLayer * mImpl
Definition: NvInfer.h:3196
void setFirstTranspose(Permutation permutation) noexcept
Set the permutation applied by the first transpose operation.
Definition: NvInfer.h:3049
void setSecondTranspose(Permutation permutation) noexcept
Set the permutation applied by the second transpose operation.
Definition: NvInfer.h:3149
Dims getReshapeDimensions() const noexcept
Get the reshaped dimensions.
Definition: NvInfer.h:3102
void setReshapeDimensions(Dims const &dimensions) noexcept
Set the reshaped dimensions.
Definition: NvInfer.h:3089
Permutation getFirstTranspose() const noexcept
Get the permutation applied by the first transpose operation.
Definition: NvInfer.h:3061
virtual ~IShuffleLayer() noexcept=default
Permutation getSecondTranspose() const noexcept
Get the permutation applied by the second transpose operation.
Definition: NvInfer.h:3161
bool getZeroIsPlaceholder() const noexcept
Get meaning of 0 in reshape dimensions.
Definition: NvInfer.h:3190
void setZeroIsPlaceholder(bool zeroIsPlaceholder) noexcept
Set meaning of 0 in reshape dimensions.
Definition: NvInfer.h:3177
Slices an input tensor into an output tensor based on the offset and strides.
Definition: NvInfer.h:3290
void setStride(Dims const &stride) noexcept
Set the stride for computing the output slice data.
Definition: NvInfer.h:3359
apiv::VSliceLayer * mImpl
Definition: NvInfer.h:3458
virtual ~ISliceLayer() noexcept=default
void setSize(Dims const &size) noexcept
Set the dimensions of the output slice.
Definition: NvInfer.h:3330
void setAxes(Dims const &axes) noexcept
Set the axes for this ISliceLayer.
Definition: NvInfer.h:3437
void setStart(Dims const &start) noexcept
Set the start offset that the slice layer uses to create the output slice.
Definition: NvInfer.h:3301
Dims getStart() const noexcept
Get the start offset for the slice layer.
Definition: NvInfer.h:3316
void setMode(SampleMode mode) noexcept
Set the slice mode.
Definition: NvInfer.h:3384
Dims getSize() const noexcept
Get dimensions of the output slice.
Definition: NvInfer.h:3345
SampleMode getMode() const noexcept
Get the slice mode.
Definition: NvInfer.h:3394
Dims getStride() const noexcept
Get the stride for the output slice.
Definition: NvInfer.h:3374
Dims getAxes() const noexcept
Get the axes for this ISliceLayer.
Definition: NvInfer.h:3452
A Softmax layer in a network definition.
Definition: NvInfer.h:1957
void setAxes(uint32_t axes) noexcept
Set the axis along which softmax is computed. Currently, only one axis can be set.
Definition: NvInfer.h:1979
uint32_t getAxes() const noexcept
Get the axis along which softmax occurs.
Definition: NvInfer.h:1989
virtual ~ISoftMaxLayer() noexcept=default
Layer that represents a squeeze operation, removing unit dimensions of the first input tensor on a se...
Definition: NvInfer.h:6607
virtual ~ISqueezeLayer() noexcept=default
apiv::VSqueezeLayer * mImpl
Definition: NvInfer.h:6624
A tensor in a network definition.
Definition: NvInfer.h:189
void setAllowedFormats(TensorFormats formats) noexcept
Set allowed formats for an input or output tensor. By default all formats are allowed....
Definition: NvInfer.h:340
void setDimensions(Dims const &dimensions) noexcept
Set the dimensions of a tensor.
Definition: NvInfer.h:237
void setName(char const *name) noexcept
Set the tensor name.
Definition: NvInfer.h:206
bool isExecutionTensor() const noexcept
Whether the tensor is an execution tensor.
Definition: NvInfer.h:405
char const * getName() const noexcept
Get the tensor name.
Definition: NvInfer.h:218
bool isShapeTensor() const noexcept
Whether the tensor is a shape tensor.
Definition: NvInfer.h:384
bool isNetworkInput() const noexcept
Whether the tensor is a network input.
Definition: NvInfer.h:310
TRT_DEPRECATED void setType(DataType type) noexcept
Set the data type of a tensor.
Definition: NvInfer.h:287
bool isNetworkOutput() const noexcept
Whether the tensor is a network output.
Definition: NvInfer.h:318
DataType getType() const noexcept
Get the data type of a tensor.
Definition: NvInfer.h:302
apiv::VTensor * mImpl
Definition: NvInfer.h:452
virtual ~ITensor() noexcept=default
void setDimensionName(int32_t index, char const *name) noexcept
Name a dimension of an input tensor.
Definition: NvInfer.h:431
char const * getDimensionName(int32_t index) const noexcept
Get the name of an input dimension.
Definition: NvInfer.h:446
Dims getDimensions() const noexcept
Get the dimensions of a tensor.
Definition: NvInfer.h:251
TensorFormats getAllowedFormats() const noexcept
Get a bitmask of TensorFormat values that the tensor supports. For a shape tensor,...
Definition: NvInfer.h:353
Class to handle tactic timing info collected from builder.
Definition: NvInfer.h:9874
int64_t queryKeys(TimingCacheKey *keyBuffer, int64_t capacity) const noexcept
Query cache keys from Timing Cache.
Definition: NvInfer.h:9940
bool combine(ITimingCache const &inputCache, bool ignoreMismatch) noexcept
Combine input timing cache into local instance.
Definition: NvInfer.h:9911
TimingCacheValue query(TimingCacheKey const &key) const noexcept
Query value in a cache entry.
Definition: NvInfer.h:9957
virtual ~ITimingCache() noexcept=default
bool update(TimingCacheKey const &key, TimingCacheValue const &value) noexcept
Update values in a cache entry.
Definition: NvInfer.h:9979
apiv::VTimingCache * mImpl
Definition: NvInfer.h:9985
bool reset() noexcept
Empty the timing cache.
Definition: NvInfer.h:9921
Layer that represents a TopK reduction.
Definition: NvInfer.h:3515
void setK(int32_t k) noexcept
Set the static k value for the layer.
Definition: NvInfer.h:3546
void setReduceAxes(uint32_t reduceAxes) noexcept
Set which axes to reduce for the layer.
Definition: NvInfer.h:3570
TopKOperation getOperation() const noexcept
Get the operation for the layer.
Definition: NvInfer.h:3532
apiv::VTopKLayer * mImpl
Definition: NvInfer.h:3629
void setOperation(TopKOperation op) noexcept
Set the operation for the layer.
Definition: NvInfer.h:3522
bool setIndicesType(DataType type) noexcept
Set the indices type for the layer.
Definition: NvInfer.h:3611
int32_t getK() const noexcept
Get the k value for the layer.
Definition: NvInfer.h:3560
uint32_t getReduceAxes() const noexcept
Get the axes to reduce for the layer.
Definition: NvInfer.h:3580
virtual ~ITopKLayer() noexcept=default
DataType getIndicesType() const noexcept
Return the TopK layer indices type.
Definition: NvInfer.h:3623
A layer that represents a trip-count limiter.
Definition: NvInfer.h:4795
TripLimit getTripLimit() const noexcept
Get a trip limiter type.
Definition: NvInfer.h:4800
virtual ~ITripLimitLayer() noexcept=default
Layer that represents a unary operation.
Definition: NvInfer.h:2762
void setOperation(UnaryOperation op) noexcept
Set the unary operation for the layer.
Definition: NvInfer.h:2771
apiv::VUnaryLayer * mImpl
Definition: NvInfer.h:2787
UnaryOperation getOperation() const noexcept
Get the unary operation for the layer.
Definition: NvInfer.h:2781
virtual ~IUnaryLayer() noexcept=default
Layer that represents an unsqueeze operation, which reshapes the first input tensor by inserting unit...
Definition: NvInfer.h:6637
virtual ~IUnsqueezeLayer() noexcept=default
apiv::VUnsqueezeLayer * mImpl
Definition: NvInfer.h:6655
An Interface class for version control.
Definition: NvInferRuntimeBase.h:279
Version information associated with a TRT interface.
Definition: NvInferRuntimeBase.h:244
An array of weights used as a layer parameter.
Definition: NvInferRuntime.h:124
Definition: NvInferRuntimeBase.h:416
Definition: NvInferRuntime.h:1656
Definition: NvInferPluginBase.h:206
Definition: NvInfer.h:10232
virtual bool stepComplete(char const *phaseName, int32_t step) noexcept=0
Signal that a step of an optimizer phase has finished.
virtual ~IProgressMonitor() noexcept=default
virtual void phaseFinish(char const *phaseName) noexcept=0
Signal that a phase of the optimizer has finished.
virtual void phaseStart(char const *phaseName, char const *parentPhase, int32_t nbSteps) noexcept=0
Signal that a phase of the optimizer has started.
Definition: NvInferRuntime.h:666
IBuilder * createInferBuilder(ILogger &logger) noexcept
Create an instance of an IBuilder class.
Definition: NvInfer.h:11500
The TensorRT API version 1 namespace.
Definition: NvInferPluginBase.h:29
uint32_t TacticSources
Represents a collection of one or more TacticSource values combined using bitwise-OR operations.
Definition: NvInferRuntime.h:2958
ResizeSelector
The coordinate selector used when resizing to a single-pixel output.
Definition: NvInfer.h:4062
@ kFORMULA
Use formula to map the original index.
@ kUPPER
Select the upper left pixel.
EngineCapability
List of supported engine capability flows.
Definition: NvInferRuntime.h:76
MemoryPoolType
The type for memory pools used by TensorRT.
Definition: NvInfer.h:9996
TENSORRTAPI bool setInternalLibraryPath(AsciiChar const *path) noexcept
Set a custom directory path for loading internal TensorRT libraries when building engines.
ScaleMode
Controls how shift, scale and power are applied in a Scale layer.
Definition: NvInfer.h:1770
@ kUNIFORM
Identical coefficients across all elements of the tensor.
@ kCHANNEL
Per-channel coefficients.
RuntimePlatform
Describes the intended runtime platform (operating system and CPU architecture) for the execution of ...
Definition: NvInfer.h:9569
@ kNONE
Tensor is not an input or output.
HardwareCompatibilityLevel
Describes requirements of compatibility with GPU architectures other than that of the GPU on which th...
Definition: NvInfer.h:10124
CumulativeOperation
Enumerates the cumulative operations that may be performed by a Cumulative layer.
Definition: NvInfer.h:6671
BoundingBoxFormat
Representation of bounding box data used for the Boxes input tensor in INMSLayer.
Definition: NvInfer.h:6209
@ kCENTER_SIZES
(x_center, y_center, width, height) where (x_center, y_center) is the center point of the box
@ kCORNER_PAIRS
(x1, y1, x2, y2) where (x1, y1) and (x2, y2) are any pair of diagonal corners
constexpr int32_t EnumMax< BuilderFlag >() noexcept
Definition: NvInfer.h:9808
constexpr int32_t EnumMax< LayerType >() noexcept
Definition: NvInfer.h:124
ComputeCapability
Describes compute capability that an engine will be built for.
Definition: NvInfer.h:10173
@ kSM120
Target NVIDIA Blackwell GPU architecture (SM 12.0).
@ kSM75
Target NVIDIA Turing GPU architecture (SM 7.5).
@ kSM80
Target NVIDIA Ampere GPU architecture (SM 8.0).
@ kCURRENT
Use the compute capability of the current GPU in the environment.
@ kSM89
Target NVIDIA Ada Lovelace GPU architecture (SM 8.9).
@ kSM86
Target NVIDIA Ampere GPU architecture (SM 8.6).
@ kFP4
FP4 field type.
@ kINT8
INT8 field type.
@ kFP8
FP8 field type.
@ kBF16
BF16 field type.
@ kINT4
INT4 field type.
UnaryOperation
Enumerates the unary operations that may be performed by a Unary layer.
Definition: NvInfer.h:2715
@ kISINF
Return true if input value equals +/- infinity for floating-point data type.
@ kCOSH
Hyperbolic cosine.
@ kACOSH
Inverse hyperbolic cosine.
@ kERF
Gauss error function.
@ kISNAN
Return true if input value is a NaN for floating-point data type.
@ kACOS
Inverse cosine.
@ kABS
Absolute value.
@ kSINH
Hyperbolic sine.
@ kROUND
Round to nearest even for floating-point data type.
@ kATANH
Inverse hyperbolic tangent.
@ kASINH
Inverse hyperbolic sine.
@ kSIGN
Sign: if input > 0, output 1; if input < 0, output -1; if input == 0, output 0.
@ kEXP
Exponentiation.
@ kATAN
Inverse tangent.
constexpr int32_t EnumMax< ReduceOperation >() noexcept
Definition: NvInfer.h:2830
constexpr int32_t EnumMax< TripLimit >() noexcept
Definition: NvInfer.h:4463
ActivationType
Enumerates the types of activation to perform in an activation layer.
Definition: NvInfer.h:143
@ kSELU
Selu activation: x>0 ? beta * x : beta * (alpha*exp(x) - alpha)
@ kTANH
TanH activation.
@ kSCALED_TANH
Scaled tanh activation: alpha*tanh(beta*x)
@ kRELU
Rectified linear activation.
@ kELU
Elu activation: x>=0 ? x : alpha * (exp(x) - 1).
@ kLEAKY_RELU
LeakyRelu activation: x>=0 ? x : alpha * x.
@ kSOFTSIGN
Softsign activation: x / (1+|x|)
@ kHARD_SIGMOID
Hard sigmoid activation: max(0, min(1, alpha*x+beta))
@ kTHRESHOLDED_RELU
Thresholded ReLU activation: x>alpha ? x : 0.
@ kSIGMOID
Sigmoid activation.
@ kCLIP
Clip activation: max(alpha, min(beta, x))
@ kGELU_TANH
GELU tanh activation: 0.5 * x * (1 + tanh(sqrt(2/pi) * (0.044715F * pow(x, 3) + x)))
@ kGELU_ERF
GELU erf activation: 0.5 * x * (1 + erf(sqrt(0.5) * x))
@ kSOFTPLUS
Parametric softplus activation: alpha*log(exp(beta*x)+1)
FillOperation
Enumerates the tensor fill operations that may be performed by a fill layer.
Definition: NvInfer.h:5043
ResizeRoundMode
The rounding mode for nearest neighbor resize.
Definition: NvInfer.h:4092
@ kHALF_UP
Round half up.
@ kHALF_DOWN
Round half down.
char_t AsciiChar
Definition: NvInferRuntimeBase.h:116
PaddingMode
Enumerates the modes of padding to perform in convolution, deconvolution and pooling layer,...
Definition: NvInfer.h:948
@ kSAME_LOWER
Use SAME padding, with prePadding >= postPadding.
@ kEXPLICIT_ROUND_DOWN
Use explicit padding, rounding output size down.
@ kEXPLICIT_ROUND_UP
Use explicit padding, rounding output size up.
@ kSAME_UPPER
Use SAME padding, with prePadding <= postPadding.
TripLimit
Enum that describes kinds of trip limits.
Definition: NvInfer.h:4451
@ kWHILE
Tensor is a scalar of type kBOOL. Loop terminates when value is false.
@ kCOUNT
Tensor is a scalar of type kINT32 or kINT64 that contains the trip count.
uint32_t NetworkDefinitionCreationFlags
Represents one or more NetworkDefinitionCreationFlag flags using binary OR operations....
Definition: NvInfer.h:11176
PreviewFeature
Define preview features.
Definition: NvInfer.h:10071
TilingOptimizationLevel
Define the optimization levels for Tiling.
Definition: NvInfer.h:10199
@ kFAST
Use a fast algorithm and heuristic based strategy. Slightly increases engine build time.
@ kFULL
Widen the search space even further. Significantly increases engine build time.
constexpr int32_t EnumMax< GatherMode >() noexcept
Definition: NvInfer.h:2474
DataType
The type of weights and tensors. The datatypes other than kBOOL, kINT32, and kINT64 are "activation d...
Definition: NvInferRuntimeBase.h:146
uint32_t BuilderFlags
Represents one or more BuilderFlag values using binary OR operations, e.g., 1U << BuilderFlag::kFP16 ...
Definition: NvInfer.h:9601
DeviceType
The device that this layer/network will execute on.
Definition: NvInferRuntime.h:1350
constexpr int32_t EnumMax< ScaleMode >() noexcept
Definition: NvInfer.h:1782
LayerType
The type values of layer classes.
Definition: NvInfer.h:58
@ kGRID_SAMPLE
Grid sample layer.
@ kRAGGED_SOFTMAX
Ragged softmax layer.
@ kDECONVOLUTION
Deconvolution layer.
@ kREDUCE
Reduce layer.
@ kASSERTION
Assertion layer.
@ kTOPK
TopK layer.
@ kRESIZE
Resize layer.
@ kCAST
Cast layer.
@ kPADDING
Padding layer.
@ kSQUEEZE
Squeeze layer.
@ kATTENTION_INPUT
Attention Input.
@ kMATRIX_MULTIPLY
Matrix multiply layer.
@ kCONDITION
Condition layer.
@ kCUMULATIVE
Cumulative layer.
@ kCONDITIONAL_INPUT
Conditional Input layer.
@ kIDENTITY
Identity layer.
@ kNORMALIZATION
Normalization layer.
@ kQUANTIZE
Quantize layer.
@ kSCATTER
Scatter layer.
@ kCONVOLUTION
Convolution layer.
@ kPARAMETRIC_RELU
Parametric ReLU layer.
@ kATTENTION_OUTPUT
Attention Output.
@ kUNSQUEEZE
Unsqueeze layer.
@ kCONCATENATION
Concatenation layer.
@ kONE_HOT
OneHot layer.
@ kREVERSE_SEQUENCE
Reverse sequence layer.
@ kSLICE
Slice layer.
@ kEINSUM
Einsum layer.
@ kSOFTMAX
SoftMax layer.
@ kSHAPE
Shape layer.
@ kROTARY_EMBEDDING
Rotary Embedding layer.
@ kRECURRENCE
Loop Recurrence layer.
@ kDEQUANTIZE
Dequantize layer.
@ kSHUFFLE
Shuffle layer.
@ kPLUGIN_V3
PluginV3 layer.
@ kITERATOR
Loop Iterator layer.
@ kPOOLING
Pooling layer.
@ kTRIP_LIMIT
Loop Trip limit layer.
@ kSCALE
Scale layer.
@ kDYNAMIC_QUANTIZE
Dynamic Quantize layer.
@ kGATHER
Gather layer.
@ kUNARY
UnaryOp operation layer.
@ kACTIVATION
Activation layer.
@ kELEMENTWISE
Elementwise layer.
@ kSELECT
Select layer.
@ kPLUGIN_V2
PluginV2 layer.
@ kLOOP_OUTPUT
Loop output layer.
@ kCONDITIONAL_OUTPUT
Conditional Output layer.
@ kCONSTANT
Constant layer.
@ kNON_ZERO
NonZero layer.
@ kFILL
Fill layer.
@ kKVCACHE_UPDATE
KV Cache Update layer.
@ kPLUGIN
Plugin layer.
@ kDIST_COLLECTIVE
DistCollective layer.
SampleMode
Controls how ISliceLayer and IGridSample handle out-of-bounds coordinates.
Definition: NvInfer.h:3206
@ kCLAMP
Out of bounds indices are clamped to bounds.
@ kSTRICT_BOUNDS
Fail with error when the coordinates are out of bounds.
@ kWRAP
Coordinates wrap around periodically.
GatherMode
Control form of IGatherLayer.
Definition: NvInfer.h:2462
@ kDEFAULT
Similar to ONNX Gather.
@ kELEMENT
Similar to ONNX GatherElements.
@ kND
Similar to ONNX GatherND.
MoEActType
Enumerates the activation type for the MoE layer.
Definition: NvInfer.h:7435
uint32_t TensorFormats
It is capable of representing one or more TensorFormat by binary OR operations, e....
Definition: NvInfer.h:135
ProfilingVerbosity
List of verbosity levels of layer information exposed in NVTX annotations and in IEngineInspector.
Definition: NvInferRuntime.h:2970
NetworkDefinitionCreationFlag
List of immutable network properties expressed at network creation time. NetworkDefinitionCreationFla...
Definition: NvInfer.h:11187
ElementWiseOperation
Enumerates the binary operations that may be performed by an ElementWise layer.
Definition: NvInfer.h:2372
@ kSUB
Subtract the second element from the first.
@ kSUM
Sum of the two elements.
@ kPROD
Product of the two elements.
@ kFLOOR_DIV
Floor division of the first element by the second.
@ kEQUAL
Check if two elements are equal.
@ kAND
Logical AND of two elements.
@ kOR
Logical OR of two elements.
@ kMIN
Minimum of the two elements.
@ kPOW
The first element to the power of the second element.
@ kLESS
Check if element in first tensor is less than corresponding element in second tensor.
@ kGREATER
Check if element in first tensor is greater than corresponding element in second tensor.
@ kXOR
Logical XOR of two elements.
@ kDIV
Divide the first element by the second.
CollectiveOperation
Enumerates the collective operations that may be performed by a DistCollective layer.
Definition: NvInfer.h:2843
@ kREDUCE_SCATTER
Reduce scatter.
constexpr int32_t EnumMax< SampleMode >() noexcept
Definition: NvInfer.h:3222
InterpolationMode
Enumerates various modes of interpolation.
Definition: NvInfer.h:3980
@ kNEAREST
ND (0 < N <= 8) nearest neighbor resizing.
@ kCUBIC
Supports bicubic (2D) interpolation.
@ kLINEAR
Supports linear (1D), bilinear (2D), and trilinear (3D) interpolation.
BuilderFlag
List of valid modes that the builder can enable when creating an engine from a network definition.
Definition: NvInfer.h:9611
@ kWEIGHT_STREAMING
Enable weight streaming for the current engine.
@ kDEBUG
Enable debugging of layers via synchronizing after every layer.
@ kGPU_FALLBACK
Enable layers marked to execute on GPU if layer cannot execute on DLA.
@ kSPARSE_WEIGHTS
Allow the builder to examine weights and use optimized functions when weights have suitable sparsity.
@ kEDITABLE_TIMING_CACHE
Enable editable timing cache.
@ kSTRIP_PLAN
Strip the refittable weights from the engine plan file.
@ kMONITOR_MEMORY
Enable memory monitor during build time.
@ kDISABLE_TIMING_CACHE
Disable reuse of timing information across identical layers.
@ kREFIT
Enable building a refittable engine.
constexpr int32_t EnumMax< TopKOperation >() noexcept
Definition: NvInfer.h:3498
TENSORRTAPI nvinfer1::IPluginRegistry * getBuilderPluginRegistry(nvinfer1::EngineCapability capability) noexcept
Return the plugin registry for building a Standard engine, or nullptr if no registry exists.
constexpr int32_t EnumMax< MemoryPoolType >() noexcept
Definition: NvInfer.h:10057
TopKOperation
Enumerates the operations that may be performed by a TopK layer.
Definition: NvInfer.h:3487
ReduceOperation
Enumerates the reduce operations that may be performed by a Reduce layer.
Definition: NvInfer.h:2815
@ kAVG
Average of the elements.
constexpr int32_t EnumMax< LoopOutput >() noexcept
Definition: NvInfer.h:4440
constexpr int32_t EnumMax< NetworkDefinitionCreationFlag >() noexcept
Definition: NvInfer.h:11206
ScatterMode
Control form of IScatterLayer.
Definition: NvInfer.h:5943
MatrixOperation
Enumerates the operations that may be performed on a tensor by IMatrixMultiplyLayer before multiplica...
Definition: NvInfer.h:3640
@ kTRANSPOSE
Like kNONE, but transpose the matrix dimensions.
ResizeCoordinateTransformation
The resize coordinate transformation function.
Definition: NvInfer.h:4008
constexpr int32_t EnumMax< UnaryOperation >() noexcept
Definition: NvInfer.h:2749
LoopOutput
Enum that describes kinds of loop outputs.
Definition: NvInfer.h:4423
@ kLAST_VALUE
Output value is value of tensor for last iteration.
@ kCONCATENATE
Output value is concatenation of values of tensor for each iteration, in forward order.
@ kREVERSE
Output value is concatenation of values of tensor for each iteration, in reverse order.
constexpr int32_t EnumMax< BoundingBoxFormat >() noexcept
Definition: NvInfer.h:6222
constexpr int32_t EnumMax< MatrixOperation >() noexcept
Definition: NvInfer.h:3668
KVCacheMode
Enumerates the KVCache modes that may be performed by a KVCacheUpdate layer.
Definition: NvInfer.h:7347
PoolingType
The type of pooling to perform in a pooling layer.
Definition: NvInfer.h:1384
@ kAVERAGE
Average over elements. If the tensor is padded, the count includes the padding.
@ kMAX
Maximum over elements.
@ kMAX_AVERAGE_BLEND
Blending between max and average pooling: (1-blendFactor)*maxPool + blendFactor*avgPool.
v_1_0::IProgressMonitor IProgressMonitor
Definition: NvInfer.h:10315
constexpr int32_t EnumMax< FillOperation >() noexcept
Definition: NvInfer.h:5077
AttentionNormalizationOp
Enumerates the operations that may be performed by the normalization in the attention subgraph.
Definition: NvInfer.h:6806
constexpr int32_t EnumMax< ScatterMode >() noexcept
Definition: NvInfer.h:5954
Represents a permutation of dimensions.
Definition: NvInfer.h:3015
Declaration of EnumMaxImpl struct to store maximum number of elements in an enumeration type.
Definition: NvInferRuntimeBase.h:129
The key to retrieve timing cache entries.
Definition: NvInfer.h:9834
Definition: NvInfer.h:9848
uint64_t tacticHash
Hash of the selected tactic.
Definition: NvInfer.h:9850
float timingMSec
Timing of this tactic in milliseconds. Negative numbers and NaN are invalid values.
Definition: NvInfer.h:9852

  Copyright © 2024 NVIDIA Corporation
  Privacy Policy | Manage My Privacy | Do Not Sell or Share My Data | Terms of Service | Accessibility | Corporate Policies | Product Security | Contact