TensorRT 10.16.0
NvInfer.h — source listing of the TensorRT network-definition (builder) API header.
/*
 * SPDX-FileCopyrightText: Copyright (c) 1993-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 * SPDX-License-Identifier: Apache-2.0
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
17
18#ifndef NV_INFER_H
19#define NV_INFER_H
20
21#include "NvInferLegacyDims.h"
22#include "NvInferRuntime.h" // IWYU pragma: export
23
35
41
47namespace nvinfer1
48{
49
57enum class LayerType : int32_t
58{
59 kCONVOLUTION = 0,
60 kCAST = 1,
61 kACTIVATION = 2,
62 kPOOLING = 3,
63 kLRN = 4,
64 kSCALE = 5,
65 kSOFTMAX = 6,
66 kDECONVOLUTION = 7,
67 kCONCATENATION = 8,
68 kELEMENTWISE = 9,
69 kPLUGIN = 10,
70 kUNARY = 11,
71 kPADDING = 12,
72 kSHUFFLE = 13,
73 kREDUCE = 14,
74 kTOPK = 15,
75 kGATHER = 16,
76 kMATRIX_MULTIPLY = 17,
77 kRAGGED_SOFTMAX = 18,
78 kCONSTANT = 19,
79 kIDENTITY = 20,
80 kPLUGIN_V2 = 21,
81 kSLICE = 22,
82 kSHAPE = 23,
83 kPARAMETRIC_RELU = 24,
84 kRESIZE = 25,
85 kTRIP_LIMIT = 26,
86 kRECURRENCE = 27,
87 kITERATOR = 28,
88 kLOOP_OUTPUT = 29,
89 kSELECT = 30,
90 kFILL = 31,
91 kQUANTIZE = 32,
92 kDEQUANTIZE = 33,
93 kCONDITION = 34,
96 kSCATTER = 37,
97 kEINSUM = 38,
98 kASSERTION = 39,
99 kONE_HOT = 40,
100 kNON_ZERO = 41,
101 kGRID_SAMPLE = 42,
102 kNMS = 43,
103 kREVERSE_SEQUENCE = 44,
104 kNORMALIZATION = 45,
105 kPLUGIN_V3 = 46,
106 kSQUEEZE = 47,
107 kUNSQUEEZE = 48,
108 kCUMULATIVE = 49,
109 kDYNAMIC_QUANTIZE = 50,
110 kATTENTION_INPUT = 51,
111 kATTENTION_OUTPUT = 52,
112 kROTARY_EMBEDDING = 53,
113 kKVCACHE_UPDATE = 54,
114 kMOE = 55,
115 kDIST_COLLECTIVE = 56,
116};
117
123template <>
124constexpr inline int32_t EnumMax<LayerType>() noexcept
125{
126 return 57;
127}
128
135using TensorFormats = uint32_t;
136
142enum class ActivationType : int32_t
143{
144 kRELU = 0,
145 kSIGMOID = 1,
146 kTANH = 2,
147 kLEAKY_RELU = 3,
148 kELU = 4,
149 kSELU = 5,
150 kSOFTSIGN = 6,
151 kSOFTPLUS = 7,
152 kCLIP = 8,
153 kHARD_SIGMOID = 9,
154 kSCALED_TANH = 10,
155 kTHRESHOLDED_RELU = 11,
156 kGELU_ERF = 12,
157 kGELU_TANH = 13
158};
159
160namespace impl
161{
167template <>
169{
170 static constexpr int32_t kVALUE = 14;
171};
172} // namespace impl
173
188class ITensor : public INoCopy
189{
190public:
206 void setName(char const* name) noexcept
207 {
208 mImpl->setName(name);
209 }
210
218 char const* getName() const noexcept
219 {
220 return mImpl->getName();
221 }
222
237 void setDimensions(Dims const& dimensions) noexcept
238 {
239 mImpl->setDimensions(dimensions);
240 }
241
251 Dims getDimensions() const noexcept
252 {
253 return mImpl->getDimensions();
254 }
255
287 TRT_DEPRECATED void setType(DataType type) noexcept
288 {
289 mImpl->setType(type);
290 }
291
302 DataType getType() const noexcept
303 {
304 return mImpl->getType();
305 }
306
319 TRT_DEPRECATED bool setDynamicRange(float min, float max) noexcept
320 {
321 return mImpl->setDynamicRange(min, max);
322 }
323
327 bool isNetworkInput() const noexcept
328 {
329 return mImpl->isNetworkInput();
330 }
331
335 bool isNetworkOutput() const noexcept
336 {
337 return mImpl->isNetworkOutput();
338 }
339
352 TRT_DEPRECATED void setBroadcastAcrossBatch(bool broadcastAcrossBatch) noexcept
353 {
354 mImpl->setBroadcastAcrossBatch(broadcastAcrossBatch);
355 }
356
367 {
368 return mImpl->getBroadcastAcrossBatch();
369 }
370
379 {
380 return mImpl->getLocation();
381 }
382
398 {
399 mImpl->setLocation(location);
400 }
401
409 TRT_DEPRECATED bool dynamicRangeIsSet() const noexcept
410 {
411 return mImpl->dynamicRangeIsSet();
412 }
413
417 void resetDynamicRange() noexcept
418 {
419 mImpl->resetDynamicRange();
420 }
421
427 float getDynamicRangeMin() const noexcept
428 {
429 return mImpl->getDynamicRangeMin();
430 }
431
437 float getDynamicRangeMax() const noexcept
438 {
439 return mImpl->getDynamicRangeMax();
440 }
441
459 void setAllowedFormats(TensorFormats formats) noexcept
460 {
461 mImpl->setAllowedFormats(formats);
462 }
463
473 {
474 return mImpl->getAllowedFormats();
475 }
476
503 bool isShapeTensor() const noexcept
504 {
505 return mImpl->isShapeTensor();
506 }
507
524 bool isExecutionTensor() const noexcept
525 {
526 return mImpl->isExecutionTensor();
527 }
528
550 void setDimensionName(int32_t index, char const* name) noexcept
551 {
552 mImpl->setDimensionName(index, name);
553 }
554
565 char const* getDimensionName(int32_t index) const noexcept
566 {
567 return mImpl->getDimensionName(index);
568 }
569
570protected:
571 apiv::VTensor* mImpl;
572 virtual ~ITensor() noexcept = default;
573};
574
582class ILayer : public INoCopy
583{
584public:
590 LayerType getType() const noexcept
591 {
592 return mLayer->getType();
593 }
594
604 void setName(char const* name) noexcept
605 {
606 mLayer->setName(name);
607 }
608
614 char const* getName() const noexcept
615 {
616 return mLayer->getName();
617 }
618
622 int32_t getNbInputs() const noexcept
623 {
624 return mLayer->getNbInputs();
625 }
626
635 ITensor* getInput(int32_t index) const noexcept
636 {
637 return mLayer->getInput(index);
638 }
639
643 int32_t getNbOutputs() const noexcept
644 {
645 return mLayer->getNbOutputs();
646 }
647
653 ITensor* getOutput(int32_t index) const noexcept
654 {
655 return mLayer->getOutput(index);
656 }
657
670 void setInput(int32_t index, ITensor& tensor) noexcept
671 {
672 return mLayer->setInput(index, tensor);
673 }
674
703 TRT_DEPRECATED void setPrecision(DataType dataType) noexcept
704 {
705 mLayer->setPrecision(dataType);
706 }
707
715 DataType getPrecision() const noexcept
716 {
717 return mLayer->getPrecision();
718 }
719
729 TRT_DEPRECATED bool precisionIsSet() const noexcept
730 {
731 return mLayer->precisionIsSet();
732 }
733
742 {
743 mLayer->resetPrecision();
744 }
745
791 TRT_DEPRECATED void setOutputType(int32_t index, DataType dataType) noexcept
792 {
793 mLayer->setOutputType(index, dataType);
794 }
795
806 DataType getOutputType(int32_t index) const noexcept
807 {
808 return mLayer->getOutputType(index);
809 }
810
822 TRT_DEPRECATED bool outputTypeIsSet(int32_t index) const noexcept
823 {
824 return mLayer->outputTypeIsSet(index);
825 }
826
836 TRT_DEPRECATED void resetOutputType(int32_t index) noexcept
837 {
838 return mLayer->resetOutputType(index);
839 }
840
854 void setMetadata(char const* metadata) noexcept
855 {
856 mLayer->setMetadata(metadata);
857 }
858
867 char const* getMetadata() const noexcept
868 {
869 return mLayer->getMetadata();
870 }
871
888 bool setNbRanks(int32_t nbRanks) noexcept
889 {
890 return mLayer->setNbRanks(nbRanks);
891 }
892
900 int32_t getNbRanks() const noexcept
901 {
902 return mLayer->getNbRanks();
903 }
904
905protected:
906 virtual ~ILayer() noexcept = default;
907 apiv::VLayer* mLayer;
908};
909
1066enum class PaddingMode : int32_t
1067{
1069 kEXPLICIT_ROUND_UP = 1,
1070 kSAME_UPPER = 2,
1071 kSAME_LOWER = 3,
1072};
1073
1074namespace impl
1075{
1081template <>
1083{
1084 static constexpr int32_t kVALUE = 4;
1085};
1086} // namespace impl
1087
1101{
1102public:
1110 void setNbOutputMaps(int64_t nbOutputMaps) noexcept
1111 {
1112 mImpl->setNbOutputMaps(nbOutputMaps);
1113 }
1114
1120 int64_t getNbOutputMaps() const noexcept
1121 {
1122 return mImpl->getNbOutputMaps();
1123 }
1124
1140 void setNbGroups(int64_t nbGroups) noexcept
1141 {
1142 mImpl->setNbGroups(nbGroups);
1143 }
1144
1150 int64_t getNbGroups() const noexcept
1151 {
1152 return mImpl->getNbGroups();
1153 }
1154
1164 void setKernelWeights(Weights weights) noexcept
1165 {
1166 mImpl->setKernelWeights(weights);
1167 }
1168
1174 Weights getKernelWeights() const noexcept
1175 {
1176 return mImpl->getKernelWeights();
1177 }
1178
1189 void setBiasWeights(Weights weights) noexcept
1190 {
1191 mImpl->setBiasWeights(weights);
1192 }
1193
1199 Weights getBiasWeights() const noexcept
1200 {
1201 return mImpl->getBiasWeights();
1202 }
1203
1216 void setPrePadding(Dims const& padding) noexcept
1217 {
1218 mImpl->setPrePadding(padding);
1219 }
1220
1226 Dims getPrePadding() const noexcept
1227 {
1228 return mImpl->getPrePadding();
1229 }
1230
1243 void setPostPadding(Dims const& padding) noexcept
1244 {
1245 mImpl->setPostPadding(padding);
1246 }
1247
1253 Dims getPostPadding() const noexcept
1254 {
1255 return mImpl->getPostPadding();
1256 }
1257
1267 void setPaddingMode(PaddingMode paddingMode) noexcept
1268 {
1269 mImpl->setPaddingMode(paddingMode);
1270 }
1271
1280 {
1281 return mImpl->getPaddingMode();
1282 }
1283
1292 void setKernelSizeNd(Dims const& kernelSize) noexcept
1293 {
1294 mImpl->setKernelSizeNd(kernelSize);
1295 }
1296
1302 Dims getKernelSizeNd() const noexcept
1303 {
1304 return mImpl->getKernelSizeNd();
1305 }
1306
1317 void setStrideNd(Dims const& stride) noexcept
1318 {
1319 mImpl->setStrideNd(stride);
1320 }
1321
1327 Dims getStrideNd() const noexcept
1328 {
1329 return mImpl->getStrideNd();
1330 }
1331
1345 void setPaddingNd(Dims const& padding) noexcept
1346 {
1347 mImpl->setPaddingNd(padding);
1348 }
1349
1357 Dims getPaddingNd() const noexcept
1358 {
1359 return mImpl->getPaddingNd();
1360 }
1361
1371 void setDilationNd(Dims const& dilation) noexcept
1372 {
1373 mImpl->setDilationNd(dilation);
1374 }
1375
1381 Dims getDilationNd() const noexcept
1382 {
1383 return mImpl->getDilationNd();
1384 }
1385
1400 using ILayer::setInput;
1401
1402protected:
1403 virtual ~IConvolutionLayer() noexcept = default;
1404 apiv::VConvolutionLayer* mImpl;
1405};
1406
1421{
1422public:
1431 {
1432 mImpl->setActivationType(type);
1433 }
1434
1441 {
1442 return mImpl->getActivationType();
1443 }
1444
1455 void setAlpha(float alpha) noexcept
1456 {
1457 mImpl->setAlpha(alpha);
1458 }
1459
1469 void setBeta(float beta) noexcept
1470 {
1471 mImpl->setBeta(beta);
1472 }
1473
1478 float getAlpha() const noexcept
1479 {
1480 return mImpl->getAlpha();
1481 }
1482
1487 float getBeta() const noexcept
1488 {
1489 return mImpl->getBeta();
1490 }
1491
1492protected:
1493 virtual ~IActivationLayer() noexcept = default;
1494 apiv::VActivationLayer* mImpl;
1495};
1496
1502enum class PoolingType : int32_t
1503{
1504 kMAX = 0,
1505 kAVERAGE = 1,
1507};
1508
1509namespace impl
1510{
1516template <>
1518{
1519 static constexpr int32_t kVALUE = 3;
1520};
1521} // namespace impl
1522
1534class IPoolingLayer : public ILayer
1535{
1536public:
1544 void setPoolingType(PoolingType type) noexcept
1545 {
1546 mImpl->setPoolingType(type);
1547 }
1548
1555 {
1556 return mImpl->getPoolingType();
1557 }
1558
1569 void setBlendFactor(float blendFactor) noexcept
1570 {
1571 mImpl->setBlendFactor(blendFactor);
1572 }
1573
1582 float getBlendFactor() const noexcept
1583 {
1584 return mImpl->getBlendFactor();
1585 }
1586
1596 void setAverageCountExcludesPadding(bool exclusive) noexcept
1597 {
1598 mImpl->setAverageCountExcludesPadding(exclusive);
1599 }
1600
1608 {
1609 return mImpl->getAverageCountExcludesPadding();
1610 }
1611
1625 void setPrePadding(Dims const& padding) noexcept
1626 {
1627 mImpl->setPrePadding(padding);
1628 }
1629
1635 Dims getPrePadding() const noexcept
1636 {
1637 return mImpl->getPrePadding();
1638 }
1639
1653 void setPostPadding(Dims const& padding) noexcept
1654 {
1655 mImpl->setPostPadding(padding);
1656 }
1657
1663 Dims getPostPadding() const noexcept
1664 {
1665 return mImpl->getPostPadding();
1666 }
1667
1676 void setPaddingMode(PaddingMode paddingMode) noexcept
1677 {
1678 mImpl->setPaddingMode(paddingMode);
1679 }
1680
1688 {
1689 return mImpl->getPaddingMode();
1690 }
1691
1700 void setWindowSizeNd(Dims const& windowSize) noexcept
1701 {
1702 mImpl->setWindowSizeNd(windowSize);
1703 }
1704
1710 Dims getWindowSizeNd() const noexcept
1711 {
1712 return mImpl->getWindowSizeNd();
1713 }
1714
1725 void setStrideNd(Dims const& stride) noexcept
1726 {
1727 mImpl->setStrideNd(stride);
1728 }
1729
1735 Dims getStrideNd() const noexcept
1736 {
1737 return mImpl->getStrideNd();
1738 }
1739
1754 void setPaddingNd(Dims const& padding) noexcept
1755 {
1756 mImpl->setPaddingNd(padding);
1757 }
1758
1766 Dims getPaddingNd() const noexcept
1767 {
1768 return mImpl->getPaddingNd();
1769 }
1770
1771protected:
1772 virtual ~IPoolingLayer() noexcept = default;
1773 apiv::VPoolingLayer* mImpl;
1774};
1775
1785class ILRNLayer : public ILayer
1786{
1787public:
1797 void setWindowSize(int64_t windowSize) noexcept
1798 {
1799 mImpl->setWindowSize(windowSize);
1800 }
1801
1807 int64_t getWindowSize() const noexcept
1808 {
1809 return mImpl->getWindowSize();
1810 }
1811
1819 void setAlpha(float alpha) noexcept
1820 {
1821 mImpl->setAlpha(alpha);
1822 }
1823
1829 float getAlpha() const noexcept
1830 {
1831 return mImpl->getAlpha();
1832 }
1833
1841 void setBeta(float beta) noexcept
1842 {
1843 mImpl->setBeta(beta);
1844 }
1845
1851 float getBeta() const noexcept
1852 {
1853 return mImpl->getBeta();
1854 }
1855
1863 void setK(float k) noexcept
1864 {
1865 mImpl->setK(k);
1866 }
1867
1873 float getK() const noexcept
1874 {
1875 return mImpl->getK();
1876 }
1877
1878protected:
1879 virtual ~ILRNLayer() noexcept = default;
1880 apiv::VLRNLayer* mImpl;
1881};
1882
1888enum class ScaleMode : int32_t
1889{
1890 kUNIFORM = 0,
1891 kCHANNEL = 1,
1892 kELEMENTWISE = 2
1893};
1894
1900template <>
1901constexpr inline int32_t EnumMax<ScaleMode>() noexcept
1902{
1903 return 3;
1904}
1905
1931class IScaleLayer : public ILayer
1932{
1933public:
1939 void setMode(ScaleMode mode) noexcept
1940 {
1941 mImpl->setMode(mode);
1942 }
1943
1949 ScaleMode getMode() const noexcept
1950 {
1951 return mImpl->getMode();
1952 }
1953
1959 void setShift(Weights shift) noexcept
1960 {
1961 mImpl->setShift(shift);
1962 }
1963
1969 Weights getShift() const noexcept
1970 {
1971 return mImpl->getShift();
1972 }
1973
1979 void setScale(Weights scale) noexcept
1980 {
1981 mImpl->setScale(scale);
1982 }
1983
1989 Weights getScale() const noexcept
1990 {
1991 return mImpl->getScale();
1992 }
1993
1999 void setPower(Weights power) noexcept
2000 {
2001 mImpl->setPower(power);
2002 }
2003
2009 Weights getPower() const noexcept
2010 {
2011 return mImpl->getPower();
2012 }
2013
2024 int32_t getChannelAxis() const noexcept
2025 {
2026 return mImpl->getChannelAxis();
2027 }
2028
2045 void setChannelAxis(int32_t channelAxis) noexcept
2046 {
2047 mImpl->setChannelAxis(channelAxis);
2048 }
2049
2050protected:
2051 virtual ~IScaleLayer() noexcept = default;
2052 apiv::VScaleLayer* mImpl;
2053};
2054
2075class ISoftMaxLayer : public ILayer
2076{
2077public:
2098 void setAxes(uint32_t axes) noexcept
2099 {
2100 mImpl->setAxes(axes);
2101 }
2102
2108 uint32_t getAxes() const noexcept
2109 {
2110 return mImpl->getAxes();
2111 }
2112
2113protected:
2114 virtual ~ISoftMaxLayer() noexcept = default;
2115 apiv::VSoftMaxLayer* mImpl;
2116};
2117
2131{
2132public:
2144 void setAxis(int32_t axis) noexcept
2145 {
2146 mImpl->setAxis(axis);
2147 }
2148
2154 int32_t getAxis() const noexcept
2155 {
2156 return mImpl->getAxis();
2157 }
2158
2159protected:
2160 virtual ~IConcatenationLayer() noexcept = default;
2161 apiv::VConcatenationLayer* mImpl;
2162};
2163
2172{
2173public:
2181 void setNbOutputMaps(int64_t nbOutputMaps) noexcept
2182 {
2183 mImpl->setNbOutputMaps(nbOutputMaps);
2184 }
2185
2191 int64_t getNbOutputMaps() const noexcept
2192 {
2193 return mImpl->getNbOutputMaps();
2194 }
2195
2211 void setNbGroups(int64_t nbGroups) noexcept
2212 {
2213 mImpl->setNbGroups(nbGroups);
2214 }
2215
2221 int64_t getNbGroups() const noexcept
2222 {
2223 return mImpl->getNbGroups();
2224 }
2225
2235 void setKernelWeights(Weights weights) noexcept
2236 {
2237 mImpl->setKernelWeights(weights);
2238 }
2239
2245 Weights getKernelWeights() const noexcept
2246 {
2247 return mImpl->getKernelWeights();
2248 }
2249
2260 void setBiasWeights(Weights weights) noexcept
2261 {
2262 mImpl->setBiasWeights(weights);
2263 }
2264
2270 Weights getBiasWeights() const noexcept
2271 {
2272 return mImpl->getBiasWeights();
2273 }
2274
2287 void setPrePadding(Dims const& padding) noexcept
2288 {
2289 mImpl->setPrePadding(padding);
2290 }
2291
2297 Dims getPrePadding() const noexcept
2298 {
2299 return mImpl->getPrePadding();
2300 }
2301
2314 void setPostPadding(Dims const& padding) noexcept
2315 {
2316 mImpl->setPostPadding(padding);
2317 }
2318
2324 Dims getPostPadding() const noexcept
2325 {
2326 return mImpl->getPostPadding();
2327 }
2328
2338 void setPaddingMode(PaddingMode paddingMode) noexcept
2339 {
2340 mImpl->setPaddingMode(paddingMode);
2341 }
2342
2351 {
2352 return mImpl->getPaddingMode();
2353 }
2354
2365 void setKernelSizeNd(Dims const& kernelSize) noexcept
2366 {
2367 mImpl->setKernelSizeNd(kernelSize);
2368 }
2369
2375 Dims getKernelSizeNd() const noexcept
2376 {
2377 return mImpl->getKernelSizeNd();
2378 }
2379
2392 void setStrideNd(Dims const& stride) noexcept
2393 {
2394 mImpl->setStrideNd(stride);
2395 }
2396
2402 Dims getStrideNd() const noexcept
2403 {
2404 return mImpl->getStrideNd();
2405 }
2406
2420 void setPaddingNd(Dims const& padding) noexcept
2421 {
2422 mImpl->setPaddingNd(padding);
2423 }
2424
2432 Dims getPaddingNd() const noexcept
2433 {
2434 return mImpl->getPaddingNd();
2435 }
2436
2449 using ILayer::setInput;
2450
2458 void setDilationNd(Dims const& dilation) noexcept
2459 {
2460 mImpl->setDilationNd(dilation);
2461 }
2462
2468 Dims getDilationNd() const noexcept
2469 {
2470 return mImpl->getDilationNd();
2471 }
2472
2473protected:
2474 virtual ~IDeconvolutionLayer() noexcept = default;
2475 apiv::VDeconvolutionLayer* mImpl;
2476};
2477
2490enum class ElementWiseOperation : int32_t
2491{
2492 kSUM = 0,
2493 kPROD = 1,
2494 kMAX = 2,
2495 kMIN = 3,
2496 kSUB = 4,
2497 kDIV = 5,
2498 kPOW = 6,
2499 kFLOOR_DIV = 7,
2500 kAND = 8,
2501 kOR = 9,
2502 kXOR = 10,
2503 kEQUAL = 11,
2504 kGREATER = 12,
2505 kLESS = 13
2506};
2507
2508namespace impl
2509{
2515template <>
2517{
2518 static constexpr int32_t kVALUE = 14;
2519};
2520} // namespace impl
2521
2542{
2543public:
2554 {
2555 return mImpl->setOperation(op);
2556 }
2557
2566 {
2567 return mImpl->getOperation();
2568 }
2569
2570protected:
2571 apiv::VElementWiseLayer* mImpl;
2572 virtual ~IElementWiseLayer() noexcept = default;
2573};
2574
2580enum class GatherMode : int32_t
2581{
2582 kDEFAULT = 0,
2583 kELEMENT = 1,
2584 kND = 2
2585};
2586
2592template <>
2593constexpr inline int32_t EnumMax<GatherMode>() noexcept
2594{
2595 return 3;
2596}
2597
2674class IGatherLayer : public ILayer
2675{
2676public:
2686 void setGatherAxis(int32_t axis) noexcept
2687 {
2688 mImpl->setGatherAxis(axis);
2689 }
2690
2698 int32_t getGatherAxis() const noexcept
2699 {
2700 return mImpl->getGatherAxis();
2701 }
2702
2721 void setNbElementWiseDims(int32_t elementWiseDims) noexcept
2722 {
2723 mImpl->setNbElementWiseDims(elementWiseDims);
2724 }
2725
2731 int32_t getNbElementWiseDims() const noexcept
2732 {
2733 return mImpl->getNbElementWiseDims();
2734 }
2735
2741 void setMode(GatherMode mode) noexcept
2742 {
2743 mImpl->setMode(mode);
2744 }
2745
2751 GatherMode getMode() const noexcept
2752 {
2753 return mImpl->getMode();
2754 }
2755
2756protected:
2757 apiv::VGatherLayer* mImpl;
2758 virtual ~IGatherLayer() noexcept = default;
2759};
2760
2773{
2774public:
2781 {
2782 return mImpl->getPlugin();
2783 }
2784
2785protected:
2786 apiv::VPluginV2Layer* mImpl;
2787 virtual ~IPluginV2Layer() noexcept = default;
2788};
2789
2800{
2801public:
2808 {
2809 return mImpl->getPlugin();
2810 }
2811
2812protected:
2813 apiv::VPluginV3Layer* mImpl;
2814 virtual ~IPluginV3Layer() noexcept = default;
2815};
2816
2833enum class UnaryOperation : int32_t
2834{
2835 kEXP = 0,
2836 kLOG = 1,
2837 kSQRT = 2,
2838 kRECIP = 3,
2839 kABS = 4,
2840 kNEG = 5,
2841 kSIN = 6,
2842 kCOS = 7,
2843 kTAN = 8,
2844 kSINH = 9,
2845 kCOSH = 10,
2846 kASIN = 11,
2847 kACOS = 12,
2848 kATAN = 13,
2849 kASINH = 14,
2850 kACOSH = 15,
2851 kATANH = 16,
2852 kCEIL = 17,
2853 kFLOOR = 18,
2854 kERF = 19,
2855 kNOT = 20,
2856 kSIGN = 21,
2857 kROUND = 22,
2858 kISINF = 23,
2859 kISNAN = 24,
2860};
2861
2867template <>
2868constexpr inline int32_t EnumMax<UnaryOperation>() noexcept
2869{
2870 return 25;
2871}
2872
2880class IUnaryLayer : public ILayer
2881{
2882public:
2891 {
2892 mImpl->setOperation(op);
2893 }
2894
2901 {
2902 return mImpl->getOperation();
2903 }
2904
2905protected:
2906 apiv::VUnaryLayer* mImpl;
2907 virtual ~IUnaryLayer() noexcept = default;
2908};
2909
2933enum class ReduceOperation : int32_t
2934{
2935 kSUM = 0,
2936 kPROD = 1,
2937 kMAX = 2,
2938 kMIN = 3,
2939 kAVG = 4,
2940 kNONE = 5,
2941};
2942
2948template <>
2949constexpr inline int32_t EnumMax<ReduceOperation>() noexcept
2950{
2951 return 6;
2952}
2953
2961enum class CollectiveOperation : int32_t
2962{
2963 kALL_REDUCE = 0,
2964 kALL_GATHER = 1,
2965 kBROADCAST = 2,
2966 kREDUCE = 3,
2967 kREDUCE_SCATTER = 4,
2968};
2969
2975template <>
2977{
2978 static constexpr int32_t kVALUE = 5;
2979};
2980
2988class IReduceLayer : public ILayer
2989{
2990public:
2997 {
2998 mImpl->setOperation(op);
2999 }
3000
3007 {
3008 return mImpl->getOperation();
3009 }
3010
3016 void setReduceAxes(uint32_t reduceAxes) noexcept
3017 {
3018 mImpl->setReduceAxes(reduceAxes);
3019 }
3020
3026 uint32_t getReduceAxes() const noexcept
3027 {
3028 return mImpl->getReduceAxes();
3029 }
3030
3036 void setKeepDimensions(bool keepDimensions) noexcept
3037 {
3038 mImpl->setKeepDimensions(keepDimensions);
3039 }
3040
3046 bool getKeepDimensions() const noexcept
3047 {
3048 return mImpl->getKeepDimensions();
3049 }
3050
3051protected:
3052 apiv::VReduceLayer* mImpl;
3053 virtual ~IReduceLayer() noexcept = default;
3054};
3055
3068class IPaddingLayer : public ILayer
3069{
3070public:
3080 void setPrePaddingNd(Dims const& padding) noexcept
3081 {
3082 mImpl->setPrePaddingNd(padding);
3083 }
3084
3092 Dims getPrePaddingNd() const noexcept
3093 {
3094 return mImpl->getPrePaddingNd();
3095 }
3096
3106 void setPostPaddingNd(Dims const& padding) noexcept
3107 {
3108 mImpl->setPostPaddingNd(padding);
3109 }
3110
3118 Dims getPostPaddingNd() const noexcept
3119 {
3120 return mImpl->getPostPaddingNd();
3121 }
3122
3123protected:
3124 apiv::VPaddingLayer* mImpl;
3125 virtual ~IPaddingLayer() noexcept = default;
3126};
3127
3134{
3141 int32_t order[Dims::MAX_DIMS];
3142};
3143
3156class IShuffleLayer : public ILayer
3157{
3158public:
3168 void setFirstTranspose(Permutation permutation) noexcept
3169 {
3170 mImpl->setFirstTranspose(permutation);
3171 }
3172
3181 {
3182 return mImpl->getFirstTranspose();
3183 }
3184
3208 void setReshapeDimensions(Dims const& dimensions) noexcept
3209 {
3210 mImpl->setReshapeDimensions(dimensions);
3211 }
3212
3222 {
3223 return mImpl->getReshapeDimensions();
3224 }
3225
3231 //
3254 using ILayer::setInput;
3255
3268 void setSecondTranspose(Permutation permutation) noexcept
3269 {
3270 mImpl->setSecondTranspose(permutation);
3271 }
3272
3281 {
3282 return mImpl->getSecondTranspose();
3283 }
3284
3296 void setZeroIsPlaceholder(bool zeroIsPlaceholder) noexcept
3297 {
3298 return mImpl->setZeroIsPlaceholder(zeroIsPlaceholder);
3299 }
3300
3309 bool getZeroIsPlaceholder() const noexcept
3310 {
3311 return mImpl->getZeroIsPlaceholder();
3312 }
3313
3314protected:
3315 apiv::VShuffleLayer* mImpl;
3316 virtual ~IShuffleLayer() noexcept = default;
3317};
3318
3324enum class SampleMode : int32_t
3325{
3326 kSTRICT_BOUNDS = 0,
3327 kWRAP = 1,
3328 kCLAMP = 2,
3329 kFILL = 3,
3330 kREFLECT = 4,
3333};
3334
3340template <>
3341constexpr inline int32_t EnumMax<SampleMode>() noexcept
3342{
3343 return 5;
3344}
3345
3408class ISliceLayer : public ILayer
3409{
3410public:
3420 void setStart(Dims const& start) noexcept
3421 {
3422 mImpl->setStart(start);
3423 }
3424
3435 Dims getStart() const noexcept
3436 {
3437 return mImpl->getStart();
3438 }
3439
3449 void setSize(Dims const& size) noexcept
3450 {
3451 return mImpl->setSize(size);
3452 }
3453
3464 Dims getSize() const noexcept
3465 {
3466 return mImpl->getSize();
3467 }
3468
3478 void setStride(Dims const& stride) noexcept
3479 {
3480 mImpl->setStride(stride);
3481 }
3482
3493 Dims getStride() const noexcept
3494 {
3495 return mImpl->getStride();
3496 }
3497
3503 void setMode(SampleMode mode) noexcept
3504 {
3505 mImpl->setMode(mode);
3506 }
3507
3513 SampleMode getMode() const noexcept
3514 {
3515 return mImpl->getMode();
3516 }
3517
3545 using ILayer::setInput;
3546
3556 void setAxes(Dims const& axes) noexcept
3557 {
3558 mImpl->setAxes(axes);
3559 }
3560
3571 Dims getAxes() const noexcept
3572 {
3573 return mImpl->getAxes();
3574 }
3575
3576protected:
3577 apiv::VSliceLayer* mImpl;
3578 virtual ~ISliceLayer() noexcept = default;
3579};
3580
3593class IShapeLayer : public ILayer
3594{
3595protected:
3596 apiv::VShapeLayer* mImpl;
3597 virtual ~IShapeLayer() noexcept = default;
3598};
3599
3605enum class TopKOperation : int32_t
3606{
3607 kMAX = 0,
3608 kMIN = 1,
3609};
3610
3616template <>
3617constexpr inline int32_t EnumMax<TopKOperation>() noexcept
3618{
3619 return 2;
3620}
3621
3633class ITopKLayer : public ILayer
3634{
3635public:
3641 void setOperation(TopKOperation op) noexcept
3642 {
3643 mImpl->setOperation(op);
3644 }
3645
3652 {
3653 return mImpl->getOperation();
3654 }
3655
3665 void setK(int32_t k) noexcept
3666 {
3667 mImpl->setK(k);
3668 }
3669
3679 int32_t getK() const noexcept
3680 {
3681 return mImpl->getK();
3682 }
3683
3689 void setReduceAxes(uint32_t reduceAxes) noexcept
3690 {
3691 mImpl->setReduceAxes(reduceAxes);
3692 }
3693
3699 uint32_t getReduceAxes() const noexcept
3700 {
3701 return mImpl->getReduceAxes();
3702 }
3703
3718 using ILayer::setInput;
3719
3730 bool setIndicesType(DataType type) noexcept
3731 {
3732 return mImpl->setIndicesType(type);
3733 }
3734
3742 DataType getIndicesType() const noexcept
3743 {
3744 return mImpl->getIndicesType();
3745 }
3746
3747protected:
3748 apiv::VTopKLayer* mImpl;
3749 virtual ~ITopKLayer() noexcept = default;
3750};
3751
3758enum class MatrixOperation : int32_t
3759{
3763 kNONE = 0,
3764
3766 kTRANSPOSE = 1,
3767
3778 kVECTOR = 2,
3779};
3780
3786template <>
3787constexpr inline int32_t EnumMax<MatrixOperation>() noexcept
3788{
3789 return 3;
3790}
3791
3818{
3819public:
3828 void setOperation(int32_t index, MatrixOperation op) noexcept
3829 {
3830 mImpl->setOperation(index, op);
3831 }
3832
3840 MatrixOperation getOperation(int32_t index) const noexcept
3841 {
3842 return mImpl->getOperation(index);
3843 }
3844
3845protected:
3846 apiv::VMatrixMultiplyLayer* mImpl;
3847 virtual ~IMatrixMultiplyLayer() noexcept = default;
3848};
3849
3871class INonZeroLayer : public ILayer
3872{
3873public:
3884 bool setIndicesType(DataType type) noexcept
3885 {
3886 return mImpl->setIndicesType(type);
3887 }
3888
3896 DataType getIndicesType() const noexcept
3897 {
3898 return mImpl->getIndicesType();
3899 }
3900
3901protected:
3902 virtual ~INonZeroLayer() noexcept = default;
3903 apiv::VNonZeroLayer* mImpl;
3904};
3905
3921{
3922protected:
3923 apiv::VRaggedSoftMaxLayer* mImpl;
3924 virtual ~IRaggedSoftMaxLayer() noexcept = default;
3925};
3926
3971{
3972protected:
3973 apiv::VIdentityLayer* mImpl;
3974 virtual ~IIdentityLayer() noexcept = default;
3975};
3976
3983class ICastLayer : public ILayer
3984{
3985public:
3993 void setToType(DataType toType) noexcept
3994 {
3995 mImpl->setToType(toType);
3996 }
3997
4004 DataType getToType() const noexcept
4005 {
4006 return mImpl->getToType();
4007 }
4008
4009protected:
4010 apiv::VCastLayer* mImpl;
4011 virtual ~ICastLayer() noexcept = default;
4012};
4013
4023{
4024public:
4033 void setWeights(Weights weights) noexcept
4034 {
4035 mImpl->setWeights(weights);
4036 }
4037
4043 Weights getWeights() const noexcept
4044 {
4045 return mImpl->getWeights();
4046 }
4047
4055 void setDimensions(Dims const& dimensions) noexcept
4056 {
4057 mImpl->setDimensions(dimensions);
4058 }
4059
4067 Dims getDimensions() const noexcept
4068 {
4069 return mImpl->getDimensions();
4070 }
4071
4072protected:
4073 apiv::VConstantLayer* mImpl;
4074 virtual ~IConstantLayer() noexcept = default;
4075};
4076
4087{
4088protected:
4089 apiv::VParametricReLULayer* mImpl;
4090 virtual ~IParametricReLULayer() noexcept = default;
4091};
4092
4098enum class InterpolationMode : int32_t
4099{
4100 kNEAREST = 0,
4101 kLINEAR = 1,
4102 kCUBIC = 2
4103};
4104
4105namespace impl
4106{
4112template <>
4114{
4115 static constexpr int32_t kVALUE = 3;
4116};
4117} // namespace impl
4118
4127{
4140 kALIGN_CORNERS = 0,
4141
4148 kASYMMETRIC = 1,
4149
4156 kHALF_PIXEL = 2,
4157};
4158
4159namespace impl
4160{
4166template <>
4168{
4169 static constexpr int32_t kVALUE = 3;
4170};
4171} // namespace impl
4172
4180enum class ResizeSelector : int32_t
4181{
4183 kFORMULA = 0,
4184
4186 kUPPER = 1,
4187};
4188
4189namespace impl
4190{
4196template <>
4198{
4199 static constexpr int32_t kVALUE = 2;
4200};
4201} // namespace impl
4202
4210enum class ResizeRoundMode : int32_t
4211{
4213 kHALF_UP = 0,
4214
4216 kHALF_DOWN = 1,
4217
4219 kFLOOR = 2,
4220
4222 kCEIL = 3,
4223};
4224
4225namespace impl
4226{
4232template <>
4234{
4235 static constexpr int32_t kVALUE = 4;
4236};
4237} // namespace impl
4238
4275class IResizeLayer : public ILayer
4276{
4277public:
4296 void setOutputDimensions(Dims const& dimensions) noexcept
4297 {
4298 return mImpl->setOutputDimensions(dimensions);
4299 }
4300
4306 Dims getOutputDimensions() const noexcept
4307 {
4308 return mImpl->getOutputDimensions();
4309 }
4310
4336 void setScales(float const* scales, int32_t nbScales) noexcept
4337 {
4338 mImpl->setScales(scales, nbScales);
4339 }
4340
4355 int32_t getScales(int32_t size, float* scales) const noexcept
4356 {
4357 return mImpl->getScales(size, scales);
4358 }
4359
4367 void setResizeMode(InterpolationMode interpolationMode) noexcept
4368 {
4369 mImpl->setResizeMode(interpolationMode);
4370 }
4371
4378 {
4379 return mImpl->getResizeMode();
4380 }
4381
4401 using ILayer::setInput;
4402
4413 {
4414 mImpl->setCoordinateTransformation(coordTransform);
4415 }
4416
4423 {
4424 return mImpl->getCoordinateTransformation();
4425 }
4426
4438 {
4439 mImpl->setSelectorForSinglePixel(selector);
4440 }
4441
4448 {
4449 return mImpl->getSelectorForSinglePixel();
4450 }
4451
4462 {
4463 mImpl->setNearestRounding(value);
4464 }
4465
4472 {
4473 return mImpl->getNearestRounding();
4474 }
4475
4493 void setCubicCoeff(float A) noexcept
4494 {
4495 mImpl->setCubicCoeff(A);
4496 }
4497
4503 float getCubicCoeff() const noexcept
4504 {
4505 return mImpl->getCubicCoeff();
4506 }
4507
4516 void setExcludeOutside(bool excludeFlag) noexcept
4517 {
4518 mImpl->setExcludeOutside(excludeFlag);
4519 }
4520
4526 bool getExcludeOutside() const noexcept
4527 {
4528 return mImpl->getExcludeOutside();
4529 }
4530
4531protected:
4532 virtual ~IResizeLayer() noexcept = default;
4533 apiv::VResizeLayer* mImpl;
4534};
4535
4541enum class LoopOutput : int32_t
4542{
4544 kLAST_VALUE = 0,
4545
4547 kCONCATENATE = 1,
4548
4550 kREVERSE = 2
4551};
4552
4558template <>
4559constexpr inline int32_t EnumMax<LoopOutput>() noexcept
4560{
4561 return 3;
4562}
4563
4569enum class TripLimit : int32_t
4570{
4571
4572 kCOUNT = 0,
4573 kWHILE = 1
4574};
4575
4581template <>
4582constexpr inline int32_t EnumMax<TripLimit>() noexcept
4583{
4584 return 2;
4585}
4586
4587class ILoop;
4588
4603{
4604public:
4608 ILoop* getLoop() const noexcept
4609 {
4610 return mBoundary->getLoop();
4611 }
4612
4613protected:
4614 virtual ~ILoopBoundaryLayer() noexcept = default;
4615 apiv::VLoopBoundaryLayer* mBoundary;
4616};
4617
4626{
4627public:
4632 {
4633 return mBoundary->getConditional();
4634 }
4635
4636protected:
4637 virtual ~IIfConditionalBoundaryLayer() noexcept = default;
4638 apiv::VConditionalBoundaryLayer* mBoundary;
4639};
4640
4647{
4648public:
4649protected:
4650 virtual ~IConditionLayer() noexcept = default;
4651 apiv::VConditionLayer* mImpl;
4652};
4653
4664{
4665public:
4666protected:
4667 virtual ~IIfConditionalOutputLayer() noexcept = default;
4668 apiv::VConditionalOutputLayer* mImpl;
4669};
4670
4677{
4678public:
4679protected:
4680 virtual ~IIfConditionalInputLayer() noexcept = default;
4681 apiv::VConditionalInputLayer* mImpl;
4682};
4683
4709{
4710public:
4721 {
4722 return mImpl->setCondition(condition);
4723 }
4724
4738 IIfConditionalOutputLayer* addOutput(ITensor& trueSubgraphOutput, ITensor& falseSubgraphOutput) noexcept
4739 {
4740 return mImpl->addOutput(trueSubgraphOutput, falseSubgraphOutput);
4741 }
4742
4751 {
4752 return mImpl->addInput(input);
4753 }
4754
4765 void setName(char const* name) noexcept
4766 {
4767 mImpl->setName(name);
4768 }
4769
4775 char const* getName() const noexcept
4776 {
4777 return mImpl->getName();
4778 }
4779
4780protected:
4781 virtual ~IIfConditional() noexcept = default;
4782 apiv::VIfConditional* mImpl;
4783};
4784
4793{
4794public:
4800 //
4813 using ILayer::setInput;
4814
4815protected:
4816 virtual ~IRecurrenceLayer() noexcept = default;
4817 apiv::VRecurrenceLayer* mImpl;
4818};
4819
4840{
4841public:
4845 LoopOutput getLoopOutput() const noexcept
4846 {
4847 return mImpl->getLoopOutput();
4848 }
4849
4862 void setAxis(int32_t axis) noexcept
4863 {
4864 mImpl->setAxis(axis);
4865 }
4866
4870 int32_t getAxis() const noexcept
4871 {
4872 return mImpl->getAxis();
4873 }
4874
4880 //
4895 using ILayer::setInput;
4896
4897protected:
4898 virtual ~ILoopOutputLayer() noexcept = default;
4899 apiv::VLoopOutputLayer* mImpl;
4900};
4901
4914{
4915public:
4919 TripLimit getTripLimit() const noexcept
4920 {
4921 return mImpl->getTripLimit();
4922 }
4923
4924protected:
4925 virtual ~ITripLimitLayer() noexcept = default;
4926 apiv::VTripLimitLayer* mImpl;
4927};
4928
4940{
4941public:
4945 void setAxis(int32_t axis) noexcept
4946 {
4947 mImpl->setAxis(axis);
4948 }
4949
4953 int32_t getAxis() const noexcept
4954 {
4955 return mImpl->getAxis();
4956 }
4957
4967 void setReverse(bool reverse) noexcept
4968 {
4969 mImpl->setReverse(reverse);
4970 }
4971
4977 bool getReverse() const noexcept
4978 {
4979 return mImpl->getReverse();
4980 }
4981
4982protected:
4983 virtual ~IIteratorLayer() noexcept = default;
4984 apiv::VIteratorLayer* mImpl;
4985};
4986
4997class ILoop : public INoCopy
4998{
4999public:
5006 IRecurrenceLayer* addRecurrence(ITensor& initialValue) noexcept
5007 {
5008 return mImpl->addRecurrence(initialValue);
5009 }
5010
5028 {
5029 return mImpl->addTripLimit(tensor, limit);
5030 }
5031
5040 IIteratorLayer* addIterator(ITensor& tensor, int32_t axis = 0, bool reverse = false) noexcept
5041 {
5042 return mImpl->addIterator(tensor, axis, reverse);
5043 }
5044
5053 ILoopOutputLayer* addLoopOutput(ITensor& tensor, LoopOutput outputKind, int32_t axis = 0) noexcept
5054 {
5055 return mImpl->addLoopOutput(tensor, outputKind, axis);
5056 }
5057
5068 void setName(char const* name) noexcept
5069 {
5070 mImpl->setName(name);
5071 }
5072
5078 char const* getName() const noexcept
5079 {
5080 return mImpl->getName();
5081 }
5082
5083protected:
5084 virtual ~ILoop() noexcept = default;
5085 apiv::VLoop* mImpl;
5086};
5087
//!
//! \class ISelectLayer
//!
//! \brief Elementwise select between two tensors based on a condition tensor;
//! configuration is entirely via the layer inputs, so no extra members.
//!
class ISelectLayer : public ILayer
{
protected:
    virtual ~ISelectLayer() noexcept = default;
    apiv::VSelectLayer* mImpl; //!< Opaque implementation.
};
5106
5123{
5124public:
5133 void setMessage(char const* message) noexcept
5134 {
5135 mImpl->setMessage(message);
5136 }
5137
5143 char const* getMessage() const noexcept
5144 {
5145 return mImpl->getMessage();
5146 }
5147
5148protected:
5149 virtual ~IAssertionLayer() noexcept = default;
5150
5151 apiv::VAssertionLayer* mImpl;
5152};
5153
//!
//! \enum FillOperation
//!
//! \brief Generation modes supported by IFillLayer.
//!
enum class FillOperation : int32_t
{
    //! Linearly-spaced values defined by a start and deltas.
    kLINSPACE = 0,

    //! Values drawn from a uniform random distribution.
    kRANDOM_UNIFORM = 1,

    //! Values drawn from a normal random distribution.
    kRANDOM_NORMAL = 2
};

//!
//! \brief Number of elements in FillOperation.
//!
template <>
constexpr inline int32_t EnumMax<FillOperation>() noexcept
{
    return 3;
}
5200
5236class IFillLayer : public ILayer
5237{
5238public:
5247 //
5248 void setDimensions(Dims const& dimensions) noexcept
5249 {
5250 mImpl->setDimensions(dimensions);
5251 }
5252
5263 Dims getDimensions() const noexcept
5264 {
5265 return mImpl->getDimensions();
5266 }
5267
5273 void setOperation(FillOperation op) noexcept
5274 {
5275 mImpl->setOperation(op);
5276 }
5277
5284 {
5285 return mImpl->getOperation();
5286 }
5287
5301 //
5302 void setAlpha(double alpha) noexcept
5303 {
5304 mImpl->setAlpha(alpha);
5305 }
5306
5317 double getAlpha() const noexcept
5318 {
5319 return mImpl->getAlpha();
5320 }
5321
5336 void setBeta(double beta) noexcept
5337 {
5338 mImpl->setBeta(beta);
5339 }
5340
5351 double getBeta() const noexcept
5352 {
5353 return mImpl->getBeta();
5354 }
5355
5396 using ILayer::setInput;
5397
5411 //
5412 void setAlphaInt64(int64_t alpha) noexcept
5413 {
5414 mImpl->setAlphaInt64(alpha);
5415 }
5416
5427 int64_t getAlphaInt64() const noexcept
5428 {
5429 return mImpl->getAlphaInt64();
5430 }
5431
5446 void setBetaInt64(int64_t beta) noexcept
5447 {
5448 mImpl->setBetaInt64(beta);
5449 }
5450
5461 int64_t getBetaInt64() const noexcept
5462 {
5463 return mImpl->getBetaInt64();
5464 }
5465
5469 bool isAlphaBetaInt64() const noexcept
5470 {
5471 return mImpl->isAlphaBetaInt64();
5472 }
5473
5487 void setToType(DataType toType) noexcept
5488 {
5489 mImpl->setToType(toType);
5490 }
5491
5499 DataType getToType() const noexcept
5500 {
5501 return mImpl->getToType();
5502 }
5503
5504protected:
5505 virtual ~IFillLayer() noexcept = default;
5506 apiv::VFillLayer* mImpl;
5507};
5508
5584{
5585public:
5594 int32_t getAxis() const noexcept
5595 {
5596 return mImpl->getAxis();
5597 }
5605 void setAxis(int32_t axis) noexcept
5606 {
5607 mImpl->setAxis(axis);
5608 }
5609
5618 bool setBlockShape(Dims const& blockShape) noexcept
5619 {
5620 return mImpl->setBlockShape(blockShape);
5621 }
5622
5630 {
5631 return mImpl->getBlockShape();
5632 }
5633
5645 void setToType(DataType toType) noexcept
5646 {
5647 mImpl->setToType(toType);
5648 }
5649
5657 DataType getToType() const noexcept
5658 {
5659 return mImpl->getToType();
5660 }
5661
5662protected:
5663 virtual ~IQuantizeLayer() noexcept = default;
5664 apiv::VQuantizeLayer* mImpl;
5665};
5666
5736{
5737public:
5746 int32_t getAxis() const noexcept
5747 {
5748 return mImpl->getAxis();
5749 }
5757 void setAxis(int32_t axis) noexcept
5758 {
5759 mImpl->setAxis(axis);
5760 }
5761
5774 bool setBlockShape(Dims const& blockShape) noexcept
5775 {
5776 return mImpl->setBlockShape(blockShape);
5777 }
5778
5786 {
5787 return mImpl->getBlockShape();
5788 }
5789
5801 void setToType(DataType toType) noexcept
5802 {
5803 mImpl->setToType(toType);
5804 }
5805
5813 DataType getToType() const noexcept
5814 {
5815 return mImpl->getToType();
5816 }
5817
5818protected:
5819 virtual ~IDequantizeLayer() noexcept = default;
5820 apiv::VDequantizeLayer* mImpl;
5821};
5822
5841{
5842public:
5854 using ILayer::setInput;
5855
5868 void setToType(DataType toType) noexcept
5869 {
5870 mImpl->setToType(toType);
5871 }
5872
5881 DataType getToType() const noexcept
5882 {
5883 return mImpl->getToType();
5884 }
5885
5894 void setScaleType(DataType scaleType) noexcept
5895 {
5896 mImpl->setScaleType(scaleType);
5897 }
5898
5907 DataType getScaleType() const noexcept
5908 {
5909 return mImpl->getScaleType();
5910 }
5911
5920 TRT_DEPRECATED void setAxis(int32_t axis) noexcept
5921 {
5922 mImpl->setAxis(axis);
5923 }
5924
5930 TRT_DEPRECATED int32_t getAxis() const noexcept
5931 {
5932 return mImpl->getAxis();
5933 }
5934
5943 TRT_DEPRECATED void setBlockSize(int32_t size) noexcept
5944 {
5945 mImpl->setBlockSize(size);
5946 }
5947
5953 TRT_DEPRECATED int32_t getBlockSize() const noexcept
5954 {
5955 return mImpl->getBlockSize();
5956 }
5957
5966 void setBlockShape(Dims const& blockShape) noexcept
5967 {
5968 mImpl->setBlockShape(blockShape);
5969 }
5970
5978 Dims getBlockShape() const noexcept
5979 {
5980 return mImpl->getBlockShape();
5981 }
5982
5983protected:
5984 virtual ~IDynamicQuantizeLayer() noexcept = default;
5985 apiv::VDynamicQuantizeLayer* mImpl;
5986};
5987
//!
//! \class IEinsumLayer
//!
//! \brief Implements an Einstein-summation contraction described by an equation string.
//!
class IEinsumLayer : public ILayer
{
public:
    //!
    //! \brief Set the einsum equation string.
    //!
    //! \return Forwarded from the implementation; presumably true on success
    //!         (e.g. a parseable equation) - confirm against the API reference.
    //!
    bool setEquation(char const* equation) noexcept
    {
        return mImpl->setEquation(equation);
    }

    //!
    //! \brief Get the einsum equation string.
    //!
    char const* getEquation() const noexcept
    {
        return mImpl->getEquation();
    }

protected:
    virtual ~IEinsumLayer() noexcept = default;
    apiv::VEinsumLayer* mImpl; //!< Opaque implementation.
};
6053
//!
//! \enum ScatterMode
//!
//! \brief Addressing modes for IScatterLayer.
//!
enum class ScatterMode : int32_t
{
    kELEMENT = 0, //!< Scatter individual elements along an axis.
    kND = 1,      //!< Scatter slices addressed by multi-dimensional indices.
};

//!
//! \brief Number of elements in ScatterMode.
//!
template <>
constexpr inline int32_t EnumMax<ScatterMode>() noexcept
{
    return 2;
}
6077
//!
//! \class IScatterLayer
//!
//! \brief Writes updates into a copy of the data tensor at positions given by indices.
//!
class IScatterLayer : public ILayer
{
public:
    //!
    //! \brief Set the scatter mode (element or ND).
    //!
    void setMode(ScatterMode mode) noexcept
    {
        mImpl->setMode(mode);
    }

    //!
    //! \brief Get the scatter mode.
    //!
    ScatterMode getMode() const noexcept
    {
        return mImpl->getMode();
    }

    //!
    //! \brief Set the axis used by kELEMENT mode.
    //!
    void setAxis(int32_t axis) noexcept
    {
        mImpl->setAxis(axis);
    }

    //!
    //! \brief Get the axis used by kELEMENT mode.
    //!
    int32_t getAxis() const noexcept
    {
        return mImpl->getAxis();
    }

protected:
    apiv::VScatterLayer* mImpl; //!< Opaque implementation.
    virtual ~IScatterLayer() noexcept = default;
}; // class IScatterLayer
6180
//!
//! \class IOneHotLayer
//!
//! \brief Produces a one-hot encoding of its indices input along a chosen axis.
//!
class IOneHotLayer : public ILayer
{
public:
    //!
    //! \brief Set the axis along which the one-hot dimension is inserted.
    //!
    void setAxis(int32_t axis) noexcept
    {
        mImpl->setAxis(axis);
    }

    //!
    //! \brief Get the one-hot axis.
    //!
    int32_t getAxis() const noexcept
    {
        return mImpl->getAxis();
    }

protected:
    apiv::VOneHotLayer* mImpl; //!< Opaque implementation.
    virtual ~IOneHotLayer() noexcept = default;
};
6232
6245{
6246public:
6253 {
6254 mImpl->setInterpolationMode(mode);
6255 }
6256
6265 {
6266 return mImpl->getInterpolationMode();
6267 }
6268
6274 void setAlignCorners(bool alignCorners) noexcept
6275 {
6276 mImpl->setAlignCorners(alignCorners);
6277 }
6278
6286 bool getAlignCorners() const noexcept
6287 {
6288 return mImpl->getAlignCorners();
6289 }
6290
6298 bool setSampleMode(SampleMode mode) noexcept
6299 {
6300 return mImpl->setSampleMode(mode);
6301 }
6302
6310 SampleMode getSampleMode() const noexcept
6311 {
6312 return mImpl->getSampleMode();
6313 }
6314
6315protected:
6316 apiv::VGridSampleLayer* mImpl;
6317 virtual ~IGridSampleLayer() noexcept = default;
6318}; // class IGridSampleLayer
6319
//!
//! \enum BoundingBoxFormat
//!
//! \brief Box-coordinate layouts accepted by INMSLayer.
//!
enum class BoundingBoxFormat : int32_t
{
    //! Boxes given as pairs of opposite corners.
    kCORNER_PAIRS = 0,
    //! Boxes given as center coordinates plus sizes.
    kCENTER_SIZES = 1
};

//!
//! \brief Number of elements in BoundingBoxFormat.
//!
template <>
constexpr inline int32_t EnumMax<BoundingBoxFormat>() noexcept
{
    return 2;
}
6345
6396class INMSLayer : public ILayer
6397{
6398public:
6409 {
6410 mImpl->setBoundingBoxFormat(fmt);
6411 }
6412
6421 {
6422 return mImpl->getBoundingBoxFormat();
6423 }
6424
6434 void setTopKBoxLimit(int32_t limit) noexcept
6435 {
6436 mImpl->setTopKBoxLimit(limit);
6437 }
6438
6444 int32_t getTopKBoxLimit() const noexcept
6445 {
6446 return mImpl->getTopKBoxLimit();
6447 }
6448
6467 using ILayer::setInput;
6468
6479 bool setIndicesType(DataType type) noexcept
6480 {
6481 return mImpl->setIndicesType(type);
6482 }
6483
6491 DataType getIndicesType() const noexcept
6492 {
6493 return mImpl->getIndicesType();
6494 }
6495
6496protected:
6497 apiv::VNMSLayer* mImpl;
6498 virtual ~INMSLayer() noexcept = default;
6499}; // class INMSLayer
6500
6514{
6515public:
6524 void setBatchAxis(int32_t batchAxis) noexcept
6525 {
6526 mImpl->setBatchAxis(batchAxis);
6527 }
6528
6534 int32_t getBatchAxis() const noexcept
6535 {
6536 return mImpl->getBatchAxis();
6537 }
6538
6547 void setSequenceAxis(int32_t sequenceAxis) noexcept
6548 {
6549 mImpl->setSequenceAxis(sequenceAxis);
6550 }
6551
6557 int32_t getSequenceAxis() const noexcept
6558 {
6559 return mImpl->getSequenceAxis();
6560 }
6561
6562protected:
6563 apiv::VReverseSequenceLayer* mImpl;
6564 virtual ~IReverseSequenceLayer() noexcept = default;
6565}; // class IReverseSequenceLayer
6566
6586{
6587public:
6595 void setEpsilon(float eps) noexcept
6596 {
6597 return mImpl->setEpsilon(eps);
6598 }
6599
6605 float getEpsilon() const noexcept
6606 {
6607 return mImpl->getEpsilon();
6608 }
6609
6615 void setAxes(uint32_t axesMask) noexcept
6616 {
6617 return mImpl->setAxes(axesMask);
6618 }
6619
6625 uint32_t getAxes() const noexcept
6626 {
6627 return mImpl->getAxes();
6628 }
6629
6646 void setNbGroups(int64_t nbGroups) noexcept
6647 {
6648 return mImpl->setNbGroups(nbGroups);
6649 }
6650
6656 int64_t getNbGroups() const noexcept
6657 {
6658 return mImpl->getNbGroups();
6659 }
6660
6685 {
6686 return mImpl->setComputePrecision(type);
6687 }
6688
6697 {
6698 return mImpl->getComputePrecision();
6699 }
6700
6706 TRT_NODISCARD bool isV2() const noexcept
6707 {
6708 return mImpl->isV2();
6709 }
6710
6711protected:
6712 apiv::VNormalizationLayer* mImpl;
6713 virtual ~INormalizationLayer() noexcept = default;
6714};
6715
6716
//!
//! \class ISqueezeLayer
//!
//! \brief Removes size-1 dimensions given by an axes tensor; configured via inputs only.
//!
class ISqueezeLayer : public ILayer
{
public:
    //! Use setInput to supply/replace the data and axes tensors.
    using ILayer::setInput;

protected:
    apiv::VSqueezeLayer* mImpl; //!< Opaque implementation.
    virtual ~ISqueezeLayer() noexcept = default;
};
6746
6756{
6757public:
6771 using ILayer::setInput;
6772
6773protected:
6774 apiv::VUnsqueezeLayer* mImpl;
6775 virtual ~IUnsqueezeLayer() noexcept = default;
6776};
6777
6789enum class CumulativeOperation : int32_t
6790{
6791 kSUM = 0,
6792};
6793
6794namespace impl
6795{
6796
6802template <>
6804{
6805 static constexpr int32_t kVALUE = 1;
6806};
6807
6808} // namespace impl
6809
6838{
6839public:
6850 {
6851 return mImpl->setOperation(op);
6852 }
6853
6862 {
6863 return mImpl->getOperation();
6864 }
6865
6873 void setExclusive(bool exclusive) noexcept
6874 {
6875 mImpl->setExclusive(exclusive);
6876 }
6877
6885 bool getExclusive() const noexcept
6886 {
6887 return mImpl->getExclusive();
6888 }
6889
6897 void setReverse(bool reverse) noexcept
6898 {
6899 mImpl->setReverse(reverse);
6900 }
6901
6909 bool getReverse() const noexcept
6910 {
6911 return mImpl->getReverse();
6912 }
6913
6914protected:
6915 apiv::VCumulativeLayer* mImpl;
6916 virtual ~ICumulativeLayer() noexcept = default;
6917};
6918
6924enum class AttentionNormalizationOp : int32_t
6925{
6926 kNONE
6927 = 0,
6928 kSOFTMAX = 1,
6929};
6930
6931namespace impl
6932{
6938template <>
6940{
6941 static constexpr int32_t kVALUE = 2;
6942};
6943
6944} // namespace impl
6945
6956{
6957public:
6961 IAttention* getAttention() const noexcept
6962 {
6963 return mBoundary->getAttention();
6964 }
6965
6966protected:
6967 virtual ~IAttentionBoundaryLayer() noexcept = default;
6968 apiv::VAttentionBoundaryLayer* mBoundary;
6969};
6970
6982{
6983public:
6999 using ILayer::setInput;
7000
7001protected:
7002 virtual ~IAttentionInputLayer() noexcept = default;
7003 apiv::VAttentionInputLayer* mImpl;
7004};
7005
7017{
7018public:
7019protected:
7020 virtual ~IAttentionOutputLayer() noexcept = default;
7021 apiv::VAttentionOutputLayer* mImpl;
7022};
7023
7073class IAttention : public INoCopy
7074{
7075public:
7084 {
7085 return mImpl->setNormalizationOperation(op);
7086 }
7087
7096 {
7097 return mImpl->getNormalizationOperation();
7098 }
7099
7112 bool setMask(ITensor& mask) noexcept
7113 {
7114 return mImpl->setMask(mask);
7115 }
7116
7124 ITensor* getMask() noexcept
7125 {
7126 return mImpl->getMask();
7127 }
7128
7137 bool setCausal(bool isCausal) noexcept
7138 {
7139 return mImpl->setCausal(isCausal);
7140 }
7141
7149 bool getCausal() const noexcept
7150 {
7151 return mImpl->getCausal();
7152 }
7153
7161 bool setDecomposable(bool decomposable) noexcept
7162 {
7163 return mImpl->setDecomposable(decomposable);
7164 }
7165
7174 bool getDecomposable() const noexcept
7175 {
7176 return mImpl->getDecomposable();
7177 }
7178
7193 bool setInput(int32_t index, ITensor& input) noexcept
7194 {
7195 return mImpl->setInput(index, input);
7196 }
7197
7202 int32_t getNbInputs() const noexcept
7203 {
7204 return mImpl->getNbInputs();
7205 }
7206
7214 ITensor* getInput(int32_t index) const noexcept
7215 {
7216 return mImpl->getInput(index);
7217 }
7218
7222 int32_t getNbOutputs() const noexcept
7223 {
7224 return mImpl->getNbOutputs();
7225 }
7226
7234 ITensor* getOutput(int32_t index) const noexcept
7235 {
7236 return mImpl->getOutput(index);
7237 }
7238
7251 bool setName(char const* name) noexcept
7252 {
7253 return mImpl->setName(name);
7254 }
7255
7263 char const* getName() const noexcept
7264 {
7265 return mImpl->getName();
7266 }
7267
7280 {
7281 return mImpl->setNormalizationQuantizeScale(tensor);
7282 }
7283
7291 {
7292 return mImpl->getNormalizationQuantizeScale();
7293 }
7294
7304 {
7305 return mImpl->setNormalizationQuantizeToType(type);
7306 }
7307
7316 {
7317 return mImpl->getNormalizationQuantizeToType();
7318 }
7319
7335 bool setMetadata(char const* metadata) noexcept
7336 {
7337 return mImpl->setMetadata(metadata);
7338 }
7339
7348 char const* getMetadata() const noexcept
7349 {
7350 return mImpl->getMetadata();
7351 }
7352
7364 bool setNbRanks(int32_t nbRanks) noexcept
7365 {
7366 return mImpl->setNbRanks(nbRanks);
7367 }
7368
7376 int32_t getNbRanks() const noexcept
7377 {
7378 return mImpl->getNbRanks();
7379 }
7380
7381protected:
7382 apiv::VAttention* mImpl;
7383 virtual ~IAttention() noexcept = default;
7384};
7385
7393{
7394public:
7400 void setInterleaved(bool interleaved) noexcept
7401 {
7402 mImpl->setInterleaved(interleaved);
7403 }
7404
7405
7411 TRT_NODISCARD bool getInterleaved() const noexcept
7412 {
7413 return mImpl->getInterleaved();
7414 }
7415
7416
7422 TRT_NODISCARD bool setRotaryEmbeddingDim(int32_t rotaryEmbeddingDim) noexcept
7423 {
7424 return mImpl->setRotaryEmbeddingDim(rotaryEmbeddingDim);
7425 }
7426
7427
7433 TRT_NODISCARD int32_t getRotaryEmbeddingDim() const noexcept
7434 {
7435 return mImpl->getRotaryEmbeddingDim();
7436 }
7437
7438
7452 using ILayer::setInput;
7453
7454
7455protected:
7456 apiv::VRotaryEmbeddingLayer* mImpl;
7457 virtual ~IRotaryEmbeddingLayer() noexcept = default;
7458};
7459
7465enum class KVCacheMode : int32_t
7466{
7467 kLINEAR = 0,
7468};
7469
7470namespace impl
7471{
7477template <>
7479{
7480 static constexpr int32_t kVALUE = 1;
7481};
7482
7483} // namespace impl
7484
7505{
7506public:
7519 using ILayer::setInput;
7520
7528 bool setCacheMode(KVCacheMode cacheMode) noexcept
7529 {
7530 return mImpl->setCacheMode(cacheMode);
7531 }
7532
7538 KVCacheMode getCacheMode() const noexcept
7539 {
7540 return mImpl->getCacheMode();
7541 }
7542
7543protected:
7544 apiv::VKVCacheUpdateLayer* mImpl;
7545 virtual ~IKVCacheUpdateLayer() noexcept = default;
7546};
7547
7553enum class MoEActType : int32_t
7554{
7555 kNONE = 0,
7556 kSILU = 1,
7557};
7558
7559namespace impl
7560{
7561
7567template <>
7569{
7570 static constexpr int32_t kVALUE = 2;
7571};
7572
7573} // namespace impl
7574
7686class IMoELayer : public ILayer
7687{
7688public:
7700 void setGatedWeights(ITensor& fcGateWeights, ITensor& fcUpWeights, ITensor& fcDownWeights, MoEActType activationType) noexcept
7701 {
7702 mImpl->setGatedWeights(fcGateWeights, fcUpWeights, fcDownWeights, activationType);
7703 }
7704
7712 void setGatedBiases(ITensor& fcGateBiases, ITensor& fcUpBiases, ITensor& fcDownBiases) noexcept
7713 {
7714 mImpl->setGatedBiases(fcGateBiases, fcUpBiases, fcDownBiases);
7715 }
7716
7724 void setActivationType(MoEActType activationType) noexcept
7725 {
7726 mImpl->setActivationType(activationType);
7727 }
7728
7737 {
7738 return mImpl->getActivationType();
7739 }
7740
7759 void setQuantizationStatic(ITensor& fcDownActivationScale, DataType dataType) noexcept
7760 {
7761 mImpl->setQuantizationStatic(fcDownActivationScale, dataType);
7762 }
7763
7788 void setQuantizationDynamicDblQ(ITensor& fcDownActivationDblQScale, DataType dataType, Dims const& blockShape, DataType dynQOutputScaleType) noexcept
7789 {
7790 mImpl->setQuantizationDynamicDblQ(fcDownActivationDblQScale, dataType, blockShape, dynQOutputScaleType);
7791 }
7792
7804 {
7805 mImpl->setQuantizationToType(type);
7806 }
7807
7816 {
7817 return mImpl->getQuantizationToType();
7818 }
7819
7831 void setQuantizationBlockShape(Dims const& blockShape) noexcept
7832 {
7833 mImpl->setQuantizationBlockShape(blockShape);
7834 }
7835
7844 {
7845 return mImpl->getQuantizationBlockShape();
7846 }
7847
7856 {
7857 mImpl->setDynQOutputScaleType(type);
7858 }
7859
7868 {
7869 return mImpl->getDynQOutputScaleType();
7870 }
7871
7888 void setSwigluParams(float limit, float alpha, float beta) noexcept
7889 {
7890 mImpl->setSwigluParams(limit, alpha, beta);
7891 }
7892
7902 void setSwigluParamLimit(float limit) noexcept
7903 {
7904 mImpl->setSwigluParamLimit(limit);
7905 }
7906
7914 float getSwigluParamLimit() const noexcept
7915 {
7916 return mImpl->getSwigluParamLimit();
7917 }
7918
7928 void setSwigluParamAlpha(float alpha) noexcept
7929 {
7930 mImpl->setSwigluParamAlpha(alpha);
7931 }
7932
7940 float getSwigluParamAlpha() const noexcept
7941 {
7942 return mImpl->getSwigluParamAlpha();
7943 }
7944
7954 void setSwigluParamBeta(float beta) noexcept
7955 {
7956 mImpl->setSwigluParamBeta(beta);
7957 }
7958
7966 float getSwigluParamBeta() const noexcept
7967 {
7968 return mImpl->getSwigluParamBeta();
7969 }
7970
7983 void setInput(int32_t index, ITensor& tensor) noexcept
7984 {
7985 mImpl->setInput(index, tensor);
7986 }
7987
7988 using ILayer::setInput;
7989
7990protected:
7991 virtual ~IMoELayer() noexcept = default;
7992 apiv::VMoELayer* mImpl;
7993};
7994
8003{
8004protected:
8005 virtual ~IDistCollectiveLayer() noexcept = default;
8006 apiv::VDistCollectiveLayer* mImpl;
8007}; // class IDistCollectiveLayer
8008
8027{
8028public:
8029 virtual ~INetworkDefinition() noexcept = default;
8030
8066 ITensor* addInput(char const* name, DataType type, Dims const& dimensions) noexcept
8067 {
8068 return mImpl->addInput(name, type, dimensions);
8069 }
8070
8080 void markOutput(ITensor& tensor) noexcept
8081 {
8082 mImpl->markOutput(tensor);
8083 }
8084
8098 bool markDebug(ITensor& tensor) noexcept
8099 {
8100 return mImpl->markDebug(tensor);
8101 }
8102
8114 bool unmarkDebug(ITensor& tensor) noexcept
8115 {
8116 return mImpl->unmarkDebug(tensor);
8117 }
8118
8124 bool isDebugTensor(ITensor const& tensor) const noexcept
8125 {
8126 return mImpl->isDebugTensor(tensor);
8127 }
8128
8147 {
8148 return mImpl->markUnfusedTensorsAsDebugTensors();
8149 }
8150
8161 {
8162 return mImpl->unmarkUnfusedTensorsAsDebugTensors();
8163 }
8164
8181 {
8182 return mImpl->addActivation(input, type);
8183 }
8184
8199 ILRNLayer* addLRN(ITensor& input, int64_t window, float alpha, float beta, float k) noexcept
8200 {
8201 return mImpl->addLRN(input, window, alpha, beta, k);
8202 }
8203
8225 IScaleLayer* addScale(ITensor& input, ScaleMode mode, Weights shift, Weights scale, Weights power) noexcept
8226 {
8227 return mImpl->addScale(input, mode, shift, scale, power);
8228 }
8229
8239 {
8240 return mImpl->addSoftMax(input);
8241 }
8242
8255 IConcatenationLayer* addConcatenation(ITensor* const* inputs, int32_t nbInputs) noexcept
8256 {
8257 return mImpl->addConcatenation(inputs, nbInputs);
8258 }
8259
8283 {
8284 return mImpl->addElementWise(input1, input2, op);
8285 }
8286
8304 IUnaryLayer* addUnary(ITensor& input, UnaryOperation operation) noexcept
8305 {
8306 return mImpl->addUnary(input, operation);
8307 }
8308
8319 {
8320 return mImpl->addShuffle(input);
8321 }
8322
8335 IOneHotLayer* addOneHot(ITensor& indices, ITensor& values, ITensor& depth, int32_t axis) noexcept
8336 {
8337 return mImpl->addOneHot(indices, values, depth, axis);
8338 }
8339
8347 int32_t getNbLayers() const noexcept
8348 {
8349 return mImpl->getNbLayers();
8350 }
8351
8361 ILayer* getLayer(int32_t index) const noexcept
8362 {
8363 return mImpl->getLayer(index);
8364 }
8365
8373 int32_t getNbInputs() const noexcept
8374 {
8375 return mImpl->getNbInputs();
8376 }
8377
8389 ITensor* getInput(int32_t index) const noexcept
8390 {
8391 return mImpl->getInput(index);
8392 }
8393
8403 int32_t getNbOutputs() const noexcept
8404 {
8405 return mImpl->getNbOutputs();
8406 }
8407
8419 ITensor* getOutput(int32_t index) const noexcept
8420 {
8421 return mImpl->getOutput(index);
8422 }
8423
8446 ITensor& input, ReduceOperation operation, uint32_t reduceAxes, bool keepDimensions) noexcept
8447 {
8448 return mImpl->addReduce(input, operation, reduceAxes, keepDimensions);
8449 }
8450
8481 TRT_DEPRECATED ITopKLayer* addTopK(ITensor& input, TopKOperation op, int32_t k, uint32_t reduceAxes) noexcept
8482 {
8483 return mImpl->addTopK(input, op, k, reduceAxes);
8484 }
8485
8514 ITopKLayer* addTopK(ITensor& input, TopKOperation op, int32_t k, uint32_t reduceAxes, DataType indicesType) noexcept
8515 {
8516 return mImpl->addTopKV2(input, op, k, reduceAxes, indicesType);
8517 }
8518
8530 IGatherLayer* addGather(ITensor& data, ITensor& indices, int32_t axis) noexcept
8531 {
8532 return mImpl->addGather(data, indices, axis);
8533 }
8534
8546 IGatherLayer* addGatherV2(ITensor& data, ITensor& indices, GatherMode mode) noexcept
8547 {
8548 return mImpl->addGatherV2(data, indices, mode);
8549 }
8550
8566 {
8567 return mImpl->addRaggedSoftMax(input, bounds);
8568 }
8569
8587 ITensor& input0, MatrixOperation op0, ITensor& input1, MatrixOperation op1) noexcept
8588 {
8589 return mImpl->addMatrixMultiply(input0, op0, input1, op1);
8590 }
8591
8606 {
8607 return mImpl->addNonZero(input);
8608 }
8609
8621 INonZeroLayer* addNonZero(ITensor& input, DataType indicesType) noexcept
8622 {
8623 return mImpl->addNonZeroV2(input, indicesType);
8624 }
8625
8645 IConstantLayer* addConstant(Dims const& dimensions, Weights weights) noexcept
8646 {
8647 return mImpl->addConstant(dimensions, weights);
8648 }
8649
8660 {
8661 return mImpl->addIdentity(input);
8662 }
8663
8674 ICastLayer* addCast(ITensor& input, DataType toType) noexcept
8675 {
8676 return mImpl->addCast(input, toType);
8677 }
8678
8689 void removeTensor(ITensor& tensor) noexcept
8690 {
8691 mImpl->removeTensor(tensor);
8692 }
8693
8701 void unmarkOutput(ITensor& tensor) noexcept
8702 {
8703 mImpl->unmarkOutput(tensor);
8704 }
8705
8722 TRT_DEPRECATED IPluginV2Layer* addPluginV2(ITensor* const* inputs, int32_t nbInputs, IPluginV2& plugin) noexcept
8723 {
8724 return mImpl->addPluginV2(inputs, nbInputs, plugin);
8725 }
8726
8740 IPluginV3Layer* addPluginV3(ITensor* const* inputs, int32_t nbInputs, ITensor* const* shapeInputs,
8741 int32_t nbShapeInputs, IPluginV3& plugin) noexcept
8742 {
8743 return mImpl->addPluginV3(inputs, nbInputs, shapeInputs, nbShapeInputs, plugin);
8744 }
8745
8760 ISliceLayer* addSlice(ITensor& input, Dims const& start, Dims const& size, Dims const& stride) noexcept
8761 {
8762 return mImpl->addSlice(input, start, size, stride);
8763 }
8764
8784 void setName(char const* name) noexcept
8785 {
8786 mImpl->setName(name);
8787 }
8788
8798 char const* getName() const noexcept
8799 {
8800 return mImpl->getName();
8801 }
8802
8814 IShapeLayer* addShape(ITensor& input) noexcept
8815 {
8816 return mImpl->addShape(input);
8817 }
8818
8829 {
8830 return mImpl->hasImplicitBatchDimension();
8831 }
8832
8839 {
8840 return mImpl->getFlags();
8841 }
8842
8850 bool getFlag(NetworkDefinitionCreationFlag networkDefinitionCreationFlag) const noexcept
8851 {
8852 return mImpl->getFlag(networkDefinitionCreationFlag);
8853 }
8854
8867 bool markOutputForShapes(ITensor& tensor) noexcept
8868 {
8869 return mImpl->markOutputForShapes(tensor);
8870 }
8871
8879 bool unmarkOutputForShapes(ITensor& tensor) noexcept
8880 {
8881 return mImpl->unmarkOutputForShapes(tensor);
8882 }
8883
8898 {
8899 return mImpl->addParametricReLU(input, slope);
8900 }
8901
8920 ITensor& input, int64_t nbOutputMaps, Dims const& kernelSize, Weights kernelWeights, Weights biasWeights) noexcept
8921 {
8922 return mImpl->addConvolutionNd(input, nbOutputMaps, kernelSize, kernelWeights, biasWeights);
8923 }
8924
8939 IPoolingLayer* addPoolingNd(ITensor& input, PoolingType type, Dims const& windowSize) noexcept
8940 {
8941 return mImpl->addPoolingNd(input, type, windowSize);
8942 }
8943
8958 //
8962 ITensor& input, int64_t nbOutputMaps, Dims kernelSize, Weights kernelWeights, Weights biasWeights) noexcept
8963 {
8964 return mImpl->addDeconvolutionNd(input, nbOutputMaps, kernelSize, kernelWeights, biasWeights);
8965 }
8966
8999 ITensor& input, ScaleMode mode, Weights shift, Weights scale, Weights power, int32_t channelAxis) noexcept
9000 {
9001 return mImpl->addScaleNd(input, mode, shift, scale, power, channelAxis);
9002 }
9003
9016 {
9017 return mImpl->addResize(input);
9018 }
9019
9029 ILoop* addLoop() noexcept
9030 {
9031 return mImpl->addLoop();
9032 }
9033
9045 {
9046 return mImpl->addIfConditional();
9047 }
9048
9083 ISelectLayer* addSelect(ITensor& condition, ITensor& thenInput, ITensor& elseInput) noexcept
9084 {
9085 return mImpl->addSelect(condition, thenInput, elseInput);
9086 }
9087
9100 IAssertionLayer* addAssertion(ITensor& condition, char const* message) noexcept
9101 {
9102 return mImpl->addAssertion(condition, message);
9103 }
9104
9125 TRT_DEPRECATED IFillLayer* addFill(Dims const& dimensions, FillOperation op) noexcept
9126 {
9127 return mImpl->addFill(dimensions, op);
9128 }
9129
9151 IFillLayer* addFill(Dims const& dimensions, FillOperation op, DataType outputType) noexcept
9152 {
9153 return mImpl->addFillV2(dimensions, op, outputType);
9154 }
9155
9167 IPaddingLayer* addPaddingNd(ITensor& input, Dims const& prePadding, Dims const& postPadding) noexcept
9168 {
9169 return mImpl->addPaddingNd(input, prePadding, postPadding);
9170 }
9171
9191 bool setWeightsName(Weights weights, char const* name) noexcept
9192 {
9193 return mImpl->setWeightsName(weights, name);
9194 }
9195
9207 //
9210 void setErrorRecorder(IErrorRecorder* recorder) noexcept
9211 {
9212 mImpl->setErrorRecorder(recorder);
9213 }
9214
9226 {
9227 return mImpl->getErrorRecorder();
9228 }
9229
9247 {
9248 return mImpl->addDequantize(input, scale);
9249 }
9250
9269 IDequantizeLayer* addDequantize(ITensor& input, ITensor& scale, DataType outputType) noexcept
9270 {
9271 return mImpl->addDequantizeV2(input, scale, outputType);
9272 }
9273
9289 IScatterLayer* addScatter(ITensor& data, ITensor& indices, ITensor& updates, ScatterMode mode) noexcept
9290 {
9291 return mImpl->addScatter(data, indices, updates, mode);
9292 }
9293
9311 {
9312 return mImpl->addQuantize(input, scale);
9313 }
9314
9334 IQuantizeLayer* addQuantize(ITensor& input, ITensor& scale, DataType outputType) noexcept
9335 {
9336 return mImpl->addQuantizeV2(input, scale, outputType);
9337 }
9338
9362 ITensor& input, int32_t axis, int32_t blockSize, DataType outputType, DataType scaleType) noexcept
9363 {
9364 return mImpl->addDynamicQuantize(input, axis, blockSize, outputType, scaleType);
9365 }
9366
9386 ITensor& input, Dims const& blockShape, DataType outputType, DataType scaleType) noexcept
9387 {
9388 return mImpl->addDynamicQuantizeV2(input, blockShape, outputType, scaleType);
9389 }
9390
9401 IEinsumLayer* addEinsum(ITensor* const* inputs, int32_t nbInputs, char const* equation) noexcept
9402 {
9403 return mImpl->addEinsum(inputs, nbInputs, equation);
9404 }
9405
9420 {
9421 return mImpl->addGridSample(input, grid);
9422 }
9423
9441 TRT_DEPRECATED INMSLayer* addNMS(ITensor& boxes, ITensor& scores, ITensor& maxOutputBoxesPerClass) noexcept
9442 {
9443 return mImpl->addNMS(boxes, scores, maxOutputBoxesPerClass);
9444 }
9445
9461 INMSLayer* addNMS(ITensor& boxes, ITensor& scores, ITensor& maxOutputBoxesPerClass, DataType indicesType) noexcept
9462 {
9463 return mImpl->addNMSV2(boxes, scores, maxOutputBoxesPerClass, indicesType);
9464 }
9465
9479 {
9480 return mImpl->addReverseSequence(input, sequenceLens);
9481 }
9482
9510 TRT_DEPRECATED INormalizationLayer* addNormalization(ITensor& input, ITensor& scale, ITensor& bias, uint32_t axesMask) noexcept
9511 {
9512 return mImpl->addNormalization(input, scale, bias, axesMask);
9513 }
9514
9532 ICumulativeLayer* addCumulative(ITensor& input, ITensor& axis, CumulativeOperation operation, bool exclusive, bool reverse) noexcept
9533 {
9534 return mImpl->addCumulative(input, axis, operation, exclusive, reverse);
9535 }
9536
9560 ITensor& query, ITensor& key, ITensor& value, AttentionNormalizationOp normOp, bool causal) noexcept
9561 {
9562 return mImpl->addAttention(query, key, value, normOp, causal);
9563 }
9564
9584 IRotaryEmbeddingLayer* addRotaryEmbedding(ITensor& input, ITensor& cosCache, ITensor& sinCache, bool interleaved, int32_t rotaryEmbeddingDim) noexcept
9585 {
9586 return mImpl->addRotaryEmbedding(input, cosCache, sinCache, interleaved, rotaryEmbeddingDim);
9587 }
9588
9619 ITensor& cache, ITensor& update, ITensor& writeIndices, KVCacheMode cacheMode) noexcept
9620 {
9621 return mImpl->addKVCacheUpdate(cache, update, writeIndices, cacheMode);
9622 }
9623
9638 IMoELayer* addMoE(ITensor& hiddenStates, ITensor& selectedExpertsForTokens, ITensor& scoresForSelectedExperts) noexcept
9639 {
9640 return mImpl->addMoE(hiddenStates, selectedExpertsForTokens, scoresForSelectedExperts);
9641 }
9642
9667 ReduceOperation reduceOp, int64_t root, int64_t* groups, int64_t groupSize) noexcept
9668 {
9669 return mImpl->addDistCollective(input, distCollectiveOp, reduceOp, root, groups, groupSize);
9670 }
9671
9678 virtual IBuilder& getBuilder() const noexcept
9679 {
9680 return mImpl->getBuilder();
9681 }
9682
9691 bool markWeightsRefittable(char const* name) noexcept
9692 {
9693 return mImpl->markWeightsRefittable(name);
9694 }
9695
9703 bool unmarkWeightsRefittable(char const* name) noexcept
9704 {
9705 return mImpl->unmarkWeightsRefittable(name);
9706 }
9707
9716 bool areWeightsMarkedRefittable(char const* name) const noexcept
9717 {
9718 return mImpl->areWeightsMarkedRefittable(name);
9719 }
9720
9735 ISqueezeLayer* addSqueeze(ITensor& input, ITensor& axes) noexcept
9736 {
9737 return mImpl->addSqueeze(input, axes);
9738 }
9739
9757 {
9758 return mImpl->addUnsqueeze(input, axes);
9759 }
9760
9782 TRT_NODISCARD INormalizationLayer* addNormalizationV2(ITensor& input, ITensor& scale, ITensor& bias, uint32_t axesMask) noexcept
9783 {
9784 return mImpl->addNormalizationV2(input, scale, bias, axesMask);
9785 }
9786
9787protected:
9788 apiv::VNetworkDefinition* mImpl;
9789};
9790
9798enum class CalibrationAlgoType : int32_t
9799{
9804};
9805
9811template <>
9812constexpr inline int32_t EnumMax<CalibrationAlgoType>() noexcept
9813{
9814 return 4;
9815}
9816
9831{
9832public:
9840 TRT_DEPRECATED virtual int32_t getBatchSize() const noexcept = 0;
9841
9856 virtual bool getBatch(void* bindings[], char const* names[], int32_t nbBindings) noexcept = 0;
9857
9872 virtual void const* readCalibrationCache(std::size_t& length) noexcept = 0;
9873
9882 virtual void writeCalibrationCache(void const* ptr, std::size_t length) noexcept = 0;
9883
9889 virtual CalibrationAlgoType getAlgorithm() noexcept = 0;
9890
9891 ~IInt8Calibrator() noexcept override = default;
9892};
9893
9894namespace v_1_0
9895{
9897{
9898public:
9902 InterfaceInfo getInterfaceInfo() const noexcept override
9903 {
9904 return InterfaceInfo{"IInt8EntropyCalibrator", 1, 0};
9905 }
9906
9911 {
9913 }
9914
9915 ~IInt8EntropyCalibrator() noexcept override = default;
9916};
9917} // namespace v_1_0
9918
9933
9934namespace v_1_0
9935{
9937{
9938public:
9942 InterfaceInfo getInterfaceInfo() const noexcept override
9943 {
9944 return InterfaceInfo{"IInt8EntropyCalibrator2", 1, 0};
9945 }
9946
9951 {
9953 }
9954
9955 ~IInt8EntropyCalibrator2() noexcept override = default;
9956};
9957} // namespace v_1_0
9958
9973
9974namespace v_1_0
9975{
9977{
9978public:
9982 InterfaceInfo getInterfaceInfo() const noexcept override
9983 {
9984 return InterfaceInfo{"IInt8MinMaxCalibrator", 1, 0};
9985 }
9986
9991 {
9993 }
9994
9995 ~IInt8MinMaxCalibrator() noexcept override = default;
9996};
9997} // namespace v_1_0
9998
10012
10013namespace v_1_0
10014{
10016{
10017public:
10021 InterfaceInfo getInterfaceInfo() const noexcept override
10022 {
10023 return InterfaceInfo{"IInt8Calibrator", 1, 0};
10024 }
10025
10030 {
10032 }
10033
10040 virtual double getQuantile() const noexcept = 0;
10041
10048 virtual double getRegressionCutoff() const noexcept = 0;
10049
10062 virtual void const* readHistogramCache(std::size_t& length) noexcept = 0;
10063
10072 virtual void writeHistogramCache(void const* ptr, std::size_t length) noexcept = 0;
10073
10074 ~IInt8LegacyCalibrator() noexcept override = default;
10075};
10076} // namespace v_1_0
10077
10092
10106{
10107public:
10113 DataType getDataType() const noexcept
10114 {
10115 return mImpl->getDataType();
10116 }
10117
10124 Dims getStrides() const noexcept
10125 {
10126 return mImpl->getStrides();
10127 }
10128
10134 int64_t getVectorizedDim() const noexcept
10135 {
10136 return mImpl->getVectorizedDim();
10137 }
10138
10145 int64_t getComponentsPerElement() const noexcept
10146 {
10147 return mImpl->getComponentsPerElement();
10148 }
10149
10150protected:
10151 virtual ~IAlgorithmIOInfo() noexcept = default;
10152 apiv::VAlgorithmIOInfo* mImpl;
10153};
10154
10169{
10170public:
10174 int64_t getImplementation() const noexcept
10175 {
10176 return mImpl->getImplementation();
10177 }
10178
10182 int64_t getTactic() const noexcept
10183 {
10184 return mImpl->getTactic();
10185 }
10186
10187protected:
10188 virtual ~IAlgorithmVariant() noexcept = default;
10189 apiv::VAlgorithmVariant* mImpl;
10190};
10191
10203{
10204public:
10210 char const* getName() const noexcept
10211 {
10212 return mImpl->getName();
10213 }
10214
10222 Dims getDimensions(int32_t index, OptProfileSelector select) const noexcept
10223 {
10224 return mImpl->getDimensions(index, select);
10225 }
10226
10230 int32_t getNbInputs() const noexcept
10231 {
10232 return mImpl->getNbInputs();
10233 }
10234
10238 int32_t getNbOutputs() const noexcept
10239 {
10240 return mImpl->getNbOutputs();
10241 }
10242
10243protected:
10244 virtual ~IAlgorithmContext() noexcept = default;
10245 apiv::VAlgorithmContext* mImpl;
10246};
10247
10262{
10263public:
10268 {
10269 return mImpl->getAlgorithmVariant();
10270 }
10271
10275 float getTimingMSec() const noexcept
10276 {
10277 return mImpl->getTimingMSec();
10278 }
10279
10283 std::size_t getWorkspaceSize() const noexcept
10284 {
10285 return mImpl->getWorkspaceSize();
10286 }
10287
10297 IAlgorithmIOInfo const* getAlgorithmIOInfoByIndex(int32_t index) const noexcept
10298 {
10299 return mImpl->getAlgorithmIOInfoByIndex(index);
10300 }
10301
10302protected:
10303 virtual ~IAlgorithm() noexcept = default;
10304 apiv::VAlgorithm* mImpl;
10305}; // IAlgorithm
10306
10307namespace v_1_0
10308{
10310{
10311public:
10315 InterfaceInfo getInterfaceInfo() const noexcept override
10316 {
10317 return InterfaceInfo{"IAlgorithmSelector", 1, 0};
10318 }
10333 virtual int32_t selectAlgorithms(IAlgorithmContext const& context, IAlgorithm const* const* choices,
10334 int32_t nbChoices, int32_t* selection) noexcept = 0;
10335
10346 virtual void reportAlgorithms(IAlgorithmContext const* const* algoContexts, IAlgorithm const* const* algoChoices,
10347 int32_t nbAlgorithms) noexcept = 0;
10348
10349 virtual ~IAlgorithmSelector() noexcept = default;
10350};
10351} // namespace v_1_0
10352
10366
10373using QuantizationFlags = uint32_t;
10374
10384enum class QuantizationFlag : int32_t
10385{
10390};
10391
10397template <>
10398constexpr inline int32_t EnumMax<QuantizationFlag>() noexcept
10399{
10400 return 1;
10401}
10402
10420enum class RuntimePlatform : int32_t
10421{
10424 kSAME_AS_BUILD = 0,
10425
10428 kWINDOWS_AMD64 = 1,
10429
10430
10431};
10432
10433namespace impl
10434{
10440template <>
10442{
10443 static constexpr int32_t kVALUE = 2;
10444};
10445} // namespace impl
10446
10453using BuilderFlags = uint32_t;
10454
10462enum class BuilderFlag : int32_t
10463{
10467
10471
10473 kDEBUG = 2,
10474
10476 kGPU_FALLBACK = 3,
10477
10479 kREFIT = 4,
10480
10483
10487 kTF32 = 6,
10488
10490 kSPARSE_WEIGHTS = 7,
10491
10504
10508
10513
10519
10523
10530
10536
10542
10546
10551
10557
10559 kSTRIP_PLAN = 19,
10560
10563
10570 kREFIT_IDENTICAL = 20,
10571
10597 kWEIGHT_STREAMING = 21,
10598
10602
10607 kREFIT_INDIVIDUAL = 23,
10608
10617 kSTRICT_NANS = 24,
10618
10620 kMONITOR_MEMORY = 25,
10621
10625
10628
10640
10641#if ENABLE_FEATURE_DISABLE_RUNTIME_ALLOCATION
10648 kREQUIRE_USER_ALLOCATION = 29,
10649#endif // ENABLE_FEATURE_DISABLE_RUNTIME_ALLOCATION
10650
10651};
10652
10658template <>
10659constexpr inline int32_t EnumMax<BuilderFlag>() noexcept
10660{
10661#if ENABLE_FEATURE_DISABLE_RUNTIME_ALLOCATION
10662 return 30;
10663#else
10664 return 29;
10665#endif // ENABLE_FEATURE_DISABLE_RUNTIME_ALLOCATION
10666}
10667
10668namespace v_1_0
10669{
10683{
10684 uint8_t data[16];
10685};
10686
10695{
10697 uint64_t tacticHash;
10701 static constexpr uint64_t kINVALID_TACTIC_HASH = UINT64_MAX;
10702};
10703} // namespace v_1_0
10704
10718class ITimingCache : public INoCopy
10719{
10720public:
10721 virtual ~ITimingCache() noexcept = default;
10722
10732 nvinfer1::IHostMemory* serialize() const noexcept
10733 {
10734 return mImpl->serialize();
10735 }
10736
10756 bool combine(ITimingCache const& inputCache, bool ignoreMismatch) noexcept
10757 {
10758 return mImpl->combine(inputCache, ignoreMismatch);
10759 }
10760
10766 bool reset() noexcept
10767 {
10768 return mImpl->reset();
10769 }
10770
10785 int64_t queryKeys(TimingCacheKey* keyBuffer, int64_t capacity) const noexcept
10786 {
10787 return mImpl->queryKeys(keyBuffer, capacity);
10788 }
10789
10802 TimingCacheValue query(TimingCacheKey const& key) const noexcept
10803 {
10804 return mImpl->query(key);
10805 }
10806
10824 bool update(TimingCacheKey const& key, TimingCacheValue const& value) noexcept
10825 {
10826 return mImpl->update(key, value);
10827 }
10828
10829protected:
10830 apiv::VTimingCache* mImpl;
10831};
10832
10840enum class MemoryPoolType : int32_t
10841{
10848 kWORKSPACE = 0,
10849
10857
10863 kDLA_LOCAL_DRAM = 2,
10864
10870 kDLA_GLOBAL_DRAM = 3,
10871
10879 kTACTIC_DRAM = 4,
10880
10894};
10895
10901template <>
10902constexpr inline int32_t EnumMax<MemoryPoolType>() noexcept
10903{
10904 return 6;
10905}
10906
10915enum class PreviewFeature : int32_t
10916{
10923
10928
10935
10944};
10945
10946namespace impl
10947{
10953template <>
10955{
10956 static constexpr int32_t kVALUE = 4;
10957};
10958} // namespace impl
10959
10968enum class HardwareCompatibilityLevel : int32_t
10969{
10972 kNONE = 0,
10973
10985 kAMPERE_PLUS = 1,
10986
10996};
10997
10998namespace impl
10999{
11005template <>
11007{
11008 static constexpr int32_t kVALUE = 3;
11009};
11010} // namespace impl
11011
11012
//!
//! \enum TilingOptimizationLevel
//!
//! \brief Level of effort the builder may spend on tiling optimization,
//! from none (kNONE) up to the most thorough search (kFULL).
//!
enum class TilingOptimizationLevel : int32_t
{
    kNONE = 0,     //!< Do not apply any tiling strategy.

    kFAST = 1,     //!< Fast tiling level.

    kMODERATE = 2, //!< Moderate tiling level.

    kFULL = 3      //!< Full tiling level.
};
11037
11038namespace impl
11039{
11045template <>
11047{
11048 static constexpr int32_t kVALUE = 4;
11049};
11050} // namespace impl
11051
11052namespace v_1_0
11053{
11055{
11056public:
11057 IProgressMonitor() = default;
11058 virtual ~IProgressMonitor() noexcept = default;
11059
11063 InterfaceInfo getInterfaceInfo() const noexcept override
11064 {
11065 return InterfaceInfo{"IProgressMonitor", 1, 0};
11066 }
11067
11087 virtual void phaseStart(char const* phaseName, char const* parentPhase, int32_t nbSteps) noexcept = 0;
11088
11101 virtual bool stepComplete(char const* phaseName, int32_t step) noexcept = 0;
11102
11114 virtual void phaseFinish(char const* phaseName) noexcept = 0;
11115
11116}; // class IProgressMonitor
11117} // namespace v_1_0
11118
11139
11148{
11149public:
11150 virtual ~IBuilderConfig() noexcept = default;
11151
11160 virtual void setAvgTimingIterations(int32_t avgTiming) noexcept
11161 {
11162 mImpl->setAvgTimingIterations(avgTiming);
11163 }
11164
11172 int32_t getAvgTimingIterations() const noexcept
11173 {
11174 return mImpl->getAvgTimingIterations();
11175 }
11176
11185 void setEngineCapability(EngineCapability capability) noexcept
11186 {
11187 mImpl->setEngineCapability(capability);
11188 }
11189
11198 {
11199 return mImpl->getEngineCapability();
11200 }
11201
11210 {
11211 mImpl->setInt8Calibrator(calibrator);
11212 }
11213
11220 {
11221 return mImpl->getInt8Calibrator();
11222 }
11223
11236 void setFlags(BuilderFlags builderFlags) noexcept
11237 {
11238 mImpl->setFlags(builderFlags);
11239 }
11240
11248 BuilderFlags getFlags() const noexcept
11249 {
11250 return mImpl->getFlags();
11251 }
11252
11260 void clearFlag(BuilderFlag builderFlag) noexcept
11261 {
11262 mImpl->clearFlag(builderFlag);
11263 }
11264
11272 void setFlag(BuilderFlag builderFlag) noexcept
11273 {
11274 mImpl->setFlag(builderFlag);
11275 }
11276
11284 bool getFlag(BuilderFlag builderFlag) const noexcept
11285 {
11286 return mImpl->getFlag(builderFlag);
11287 }
11288
11301 void setDeviceType(ILayer const* layer, DeviceType deviceType) noexcept
11302 {
11303 mImpl->setDeviceType(layer, deviceType);
11304 }
11305
11311 DeviceType getDeviceType(ILayer const* layer) const noexcept
11312 {
11313 return mImpl->getDeviceType(layer);
11314 }
11315
11323 bool isDeviceTypeSet(ILayer const* layer) const noexcept
11324 {
11325 return mImpl->isDeviceTypeSet(layer);
11326 }
11327
11333 void resetDeviceType(ILayer const* layer) noexcept
11334 {
11335 mImpl->resetDeviceType(layer);
11336 }
11337
11343 bool canRunOnDLA(ILayer const* layer) const noexcept
11344 {
11345 return mImpl->canRunOnDLA(layer);
11346 }
11347
11359 void setDLACore(int32_t dlaCore) noexcept
11360 {
11361 mImpl->setDLACore(dlaCore);
11362 }
11363
11369 int32_t getDLACore() const noexcept
11370 {
11371 return mImpl->getDLACore();
11372 }
11373
11380 void setDefaultDeviceType(DeviceType deviceType) noexcept
11381 {
11382 mImpl->setDefaultDeviceType(deviceType);
11383 }
11384
11391 {
11392 return mImpl->getDefaultDeviceType();
11393 }
11394
11400 void reset() noexcept
11401 {
11402 mImpl->reset();
11403 }
11404
11412 void setProfileStream(const cudaStream_t stream) noexcept
11413 {
11414 return mImpl->setProfileStream(stream);
11415 }
11416
11424 cudaStream_t getProfileStream() const noexcept
11425 {
11426 return mImpl->getProfileStream();
11427 }
11428
11441 int32_t addOptimizationProfile(IOptimizationProfile const* profile) noexcept
11442 {
11443 return mImpl->addOptimizationProfile(profile);
11444 }
11445
11454 int32_t getNbOptimizationProfiles() const noexcept
11455 {
11456 return mImpl->getNbOptimizationProfiles();
11457 }
11458
11467 {
11468 mImpl->setProfilingVerbosity(verbosity);
11469 }
11470
11480 {
11481 return mImpl->getProfilingVerbosity();
11482 }
11483
11492 {
11493 mImpl->setAlgorithmSelector(selector);
11494 }
11495
11502 {
11503 return mImpl->getAlgorithmSelector();
11504 }
11505
11520 {
11521 return mImpl->setCalibrationProfile(profile);
11522 }
11523
11532 {
11533 return mImpl->getCalibrationProfile();
11534 }
11535
11551 {
11552 mImpl->setQuantizationFlags(flags);
11553 }
11554
11565 {
11566 return mImpl->getQuantizationFlags();
11567 }
11568
11579 {
11580 mImpl->clearQuantizationFlag(flag);
11581 }
11582
11593 {
11594 mImpl->setQuantizationFlag(flag);
11595 }
11596
11607 {
11608 return mImpl->getQuantizationFlag(flag);
11609 }
11610
11628 bool setTacticSources(TacticSources tacticSources) noexcept
11629 {
11630 return mImpl->setTacticSources(tacticSources);
11631 }
11632
11644 {
11645 return mImpl->getTacticSources();
11646 }
11647
11663 nvinfer1::ITimingCache* createTimingCache(void const* blob, std::size_t size) const noexcept
11664 {
11665 return mImpl->createTimingCache(blob, size);
11666 }
11667
11686 bool setTimingCache(ITimingCache const& cache, bool ignoreMismatch) noexcept
11687 {
11688 return mImpl->setTimingCache(cache, ignoreMismatch);
11689 }
11690
11697 {
11698 return mImpl->getTimingCache();
11699 }
11700
11728 void setMemoryPoolLimit(MemoryPoolType pool, std::size_t poolSize) noexcept
11729 {
11730 mImpl->setMemoryPoolLimit(pool, poolSize);
11731 }
11732
11747 std::size_t getMemoryPoolLimit(MemoryPoolType pool) const noexcept
11748 {
11749 return mImpl->getMemoryPoolLimit(pool);
11750 }
11751
11765 void setPreviewFeature(PreviewFeature feature, bool enable) noexcept
11766 {
11767 mImpl->setPreviewFeature(feature, enable);
11768 }
11769
11779 bool getPreviewFeature(PreviewFeature feature) const noexcept
11780 {
11781 return mImpl->getPreviewFeature(feature);
11782 }
11783
11812 void setBuilderOptimizationLevel(int32_t level) noexcept
11813 {
11814 mImpl->setBuilderOptimizationLevel(level);
11815 }
11816
11825 {
11826 return mImpl->getBuilderOptimizationLevel();
11827 }
11828
11841 void setHardwareCompatibilityLevel(HardwareCompatibilityLevel hardwareCompatibilityLevel) noexcept
11842 {
11843 mImpl->setHardwareCompatibilityLevel(hardwareCompatibilityLevel);
11844 }
11845
11855 {
11856 return mImpl->getHardwareCompatibilityLevel();
11857 }
11858
11867 void setPluginsToSerialize(char const* const* paths, int32_t nbPaths) noexcept
11868 {
11869 mImpl->setPluginsToSerialize(paths, nbPaths);
11870 }
11871
11880 char const* getPluginToSerialize(int32_t index) const noexcept
11881 {
11882 return mImpl->getPluginToSerialize(index);
11883 }
11884
11890 int32_t getNbPluginsToSerialize() const noexcept
11891 {
11892 return mImpl->getNbPluginsToSerialize();
11893 }
11894
11919 void setMaxAuxStreams(int32_t nbStreams) noexcept
11920 {
11921 mImpl->setMaxAuxStreams(nbStreams);
11922 }
11923
11929 int32_t getMaxAuxStreams() const noexcept
11930 {
11931 return mImpl->getMaxAuxStreams();
11932 }
11933
11945 void setProgressMonitor(IProgressMonitor* monitor) noexcept
11946 {
11947 return mImpl->setProgressMonitor(monitor);
11948 }
11949
11956 {
11957 return mImpl->getProgressMonitor();
11958 }
11959
11971 void setRuntimePlatform(RuntimePlatform runtimePlatform) noexcept
11972 {
11973 mImpl->setRuntimePlatform(runtimePlatform);
11974 }
11975
11984 {
11985 return mImpl->getRuntimePlatform();
11986 }
11987
11995 void setMaxNbTactics(int32_t maxNbTactics) noexcept
11996 {
11997 mImpl->setMaxNbTactics(maxNbTactics);
11998 }
11999
12007 int32_t getMaxNbTactics() const noexcept
12008 {
12009 return mImpl->getMaxNbTactics();
12010 }
12011
12024 {
12025 return mImpl->setTilingOptimizationLevel(level);
12026 }
12027
12036 {
12037 return mImpl->getTilingOptimizationLevel();
12038 }
12039
12051 bool setL2LimitForTiling(int64_t size) noexcept
12052 {
12053 return mImpl->setL2LimitForTiling(size);
12054 }
12055
12063 int64_t getL2LimitForTiling() const noexcept
12064 {
12065 return mImpl->getL2LimitForTiling();
12066 }
12067
12077 bool setRemoteAutoTuningConfig(char const* config) noexcept
12078 {
12079 return mImpl->setRemoteAutoTuningConfig(config);
12080 }
12081
12087 char const* getRemoteAutoTuningConfig() const noexcept
12088 {
12089 return mImpl->getRemoteAutoTuningConfig();
12090 }
12091
12092protected:
12093 apiv::VBuilderConfig* mImpl;
12094};
12095
12104
12114{
12119
12124 kSTRONGLY_TYPED = 1,
12129
12134};
12135
12141template <>
12142constexpr inline int32_t EnumMax<NetworkDefinitionCreationFlag>() noexcept
12143{
12144 return 4;
12145}
12146
12154class IBuilder : public INoCopy
12155{
12156public:
12157 virtual ~IBuilder() noexcept = default;
12158
12164 TRT_DEPRECATED bool platformHasFastFp16() const noexcept
12165 {
12166 return mImpl->platformHasFastFp16();
12167 }
12168
12175 {
12176 return mImpl->platformHasFastInt8();
12177 }
12178
12186 int32_t getMaxDLABatchSize() const noexcept
12187 {
12188 return mImpl->getMaxDLABatchSize();
12189 }
12190
12194 int32_t getNbDLACores() const noexcept
12195 {
12196 return mImpl->getNbDLACores();
12197 }
12198
12212 void setGpuAllocator(IGpuAllocator* allocator) noexcept
12213 {
12214 mImpl->setGpuAllocator(allocator);
12215 }
12216
12227 {
12228 return mImpl->createBuilderConfig();
12229 }
12230
12253 {
12254 return mImpl->createNetworkV2(flags);
12255 }
12256
12268 {
12269 return mImpl->createOptimizationProfile();
12270 }
12271
12286 void setErrorRecorder(IErrorRecorder* recorder) noexcept
12287 {
12288 mImpl->setErrorRecorder(recorder);
12289 }
12290
12302 {
12303 return mImpl->getErrorRecorder();
12304 }
12305
12309 void reset() noexcept
12310 {
12311 mImpl->reset();
12312 }
12313
12319 TRT_DEPRECATED bool platformHasTf32() const noexcept
12320 {
12321 return mImpl->platformHasTf32();
12322 }
12323
12339 {
12340 return mImpl->buildSerializedNetwork(network, config);
12341 }
12342
12360 INetworkDefinition& network, IBuilderConfig& config, IStreamWriter& writer) noexcept
12361 {
12362 return mImpl->buildSerializedNetworkToStream(network, config, writer);
12363 }
12364
12384 INetworkDefinition& network, IBuilderConfig& config, IHostMemory*& kernelText) noexcept
12385 {
12386 return mImpl->buildSerializedNetworkWithKernelText(network, config, kernelText);
12387 }
12388
12405 {
12406 return mImpl->buildEngineWithConfig(network, config);
12407 }
12408
12430 bool isNetworkSupported(INetworkDefinition const& network, IBuilderConfig const& config) const noexcept
12431 {
12432 return mImpl->isNetworkSupported(network, config);
12433 }
12434
12440 ILogger* getLogger() const noexcept
12441 {
12442 return mImpl->getLogger();
12443 }
12444
12456 bool setMaxThreads(int32_t maxThreads) noexcept
12457 {
12458 return mImpl->setMaxThreads(maxThreads);
12459 }
12460
12470 int32_t getMaxThreads() const noexcept
12471 {
12472 return mImpl->getMaxThreads();
12473 }
12474
12481 {
12482 return mImpl->getPluginRegistry();
12483 }
12484
12485protected:
12486 apiv::VBuilder* mImpl;
12487};
12488
12489} // namespace nvinfer1
12490
12495extern "C" TENSORRTAPI void* createInferBuilder_INTERNAL(void* logger, int32_t version) noexcept;
12496
12497namespace nvinfer1
12498{
12499namespace
12500{
12501
12509inline IBuilder* createInferBuilder(ILogger& logger) noexcept
12510{
12511 return static_cast<IBuilder*>(createInferBuilder_INTERNAL(&logger, NV_TENSORRT_VERSION));
12512}
12513
12514} // namespace
12515
12528 nvinfer1::EngineCapability capability) noexcept;
12529
12539extern "C" TENSORRTAPI bool setInternalLibraryPath(AsciiChar const* path) noexcept;
12540
12541namespace safe
12542{
12544class IPluginRegistry;
12545} // namespace safe
12546
12554extern "C" TRT_DEPRECATED_API nvinfer1::safe::IPluginRegistry* getBuilderSafePluginRegistry(
12555 nvinfer1::EngineCapability capability) noexcept;
12556
12557} // namespace nvinfer1
12558
12559#endif // NV_INFER_H
#define TRT_DEPRECATED_API
Definition: NvInferRuntimeBase.h:44
#define TENSORRTAPI
Definition: NvInferRuntimeBase.h:70
#define NV_TENSORRT_VERSION
Definition: NvInferRuntimeBase.h:102
#define TRT_NODISCARD
A stand-in for [[nodiscard]] and [[nodiscard(REASON)]] that works with older compilers.
Definition: NvInferRuntimeBase.h:57
#define TRT_DEPRECATED
Definition: NvInferRuntimeBase.h:42
#define TRT_DEPRECATED_ENUM
Definition: NvInferRuntimeBase.h:43
Definition: NvInferRuntimeBase.h:219
static constexpr int32_t MAX_DIMS
The maximum rank (number of dimensions) supported for a tensor.
Definition: NvInferRuntimeBase.h:222
An Activation layer in a network definition.
Definition: NvInfer.h:1421
void setBeta(float beta) noexcept
Set the beta parameter (must be finite).
Definition: NvInfer.h:1469
void setActivationType(ActivationType type) noexcept
Set the type of activation to be performed.
Definition: NvInfer.h:1430
ActivationType getActivationType() const noexcept
Get the type of activation to be performed.
Definition: NvInfer.h:1440
float getAlpha() const noexcept
Get the alpha parameter.
Definition: NvInfer.h:1478
virtual ~IActivationLayer() noexcept=default
float getBeta() const noexcept
Get the beta parameter.
Definition: NvInfer.h:1487
void setAlpha(float alpha) noexcept
Set the alpha parameter (must be finite).
Definition: NvInfer.h:1455
Describes the context and requirements that could be fulfilled by one or more instances of IAlgorith...
Definition: NvInfer.h:10203
int32_t getNbOutputs() const noexcept
Return number of outputs of the algorithm.
Definition: NvInfer.h:10238
int32_t getNbInputs() const noexcept
Return number of inputs of the algorithm.
Definition: NvInfer.h:10230
char const * getName() const noexcept
Return name of the algorithm node.
Definition: NvInfer.h:10210
virtual ~IAlgorithmContext() noexcept=default
Dims getDimensions(int32_t index, OptProfileSelector select) const noexcept
Get the minimum / optimum / maximum dimensions for input or output tensor.
Definition: NvInfer.h:10222
Describes a variation of execution of a layer. An algorithm is represented by IAlgorithmVariant and t...
Definition: NvInfer.h:10262
std::size_t getWorkspaceSize() const noexcept
The size of the GPU temporary memory in bytes which the algorithm uses at execution time.
Definition: NvInfer.h:10283
float getTimingMSec() const noexcept
The time in milliseconds to execute the algorithm.
Definition: NvInfer.h:10275
IAlgorithmIOInfo const * getAlgorithmIOInfoByIndex(int32_t index) const noexcept
Returns the format of an Algorithm input or output. Algorithm inputs are incrementally numbered first...
Definition: NvInfer.h:10297
virtual ~IAlgorithm() noexcept=default
IAlgorithmVariant const & getAlgorithmVariant() const noexcept
Returns the algorithm variant.
Definition: NvInfer.h:10267
Carries information about input or output of the algorithm. IAlgorithmIOInfo for all the input and ou...
Definition: NvInfer.h:10106
virtual ~IAlgorithmIOInfo() noexcept=default
int64_t getVectorizedDim() const noexcept
Return the index of the vectorized dimension or -1 for non-vectorized formats.
Definition: NvInfer.h:10134
Dims getStrides() const noexcept
Return strides of the input/output tensor of algorithm. For vectorized formats, strides are given in ...
Definition: NvInfer.h:10124
DataType getDataType() const noexcept
Return DataType of the input/output of algorithm.
Definition: NvInfer.h:10113
int64_t getComponentsPerElement() const noexcept
Return the number of components per element. This is always 1 for non-vectorized formats.
Definition: NvInfer.h:10145
Provides a unique 128-bit identifier, which along with the input and output information denotes the v...
Definition: NvInfer.h:10169
virtual ~IAlgorithmVariant() noexcept=default
int64_t getTactic() const noexcept
Return tactic of the algorithm.
Definition: NvInfer.h:10182
int64_t getImplementation() const noexcept
Return implementation of the algorithm.
Definition: NvInfer.h:10174
An assertion layer in a network.
Definition: NvInfer.h:5123
void setMessage(char const *message) noexcept
Set the message to print if the assertion fails.
Definition: NvInfer.h:5133
char const * getMessage() const noexcept
Return the assertion message.
Definition: NvInfer.h:5143
virtual ~IAssertionLayer() noexcept=default
This is a base class for Attention boundary layers.
Definition: NvInfer.h:6956
IAttention * getAttention() const noexcept
Get a pointer to the IAttention associated with this boundary layer.
Definition: NvInfer.h:6961
virtual ~IAttentionBoundaryLayer() noexcept=default
Helper for constructing an attention that consumes query, key and value tensors.
Definition: NvInfer.h:7074
ITensor * getMask() noexcept
Get the optional mask in attention.
Definition: NvInfer.h:7124
bool setMetadata(char const *metadata) noexcept
Set the metadata for IAttention.
Definition: NvInfer.h:7335
bool setDecomposable(bool decomposable) noexcept
Set whether the attention can be decomposed to use multiple kernels if no fused kernel support found.
Definition: NvInfer.h:7161
bool setName(char const *name) noexcept
Set the name of the attention.
Definition: NvInfer.h:7251
bool getDecomposable() const noexcept
Get whether the attention can be decomposed to use multiple kernels if no fused kernel support found.
Definition: NvInfer.h:7174
ITensor * getInput(int32_t index) const noexcept
Get the IAttention input corresponding to the given index.
Definition: NvInfer.h:7214
ITensor * getOutput(int32_t index) const noexcept
Get the IAttention output corresponding to the given index. IAttention has only one output.
Definition: NvInfer.h:7234
int32_t getNbOutputs() const noexcept
Get the number of outputs of a layer. IAttention has one output.
Definition: NvInfer.h:7222
bool setNbRanks(int32_t nbRanks) noexcept
Set the number of ranks for multi-device attention execution.
Definition: NvInfer.h:7364
int32_t getNbInputs() const noexcept
Get the number of inputs of IAttention. IAttention has three inputs.
Definition: NvInfer.h:7202
bool setCausal(bool isCausal) noexcept
Set whether the attention will run a causal inference. Cannot be used together with setMask().
Definition: NvInfer.h:7137
bool setNormalizationOperation(AttentionNormalizationOp op) noexcept
Set the normalization operation for the attention.
Definition: NvInfer.h:7083
char const * getName() const noexcept
Return the name of the attention.
Definition: NvInfer.h:7263
bool setNormalizationQuantizeToType(DataType type) noexcept
Set the datatype the attention normalization is quantized to.
Definition: NvInfer.h:7303
int32_t getNbRanks() const noexcept
Get the number of ranks for multi-device execution.
Definition: NvInfer.h:7376
AttentionNormalizationOp getNormalizationOperation() const noexcept
Get the normalization operation for the attention.
Definition: NvInfer.h:7095
bool setNormalizationQuantizeScale(ITensor &tensor) noexcept
Set the quantization scale for the attention normalization output.
Definition: NvInfer.h:7279
char const * getMetadata() const noexcept
Get the metadata of IAttention.
Definition: NvInfer.h:7348
DataType getNormalizationQuantizeToType() const noexcept
Get the datatype the attention normalization is quantized to.
Definition: NvInfer.h:7315
ITensor * getNormalizationQuantizeScale() const noexcept
Get the quantization scale for the attention normalization output.
Definition: NvInfer.h:7290
bool setInput(int32_t index, ITensor &input) noexcept
Append or replace an input of this layer with a specific tensor.
Definition: NvInfer.h:7193
bool setMask(ITensor &mask) noexcept
Set whether a mask will be used for the normalization operation.
Definition: NvInfer.h:7112
bool getCausal() const noexcept
Get whether the attention will run a causal inference.
Definition: NvInfer.h:7149
apiv::VAttention * mImpl
Definition: NvInfer.h:7382
virtual ~IAttention() noexcept=default
This layer represents an input to an attention subgraph.
Definition: NvInfer.h:6982
virtual ~IAttentionInputLayer() noexcept=default
This layer represents an output of an IAttention.
Definition: NvInfer.h:7017
virtual ~IAttentionOutputLayer() noexcept=default
Holds properties for configuring a builder to produce an engine.
Definition: NvInfer.h:11148
void setMemoryPoolLimit(MemoryPoolType pool, std::size_t poolSize) noexcept
Set the memory size for the memory pool.
Definition: NvInfer.h:11728
TRT_DEPRECATED void setQuantizationFlags(QuantizationFlags flags) noexcept
Set the quantization flags.
Definition: NvInfer.h:11550
nvinfer1::ITimingCache * createTimingCache(void const *blob, std::size_t size) const noexcept
Create timing cache.
Definition: NvInfer.h:11663
void setPreviewFeature(PreviewFeature feature, bool enable) noexcept
Enable or disable a specific preview feature.
Definition: NvInfer.h:11765
TRT_DEPRECATED void setAlgorithmSelector(IAlgorithmSelector *selector) noexcept
Set Algorithm Selector.
Definition: NvInfer.h:11491
TRT_DEPRECATED void setInt8Calibrator(IInt8Calibrator *calibrator) noexcept
Set Int8 Calibration interface.
Definition: NvInfer.h:11209
bool getPreviewFeature(PreviewFeature feature) const noexcept
Get status of preview feature.
Definition: NvInfer.h:11779
int32_t getBuilderOptimizationLevel() noexcept
Get builder optimization level.
Definition: NvInfer.h:11824
bool setTacticSources(TacticSources tacticSources) noexcept
Set tactic sources.
Definition: NvInfer.h:11628
void setPluginsToSerialize(char const *const *paths, int32_t nbPaths) noexcept
Set the plugin libraries to be serialized with version-compatible engines.
Definition: NvInfer.h:11867
bool setTilingOptimizationLevel(TilingOptimizationLevel level) noexcept
Set the Tiling optimization level.
Definition: NvInfer.h:12023
bool setL2LimitForTiling(int64_t size) noexcept
Set the L2 cache usage limit for Tiling optimization.
Definition: NvInfer.h:12051
TRT_DEPRECATED IInt8Calibrator * getInt8Calibrator() const noexcept
Get Int8 Calibration interface.
Definition: NvInfer.h:11219
std::size_t getMemoryPoolLimit(MemoryPoolType pool) const noexcept
Get the memory size limit of the memory pool.
Definition: NvInfer.h:11747
int32_t getDLACore() const noexcept
Get the DLA core that the engine executes on.
Definition: NvInfer.h:11369
int32_t getNbPluginsToSerialize() const noexcept
Get the number of plugin library paths to be serialized with version-compatible engines.
Definition: NvInfer.h:11890
void setDeviceType(ILayer const *layer, DeviceType deviceType) noexcept
Set the device that this layer must execute on.
Definition: NvInfer.h:11301
void setEngineCapability(EngineCapability capability) noexcept
Configure the builder to target specified EngineCapability flow.
Definition: NvInfer.h:11185
int32_t getMaxAuxStreams() const noexcept
Get the maximum number of auxiliary streams that TRT is allowed to use.
Definition: NvInfer.h:11929
bool getFlag(BuilderFlag builderFlag) const noexcept
Returns true if the build mode flag is set.
Definition: NvInfer.h:11284
void setMaxNbTactics(int32_t maxNbTactics) noexcept
Set the maximum number of tactics to time when there is a choice of tactics.
Definition: NvInfer.h:11995
TRT_DEPRECATED void clearQuantizationFlag(QuantizationFlag flag) noexcept
clear a quantization flag.
Definition: NvInfer.h:11578
int64_t getL2LimitForTiling() const noexcept
Get the L2 cache usage limit for tiling optimization.
Definition: NvInfer.h:12063
bool setRemoteAutoTuningConfig(char const *config) noexcept
Set a config string for remote auto tuning.
Definition: NvInfer.h:12077
void setProgressMonitor(IProgressMonitor *monitor) noexcept
Sets the progress monitor for building a network.
Definition: NvInfer.h:11945
void setProfilingVerbosity(ProfilingVerbosity verbosity) noexcept
Set verbosity level of layer information exposed in NVTX annotations and IEngineInspector.
Definition: NvInfer.h:11466
int32_t getNbOptimizationProfiles() const noexcept
Get number of optimization profiles.
Definition: NvInfer.h:11454
nvinfer1::ITimingCache const * getTimingCache() const noexcept
Get the pointer to the timing cache from current IBuilderConfig.
Definition: NvInfer.h:11696
void reset() noexcept
Resets the builder configuration to defaults.
Definition: NvInfer.h:11400
bool setTimingCache(ITimingCache const &cache, bool ignoreMismatch) noexcept
Attach a timing cache to IBuilderConfig.
Definition: NvInfer.h:11686
char const * getPluginToSerialize(int32_t index) const noexcept
Get the plugin library path to be serialized with version-compatible engines.
Definition: NvInfer.h:11880
EngineCapability getEngineCapability() const noexcept
Query EngineCapability flow configured for the builder.
Definition: NvInfer.h:11197
RuntimePlatform getRuntimePlatform() const noexcept
Get the target platform for runtime execution.
Definition: NvInfer.h:11983
DeviceType getDefaultDeviceType() const noexcept
Get the default DeviceType which was set by setDefaultDeviceType.
Definition: NvInfer.h:11390
void setRuntimePlatform(RuntimePlatform runtimePlatform) noexcept
Set the target platform for runtime execution.
Definition: NvInfer.h:11971
int32_t getMaxNbTactics() const noexcept
Query the maximum number of tactics timed when there is a choice.
Definition: NvInfer.h:12007
BuilderFlags getFlags() const noexcept
Get the build mode flags for this builder config. Defaults to 0.
Definition: NvInfer.h:11248
void setFlags(BuilderFlags builderFlags) noexcept
Set the build mode flags to turn on builder options for this network.
Definition: NvInfer.h:11236
TacticSources getTacticSources() const noexcept
Get tactic sources.
Definition: NvInfer.h:11643
void resetDeviceType(ILayer const *layer) noexcept
reset the DeviceType for this layer
Definition: NvInfer.h:11333
void setDLACore(int32_t dlaCore) noexcept
Sets the DLA core used by the network. Defaults to -1.
Definition: NvInfer.h:11359
HardwareCompatibilityLevel getHardwareCompatibilityLevel() const noexcept
Get the hardware compatibility level.
Definition: NvInfer.h:11854
char const * getRemoteAutoTuningConfig() const noexcept
Get a config string for remote auto tuning.
Definition: NvInfer.h:12087
TRT_DEPRECATED QuantizationFlags getQuantizationFlags() const noexcept
Get the quantization flags.
Definition: NvInfer.h:11564
void clearFlag(BuilderFlag builderFlag) noexcept
clear a single build mode flag.
Definition: NvInfer.h:11260
int32_t addOptimizationProfile(IOptimizationProfile const *profile) noexcept
Add an optimization profile.
Definition: NvInfer.h:11441
IProgressMonitor * getProgressMonitor() const noexcept
Definition: NvInfer.h:11955
apiv::VBuilderConfig * mImpl
Definition: NvInfer.h:12093
TRT_DEPRECATED IOptimizationProfile const * getCalibrationProfile() noexcept
Get the current calibration profile.
Definition: NvInfer.h:11531
int32_t getAvgTimingIterations() const noexcept
Query the number of averaging iterations.
Definition: NvInfer.h:11172
void setDefaultDeviceType(DeviceType deviceType) noexcept
Sets the default DeviceType to be used by the builder. It ensures that all the layers that can run on...
Definition: NvInfer.h:11380
void setFlag(BuilderFlag builderFlag) noexcept
Set a single build mode flag.
Definition: NvInfer.h:11272
TRT_DEPRECATED bool setCalibrationProfile(IOptimizationProfile const *profile) noexcept
Add a calibration profile.
Definition: NvInfer.h:11519
virtual ~IBuilderConfig() noexcept=default
DeviceType getDeviceType(ILayer const *layer) const noexcept
Get the device that this layer executes on.
Definition: NvInfer.h:11311
bool canRunOnDLA(ILayer const *layer) const noexcept
Checks if a layer can run on DLA.
Definition: NvInfer.h:11343
TRT_DEPRECATED bool getQuantizationFlag(QuantizationFlag flag) const noexcept
Returns true if the quantization flag is set.
Definition: NvInfer.h:11606
cudaStream_t getProfileStream() const noexcept
Get the CUDA stream that is used to profile this network.
Definition: NvInfer.h:11424
void setHardwareCompatibilityLevel(HardwareCompatibilityLevel hardwareCompatibilityLevel) noexcept
Set the hardware compatibility level.
Definition: NvInfer.h:11841
TilingOptimizationLevel getTilingOptimizationLevel() const noexcept
Get the Tiling optimization level.
Definition: NvInfer.h:12035
TRT_DEPRECATED void setQuantizationFlag(QuantizationFlag flag) noexcept
Set a single quantization flag.
Definition: NvInfer.h:11592
void setMaxAuxStreams(int32_t nbStreams) noexcept
Set the maximum number of auxiliary streams that TRT is allowed to use.
Definition: NvInfer.h:11919
ProfilingVerbosity getProfilingVerbosity() const noexcept
Get verbosity level of layer information exposed in NVTX annotations and IEngineInspector.
Definition: NvInfer.h:11479
bool isDeviceTypeSet(ILayer const *layer) const noexcept
whether the DeviceType has been explicitly set for this layer
Definition: NvInfer.h:11323
void setBuilderOptimizationLevel(int32_t level) noexcept
Set builder optimization level.
Definition: NvInfer.h:11812
void setProfileStream(const cudaStream_t stream) noexcept
Set the CUDA stream that is used to profile this network.
Definition: NvInfer.h:11412
TRT_DEPRECATED IAlgorithmSelector * getAlgorithmSelector() const noexcept
Get Algorithm Selector.
Definition: NvInfer.h:11501
Builds an engine from a network definition.
Definition: NvInfer.h:12155
int32_t getMaxDLABatchSize() const noexcept
Get the maximum batch size DLA can support. For any tensor the total volume of index dimensions combi...
Definition: NvInfer.h:12186
int32_t getNbDLACores() const noexcept
Return the number of DLA engines available to this builder.
Definition: NvInfer.h:12194
IErrorRecorder * getErrorRecorder() const noexcept
get the ErrorRecorder assigned to this interface.
Definition: NvInfer.h:12301
apiv::VBuilder * mImpl
Definition: NvInfer.h:12486
ILogger * getLogger() const noexcept
get the logger with which the builder was created
Definition: NvInfer.h:12440
bool isNetworkSupported(INetworkDefinition const &network, IBuilderConfig const &config) const noexcept
Checks that a network is within the scope of the IBuilderConfig settings.
Definition: NvInfer.h:12430
int32_t getMaxThreads() const noexcept
get the maximum number of threads that can be used by the builder.
Definition: NvInfer.h:12470
IPluginRegistry & getPluginRegistry() noexcept
get the local plugin registry that can be used by the builder.
Definition: NvInfer.h:12480
TRT_DEPRECATED bool platformHasFastInt8() const noexcept
Determine whether the platform has fast native int8.
Definition: NvInfer.h:12174
nvinfer1::IOptimizationProfile * createOptimizationProfile() noexcept
Create a new optimization profile.
Definition: NvInfer.h:12267
void setGpuAllocator(IGpuAllocator *allocator) noexcept
Set the GPU allocator.
Definition: NvInfer.h:12212
nvinfer1::INetworkDefinition * createNetworkV2(NetworkDefinitionCreationFlags flags) noexcept
Create a network definition object.
Definition: NvInfer.h:12252
nvinfer1::IBuilderConfig * createBuilderConfig() noexcept
Create a builder configuration object.
Definition: NvInfer.h:12226
void reset() noexcept
Resets the builder state to default values.
Definition: NvInfer.h:12309
bool setMaxThreads(int32_t maxThreads) noexcept
Set the maximum number of threads.
Definition: NvInfer.h:12456
void setErrorRecorder(IErrorRecorder *recorder) noexcept
Set the ErrorRecorder for this interface.
Definition: NvInfer.h:12286
nvinfer1::IHostMemory * buildSerializedNetwork(INetworkDefinition &network, IBuilderConfig &config) noexcept
Builds and serializes a network for the given INetworkDefinition and IBuilderConfig.
Definition: NvInfer.h:12338
virtual ~IBuilder() noexcept=default
TRT_DEPRECATED bool platformHasTf32() const noexcept
Determine whether the platform has TF32 support.
Definition: NvInfer.h:12319
bool buildSerializedNetworkToStream(INetworkDefinition &network, IBuilderConfig &config, IStreamWriter &writer) noexcept
Builds and serializes a network into stream for the given INetworkDefinition and IBuilderConfig.
Definition: NvInfer.h:12359
nvinfer1::ICudaEngine * buildEngineWithConfig(INetworkDefinition &network, IBuilderConfig &config) noexcept
Builds a network for the given INetworkDefinition and IBuilderConfig.
Definition: NvInfer.h:12404
nvinfer1::IHostMemory * buildSerializedNetwork(INetworkDefinition &network, IBuilderConfig &config, IHostMemory *&kernelText) noexcept
Extended form of buildSerializedNetwork that optionally permits getting the kernelText.
Definition: NvInfer.h:12383
A cast layer in a network.
Definition: NvInfer.h:3984
virtual ~ICastLayer() noexcept=default
apiv::VCastLayer * mImpl
Definition: NvInfer.h:4010
DataType getToType() const noexcept
Return cast layer output type.
Definition: NvInfer.h:4004
void setToType(DataType toType) noexcept
Set cast layer output type.
Definition: NvInfer.h:3993
A concatenation layer in a network definition.
Definition: NvInfer.h:2131
void setAxis(int32_t axis) noexcept
Set the axis along which concatenation occurs.
Definition: NvInfer.h:2144
int32_t getAxis() const noexcept
Get the axis along which concatenation occurs.
Definition: NvInfer.h:2154
virtual ~IConcatenationLayer() noexcept=default
This layer represents a condition input to an IIfConditional.
Definition: NvInfer.h:4647
virtual ~IConditionLayer() noexcept=default
Layer that represents a constant value.
Definition: NvInfer.h:4023
void setWeights(Weights weights) noexcept
Set the weights for the layer.
Definition: NvInfer.h:4033
Weights getWeights() const noexcept
Get the weights for the layer.
Definition: NvInfer.h:4043
void setDimensions(Dims const &dimensions) noexcept
Set the dimensions for the layer.
Definition: NvInfer.h:4055
apiv::VConstantLayer * mImpl
Definition: NvInfer.h:4073
virtual ~IConstantLayer() noexcept=default
Dims getDimensions() const noexcept
Get the dimensions for the layer.
Definition: NvInfer.h:4067
A convolution layer in a network definition.
Definition: NvInfer.h:1101
Dims getPrePadding() const noexcept
Get the pre-padding.
Definition: NvInfer.h:1226
Weights getBiasWeights() const noexcept
Get the bias weights for the convolution.
Definition: NvInfer.h:1199
void setPaddingMode(PaddingMode paddingMode) noexcept
Set the padding mode.
Definition: NvInfer.h:1267
void setDilationNd(Dims const &dilation) noexcept
Set the multi-dimension dilation of the convolution.
Definition: NvInfer.h:1371
Dims getPaddingNd() const noexcept
Get the multi-dimension padding of the convolution.
Definition: NvInfer.h:1357
Dims getStrideNd() const noexcept
Get the multi-dimension stride of the convolution.
Definition: NvInfer.h:1327
Weights getKernelWeights() const noexcept
Get the kernel weights of the convolution.
Definition: NvInfer.h:1174
void setStrideNd(Dims const &stride) noexcept
Set the multi-dimension stride of the convolution.
Definition: NvInfer.h:1317
Dims getDilationNd() const noexcept
Get the multi-dimension dilation of the convolution.
Definition: NvInfer.h:1381
int64_t getNbOutputMaps() const noexcept
Get the number of output maps for the convolution.
Definition: NvInfer.h:1120
void setKernelWeights(Weights weights) noexcept
Set the kernel weights for the convolution.
Definition: NvInfer.h:1164
Dims getPostPadding() const noexcept
Get the post-padding.
Definition: NvInfer.h:1253
int64_t getNbGroups() const noexcept
Get the number of groups of the convolution.
Definition: NvInfer.h:1150
PaddingMode getPaddingMode() const noexcept
Get the padding mode.
Definition: NvInfer.h:1279
virtual ~IConvolutionLayer() noexcept=default
void setNbGroups(int64_t nbGroups) noexcept
Set the number of groups for a convolution.
Definition: NvInfer.h:1140
void setNbOutputMaps(int64_t nbOutputMaps) noexcept
Set the number of output maps for the convolution.
Definition: NvInfer.h:1110
void setBiasWeights(Weights weights) noexcept
Set the bias weights for the convolution.
Definition: NvInfer.h:1189
Dims getKernelSizeNd() const noexcept
Get the multi-dimension kernel size of the convolution.
Definition: NvInfer.h:1302
void setPaddingNd(Dims const &padding) noexcept
Set the multi-dimension padding of the convolution.
Definition: NvInfer.h:1345
void setPrePadding(Dims const &padding) noexcept
Set the multi-dimension pre-padding of the convolution.
Definition: NvInfer.h:1216
void setPostPadding(Dims const &padding) noexcept
Set the multi-dimension post-padding of the convolution.
Definition: NvInfer.h:1243
void setKernelSizeNd(Dims const &kernelSize) noexcept
Set the multi-dimension kernel size of the convolution.
Definition: NvInfer.h:1292
An engine for executing inference on a built network, with functionally unsafe features.
Definition: NvInferRuntime.h:3197
Layer that represents a cumulative operation across a tensor.
Definition: NvInfer.h:6838
bool setOperation(CumulativeOperation op) noexcept
Set the cumulative operation for the layer.
Definition: NvInfer.h:6849
void setReverse(bool reverse) noexcept
Specify whether the cumulative operation should be applied backward.
Definition: NvInfer.h:6897
apiv::VCumulativeLayer * mImpl
Definition: NvInfer.h:6915
bool getExclusive() const noexcept
Get whether it is exclusive accumulation or inclusive accumulation.
Definition: NvInfer.h:6885
virtual ~ICumulativeLayer() noexcept=default
bool getReverse() const noexcept
Get the boolean that specifies whether the cumulative operation should be applied backward.
Definition: NvInfer.h:6909
void setExclusive(bool exclusive) noexcept
Set whether it is an exclusive accumulation or inclusive accumulation.
Definition: NvInfer.h:6873
CumulativeOperation getOperation() const noexcept
Get the cumulative operation for the layer.
Definition: NvInfer.h:6861
A deconvolution layer in a network definition.
Definition: NvInfer.h:2172
void setBiasWeights(Weights weights) noexcept
Set the bias weights for the deconvolution.
Definition: NvInfer.h:2260
int64_t getNbGroups() const noexcept
Get the number of groups for a deconvolution.
Definition: NvInfer.h:2221
Weights getKernelWeights() const noexcept
Get the kernel weights for the deconvolution.
Definition: NvInfer.h:2245
void setPrePadding(Dims const &padding) noexcept
Set the multi-dimension pre-padding of the deconvolution.
Definition: NvInfer.h:2287
Dims getStrideNd() const noexcept
Get the multi-dimension stride of the deconvolution.
Definition: NvInfer.h:2402
Dims getDilationNd() const noexcept
Get the multi-dimension dilation of the deconvolution.
Definition: NvInfer.h:2468
Weights getBiasWeights() const noexcept
Get the bias weights for the deconvolution.
Definition: NvInfer.h:2270
void setKernelWeights(Weights weights) noexcept
Set the kernel weights for the deconvolution.
Definition: NvInfer.h:2235
int64_t getNbOutputMaps() const noexcept
Get the number of output feature maps for the deconvolution.
Definition: NvInfer.h:2191
void setStrideNd(Dims const &stride) noexcept
Set the multi-dimension stride of the deconvolution.
Definition: NvInfer.h:2392
Dims getPostPadding() const noexcept
Get the padding.
Definition: NvInfer.h:2324
Dims getKernelSizeNd() const noexcept
Get the multi-dimension kernel size of the deconvolution.
Definition: NvInfer.h:2375
void setPostPadding(Dims const &padding) noexcept
Set the multi-dimension post-padding of the deconvolution.
Definition: NvInfer.h:2314
void setKernelSizeNd(Dims const &kernelSize) noexcept
Set the multi-dimension kernel size of the deconvolution.
Definition: NvInfer.h:2365
virtual ~IDeconvolutionLayer() noexcept=default
void setPaddingNd(Dims const &padding) noexcept
Set the multi-dimension padding of the deconvolution.
Definition: NvInfer.h:2420
void setNbOutputMaps(int64_t nbOutputMaps) noexcept
Set the number of output feature maps for the deconvolution.
Definition: NvInfer.h:2181
Dims getPaddingNd() const noexcept
Get the multi-dimension padding of the deconvolution.
Definition: NvInfer.h:2432
void setDilationNd(Dims const &dilation) noexcept
Set the multi-dimension dilation of the deconvolution.
Definition: NvInfer.h:2458
void setPaddingMode(PaddingMode paddingMode) noexcept
Set the padding mode.
Definition: NvInfer.h:2338
void setNbGroups(int64_t nbGroups) noexcept
Set the number of groups for a deconvolution.
Definition: NvInfer.h:2211
Dims getPrePadding() const noexcept
Get the pre-padding.
Definition: NvInfer.h:2297
PaddingMode getPaddingMode() const noexcept
Get the padding mode.
Definition: NvInfer.h:2350
A Dequantize layer in a network definition.
Definition: NvInfer.h:5736
TRT_NODISCARD Dims getBlockShape() const noexcept
Get the shape of the quantization block.
Definition: NvInfer.h:5785
void setToType(DataType toType) noexcept
Set the Dequantize layer output type.
Definition: NvInfer.h:5801
virtual ~IDequantizeLayer() noexcept=default
int32_t getAxis() const noexcept
Get the quantization axis.
Definition: NvInfer.h:5746
bool setBlockShape(Dims const &blockShape) noexcept
Set the shape of the quantization block.
Definition: NvInfer.h:5774
DataType getToType() const noexcept
Return the Dequantize layer output type.
Definition: NvInfer.h:5813
void setAxis(int32_t axis) noexcept
Set the quantization axis.
Definition: NvInfer.h:5757
Definition: NvInfer.h:8003
virtual ~IDistCollectiveLayer() noexcept=default
A network layer to perform dynamic quantization.
Definition: NvInfer.h:5841
DataType getScaleType() const noexcept
Return the scale factors data type.
Definition: NvInfer.h:5907
TRT_DEPRECATED void setAxis(int32_t axis) noexcept
Set the axis along which block quantization occurs.
Definition: NvInfer.h:5920
TRT_DEPRECATED void setBlockSize(int32_t size) noexcept
Set the size of the quantization block.
Definition: NvInfer.h:5943
Dims getBlockShape() const noexcept
Get the shape of the quantization block.
Definition: NvInfer.h:5978
void setScaleType(DataType scaleType) noexcept
Set the data type of the scale factors used to quantize the data.
Definition: NvInfer.h:5894
DataType getToType() const noexcept
Return DynamicQuantizeLayer's quantized output type.
Definition: NvInfer.h:5881
TRT_DEPRECATED int32_t getAxis() const noexcept
Get the axis along which blocking occurs.
Definition: NvInfer.h:5930
virtual ~IDynamicQuantizeLayer() noexcept=default
void setToType(DataType toType) noexcept
Set DynamicQuantizeLayer's quantized output type.
Definition: NvInfer.h:5868
void setBlockShape(Dims const &blockShape) noexcept
Set the shape of the quantization block.
Definition: NvInfer.h:5966
TRT_DEPRECATED int32_t getBlockSize() const noexcept
Get the size of the quantization block.
Definition: NvInfer.h:5953
An Einsum layer in a network.
Definition: NvInfer.h:6023
bool setEquation(char const *equation) noexcept
Set the equation. The equation is a comma-separated list of subscript labels, where each label refers...
Definition: NvInfer.h:6034
virtual ~IEinsumLayer() noexcept=default
char const * getEquation() const noexcept
Return the equation.
Definition: NvInfer.h:6044
An elementwise layer in a network definition.
Definition: NvInfer.h:2542
virtual ~IElementWiseLayer() noexcept=default
apiv::VElementWiseLayer * mImpl
Definition: NvInfer.h:2571
ElementWiseOperation getOperation() const noexcept
Get the binary operation for the layer.
Definition: NvInfer.h:2565
void setOperation(ElementWiseOperation op) noexcept
Set the binary operation for the layer.
Definition: NvInfer.h:2553
Generate a tensor according to a specified mode.
Definition: NvInfer.h:5237
bool isAlphaBetaInt64() const noexcept
Return true if alpha/beta have type int64, false if they have type double.
Definition: NvInfer.h:5469
FillOperation getOperation() const noexcept
Get the fill operation for the layer.
Definition: NvInfer.h:5283
void setOperation(FillOperation op) noexcept
Set the fill operation for the layer.
Definition: NvInfer.h:5273
DataType getToType() const noexcept
Get the fill layer output type.
Definition: NvInfer.h:5499
void setAlphaInt64(int64_t alpha) noexcept
Set the alpha parameter with int64 datatype.
Definition: NvInfer.h:5412
void setBetaInt64(int64_t beta) noexcept
Set the beta parameter with int64 datatype.
Definition: NvInfer.h:5446
void setBeta(double beta) noexcept
Set the beta parameter.
Definition: NvInfer.h:5336
int64_t getAlphaInt64() const noexcept
Get the value of alpha parameter with int64 datatype.
Definition: NvInfer.h:5427
int64_t getBetaInt64() const noexcept
Get the value of beta parameter with int64 datatype.
Definition: NvInfer.h:5461
double getAlpha() const noexcept
Get the value of alpha parameter.
Definition: NvInfer.h:5317
void setDimensions(Dims const &dimensions) noexcept
Set the output tensor's dimensions.
Definition: NvInfer.h:5248
void setAlpha(double alpha) noexcept
Set the alpha parameter.
Definition: NvInfer.h:5302
void setToType(DataType toType) noexcept
Set the fill layer output type.
Definition: NvInfer.h:5487
Dims getDimensions() const noexcept
Get the output tensor's dimensions.
Definition: NvInfer.h:5263
double getBeta() const noexcept
Get the value of beta parameter.
Definition: NvInfer.h:5351
virtual ~IFillLayer() noexcept=default
A Gather layer in a network definition. Supports several kinds of gathering.
Definition: NvInfer.h:2675
void setGatherAxis(int32_t axis) noexcept
Set the axis used by GatherMode::kELEMENTS and GatherMode::kDEFAULT The axis must be less than the nu...
Definition: NvInfer.h:2686
void setNbElementWiseDims(int32_t elementWiseDims) noexcept
Set the number of leading dimensions of indices tensor to be handled elementwise.
Definition: NvInfer.h:2721
apiv::VGatherLayer * mImpl
Definition: NvInfer.h:2757
int32_t getNbElementWiseDims() const noexcept
Get the number of leading dimensions of indices tensor to be handled elementwise.
Definition: NvInfer.h:2731
void setMode(GatherMode mode) noexcept
Set the gather mode.
Definition: NvInfer.h:2741
int32_t getGatherAxis() const noexcept
Get the axis to gather on.
Definition: NvInfer.h:2698
GatherMode getMode() const noexcept
Get the gather mode.
Definition: NvInfer.h:2751
virtual ~IGatherLayer() noexcept=default
A GridSample layer in a network definition.
Definition: NvInfer.h:6245
void setInterpolationMode(InterpolationMode mode) noexcept
Set the grid sample interpolation mode.
Definition: NvInfer.h:6252
bool setSampleMode(SampleMode mode) noexcept
Set the sample mode.
Definition: NvInfer.h:6298
void setAlignCorners(bool alignCorners) noexcept
Set the align corners mode.
Definition: NvInfer.h:6274
apiv::VGridSampleLayer * mImpl
Definition: NvInfer.h:6316
SampleMode getSampleMode() const noexcept
Get the sample mode.
Definition: NvInfer.h:6310
InterpolationMode getInterpolationMode() const noexcept
Get the grid sample interpolation mode.
Definition: NvInfer.h:6264
bool getAlignCorners() const noexcept
Get the align corners mode.
Definition: NvInfer.h:6286
virtual ~IGridSampleLayer() noexcept=default
Class to handle library allocated memory that is accessible to the user.
Definition: NvInferRuntime.h:142
A layer that represents the identity function.
Definition: NvInfer.h:3971
apiv::VIdentityLayer * mImpl
Definition: NvInfer.h:3973
virtual ~IIdentityLayer() noexcept=default
This is a base class for Conditional boundary layers.
Definition: NvInfer.h:4626
IIfConditional * getConditional() const noexcept
Get a pointer to the IIfConditional associated with this boundary layer.
Definition: NvInfer.h:4631
virtual ~IIfConditionalBoundaryLayer() noexcept=default
Helper for constructing conditionally-executed subgraphs.
Definition: NvInfer.h:4709
IIfConditionalInputLayer * addInput(ITensor &input) noexcept
Add an If-conditional input.
Definition: NvInfer.h:4750
char const * getName() const noexcept
Return the name of the conditional.
Definition: NvInfer.h:4775
virtual ~IIfConditional() noexcept=default
IConditionLayer * setCondition(ITensor &condition) noexcept
Set the condition tensor for this If-Conditional construct.
Definition: NvInfer.h:4720
IIfConditionalOutputLayer * addOutput(ITensor &trueSubgraphOutput, ITensor &falseSubgraphOutput) noexcept
Add an If-conditional output.
Definition: NvInfer.h:4738
void setName(char const *name) noexcept
Set the name of the conditional.
Definition: NvInfer.h:4765
This layer represents an input to an IIfConditional.
Definition: NvInfer.h:4677
virtual ~IIfConditionalInputLayer() noexcept=default
This layer represents an output of an IIfConditional.
Definition: NvInfer.h:4664
virtual ~IIfConditionalOutputLayer() noexcept=default
Application-implemented interface for calibration.
Definition: NvInfer.h:9831
virtual TRT_DEPRECATED int32_t getBatchSize() const noexcept=0
Get the batch size used for calibration batches.
A layer to do iterations.
Definition: NvInfer.h:4940
virtual ~IIteratorLayer() noexcept=default
void setReverse(bool reverse) noexcept
Set iteration order to be reverse.
Definition: NvInfer.h:4967
bool getReverse() const noexcept
Check if the iteration order is reverse.
Definition: NvInfer.h:4977
int32_t getAxis() const noexcept
Get axis being iterated over.
Definition: NvInfer.h:4953
void setAxis(int32_t axis) noexcept
Set axis to iterate over.
Definition: NvInfer.h:4945
Layer that represents a KVCacheUpdate operation.
Definition: NvInfer.h:7505
bool setCacheMode(KVCacheMode cacheMode) noexcept
Set the mode of the KVCacheUpdate layer.
Definition: NvInfer.h:7528
virtual ~IKVCacheUpdateLayer() noexcept=default
KVCacheMode getCacheMode() const noexcept
Get the mode of the KVCacheUpdate layer.
Definition: NvInfer.h:7538
apiv::VKVCacheUpdateLayer * mImpl
Definition: NvInfer.h:7544
A LRN layer in a network definition.
Definition: NvInfer.h:1786
int64_t getWindowSize() const noexcept
Get the LRN window size.
Definition: NvInfer.h:1807
float getAlpha() const noexcept
Get the LRN alpha value.
Definition: NvInfer.h:1829
void setWindowSize(int64_t windowSize) noexcept
Set the LRN window size.
Definition: NvInfer.h:1797
void setK(float k) noexcept
Set the LRN K value.
Definition: NvInfer.h:1863
void setAlpha(float alpha) noexcept
Set the LRN alpha value.
Definition: NvInfer.h:1819
void setBeta(float beta) noexcept
Set the LRN beta value.
Definition: NvInfer.h:1841
virtual ~ILRNLayer() noexcept=default
float getBeta() const noexcept
Get the LRN beta value.
Definition: NvInfer.h:1851
float getK() const noexcept
Get the LRN K value.
Definition: NvInfer.h:1873
Base class for all layer classes in a network definition.
Definition: NvInfer.h:583
TRT_DEPRECATED void setPrecision(DataType dataType) noexcept
Set the preferred or required computational precision of this layer in a weakly-typed network.
Definition: NvInfer.h:703
TRT_DEPRECATED void setOutputType(int32_t index, DataType dataType) noexcept
Set the output type of this layer in a weakly-typed network.
Definition: NvInfer.h:791
TRT_DEPRECATED bool precisionIsSet() const noexcept
whether the computational precision has been set for this layer
Definition: NvInfer.h:729
void setMetadata(char const *metadata) noexcept
Set the metadata for this layer.
Definition: NvInfer.h:854
TRT_DEPRECATED void resetOutputType(int32_t index) noexcept
reset the output type for this layer
Definition: NvInfer.h:836
void setName(char const *name) noexcept
Set the name of a layer.
Definition: NvInfer.h:604
int32_t getNbInputs() const noexcept
Get the number of inputs of a layer.
Definition: NvInfer.h:622
int32_t getNbRanks() const noexcept
Get the number of ranks for multi-device execution.
Definition: NvInfer.h:900
char const * getMetadata() const noexcept
Get the metadata of the layer.
Definition: NvInfer.h:867
DataType getOutputType(int32_t index) const noexcept
get the output type of this layer
Definition: NvInfer.h:806
DataType getPrecision() const noexcept
get the computational precision of this layer
Definition: NvInfer.h:715
TRT_DEPRECATED bool outputTypeIsSet(int32_t index) const noexcept
whether the output type has been set for this layer
Definition: NvInfer.h:822
char const * getName() const noexcept
Return the name of a layer.
Definition: NvInfer.h:614
int32_t getNbOutputs() const noexcept
Get the number of outputs of a layer.
Definition: NvInfer.h:643
ITensor * getOutput(int32_t index) const noexcept
Get the layer output corresponding to the given index.
Definition: NvInfer.h:653
void setInput(int32_t index, ITensor &tensor) noexcept
Replace an input of this layer with a specific tensor.
Definition: NvInfer.h:670
ITensor * getInput(int32_t index) const noexcept
Get the layer input corresponding to the given index.
Definition: NvInfer.h:635
bool setNbRanks(int32_t nbRanks) noexcept
Set the number of ranks for multi-device execution.
Definition: NvInfer.h:888
LayerType getType() const noexcept
Return the type of a layer.
Definition: NvInfer.h:590
TRT_DEPRECATED void resetPrecision() noexcept
reset the computational precision for this layer
Definition: NvInfer.h:741
virtual ~ILayer() noexcept=default
Application-implemented logging interface for the builder, refitter and runtime.
Definition: NvInferRuntime.h:1588
This is a base class for Loop boundary layers.
Definition: NvInfer.h:4603
virtual ~ILoopBoundaryLayer() noexcept=default
ILoop * getLoop() const noexcept
Get a pointer to ILoop associated with this boundary layer.
Definition: NvInfer.h:4608
Helper for creating a recurrent subgraph.
Definition: NvInfer.h:4998
void setName(char const *name) noexcept
Set the name of the loop.
Definition: NvInfer.h:5068
ITripLimitLayer * addTripLimit(ITensor &tensor, TripLimit limit) noexcept
Add a trip-count limiter, based on the given tensor.
Definition: NvInfer.h:5027
IIteratorLayer * addIterator(ITensor &tensor, int32_t axis=0, bool reverse=false) noexcept
Return layer that subscripts tensor by loop iteration.
Definition: NvInfer.h:5040
ILoopOutputLayer * addLoopOutput(ITensor &tensor, LoopOutput outputKind, int32_t axis=0) noexcept
Make an output for this loop, based on the given tensor.
Definition: NvInfer.h:5053
virtual ~ILoop() noexcept=default
char const * getName() const noexcept
Return the name of the loop.
Definition: NvInfer.h:5078
IRecurrenceLayer * addRecurrence(ITensor &initialValue) noexcept
Create a recurrence layer for this loop with initialValue as its first input.
Definition: NvInfer.h:5006
An ILoopOutputLayer is the sole way to get output from a loop.
Definition: NvInfer.h:4840
virtual ~ILoopOutputLayer() noexcept=default
int32_t getAxis() const noexcept
Get axis being concatenated over.
Definition: NvInfer.h:4870
LoopOutput getLoopOutput() const noexcept
Get which kind a loop output has.
Definition: NvInfer.h:4845
void setAxis(int32_t axis) noexcept
Set where to insert the concatenation axis. Ignored if getLoopOutput() is kLAST_VALUE.
Definition: NvInfer.h:4862
Layer that represents a Matrix Multiplication.
Definition: NvInfer.h:3818
apiv::VMatrixMultiplyLayer * mImpl
Definition: NvInfer.h:3846
virtual ~IMatrixMultiplyLayer() noexcept=default
MatrixOperation getOperation(int32_t index) const noexcept
Get the operation for an input tensor.
Definition: NvInfer.h:3840
void setOperation(int32_t index, MatrixOperation op) noexcept
Set the operation for an input tensor.
Definition: NvInfer.h:3828
A MoE layer in a network definition. Mixture of Experts (MoE) is a collection of experts with each ex...
Definition: NvInfer.h:7687
void setSwigluParamLimit(float limit) noexcept
Set the SwiGLU parameter limit.
Definition: NvInfer.h:7902
void setDynQOutputScaleType(DataType type) noexcept
Set the dynamic quantization output scale type.
Definition: NvInfer.h:7855
MoEActType getActivationType() const noexcept
Get the activation type for the MoE layer.
Definition: NvInfer.h:7736
void setQuantizationToType(DataType type) noexcept
Set the data type the mul output is quantized to.
Definition: NvInfer.h:7803
void setQuantizationDynamicDblQ(ITensor &fcDownActivationDblQScale, DataType dataType, Dims const &blockShape, DataType dynQOutputScaleType) noexcept
Configure dynamic quantization (with double quantization) after the mul op. ┌── fcGate ── activation ...
Definition: NvInfer.h:7788
void setQuantizationStatic(ITensor &fcDownActivationScale, DataType dataType) noexcept
Configure static quantization after the mul op. ┌── fcGate ── activation ───┐ │ │ hiddenStates ───┤ ├...
Definition: NvInfer.h:7759
virtual ~IMoELayer() noexcept=default
float getSwigluParamLimit() const noexcept
Get the SwiGLU parameter limit.
Definition: NvInfer.h:7914
DataType getQuantizationToType() const noexcept
Get the data type the mul in MoE layer is quantized to.
Definition: NvInfer.h:7815
DataType getDynQOutputScaleType() const noexcept
Get the dynamic quantization output scale type.
Definition: NvInfer.h:7867
void setActivationType(MoEActType activationType) noexcept
Set the activation type for the MoE layer.
Definition: NvInfer.h:7724
Dims getQuantizationBlockShape() const noexcept
Get the block shape for the quantization of the Mul output.
Definition: NvInfer.h:7843
void setGatedWeights(ITensor &fcGateWeights, ITensor &fcUpWeights, ITensor &fcDownWeights, MoEActType activationType) noexcept
Set the weights of the experts when each expert is a GLU (gated linear unit). In each GLU,...
Definition: NvInfer.h:7700
float getSwigluParamBeta() const noexcept
Get the SwiGLU parameter beta.
Definition: NvInfer.h:7966
void setSwigluParamBeta(float beta) noexcept
Set the SwiGLU parameter beta.
Definition: NvInfer.h:7954
void setGatedBiases(ITensor &fcGateBiases, ITensor &fcUpBiases, ITensor &fcDownBiases) noexcept
Set the biases of the experts when each expert is a GLU (gated linear unit). In each GLU,...
Definition: NvInfer.h:7712
void setSwigluParams(float limit, float alpha, float beta) noexcept
Set the SwiGLU parameters.
Definition: NvInfer.h:7888
void setQuantizationBlockShape(Dims const &blockShape) noexcept
Set the block shape for the quantization of the Mul output.
Definition: NvInfer.h:7831
void setInput(int32_t index, ITensor &tensor) noexcept
Set the input of the MoE layer.
Definition: NvInfer.h:7983
float getSwigluParamAlpha() const noexcept
Get the SwiGLU parameter alpha.
Definition: NvInfer.h:7940
void setSwigluParamAlpha(float alpha) noexcept
Set the SwiGLU parameter alpha.
Definition: NvInfer.h:7928
A non-maximum suppression layer in a network definition.
Definition: NvInfer.h:6397
virtual ~INMSLayer() noexcept=default
void setTopKBoxLimit(int32_t limit) noexcept
Set the TopK box limit parameter for the layer.
Definition: NvInfer.h:6434
void setBoundingBoxFormat(BoundingBoxFormat fmt) noexcept
Set the bounding box format parameter for the layer.
Definition: NvInfer.h:6408
BoundingBoxFormat getBoundingBoxFormat() const noexcept
Get the bounding box format parameter for the layer.
Definition: NvInfer.h:6420
bool setIndicesType(DataType type) noexcept
Set the indices type for the layer.
Definition: NvInfer.h:6479
apiv::VNMSLayer * mImpl
Definition: NvInfer.h:6497
int32_t getTopKBoxLimit() const noexcept
Get the TopK box limit parameter for the layer.
Definition: NvInfer.h:6444
DataType getIndicesType() const noexcept
Return the NMS layer indices type.
Definition: NvInfer.h:6491
A network definition for input to the builder.
Definition: NvInfer.h:8027
IConcatenationLayer * addConcatenation(ITensor *const *inputs, int32_t nbInputs) noexcept
Add a concatenation layer to the network.
Definition: NvInfer.h:8255
IShuffleLayer * addShuffle(ITensor &input) noexcept
Add a shuffle layer to the network.
Definition: NvInfer.h:8318
void setName(char const *name) noexcept
Sets the name of the network.
Definition: NvInfer.h:8784
ITopKLayer * addTopK(ITensor &input, TopKOperation op, int32_t k, uint32_t reduceAxes, DataType indicesType) noexcept
Add a TopK layer to the network.
Definition: NvInfer.h:8514
bool markDebug(ITensor &tensor) noexcept
Mark a tensor as a debug tensor.
Definition: NvInfer.h:8098
ILRNLayer * addLRN(ITensor &input, int64_t window, float alpha, float beta, float k) noexcept
Add a LRN layer to the network.
Definition: NvInfer.h:8199
ICumulativeLayer * addCumulative(ITensor &input, ITensor &axis, CumulativeOperation operation, bool exclusive, bool reverse) noexcept
Add a cumulative layer to the network.
Definition: NvInfer.h:9532
IAssertionLayer * addAssertion(ITensor &condition, char const *message) noexcept
Add an assertion layer to the network.
Definition: NvInfer.h:9100
TRT_DEPRECATED INonZeroLayer * addNonZero(ITensor &input) noexcept
Add a nonzero layer to the network.
Definition: NvInfer.h:8605
IConvolutionLayer * addConvolutionNd(ITensor &input, int64_t nbOutputMaps, Dims const &kernelSize, Weights kernelWeights, Weights biasWeights) noexcept
Add a multi-dimension convolution layer to the network.
Definition: NvInfer.h:8919
ICastLayer * addCast(ITensor &input, DataType toType) noexcept
Add a cast layer.
Definition: NvInfer.h:8674
IScaleLayer * addScaleNd(ITensor &input, ScaleMode mode, Weights shift, Weights scale, Weights power, int32_t channelAxis) noexcept
Add a multi-dimension scale layer to the network.
Definition: NvInfer.h:8998
char const * getName() const noexcept
Returns the name associated with the network.
Definition: NvInfer.h:8798
IParametricReLULayer * addParametricReLU(ITensor &input, ITensor &slope) noexcept
Add a parametric ReLU layer to the network.
Definition: NvInfer.h:8897
ITensor * getOutput(int32_t index) const noexcept
Get the output tensor specified by the given index.
Definition: NvInfer.h:8419
ITensor * getInput(int32_t index) const noexcept
Get the input tensor specified by the given index.
Definition: NvInfer.h:8389
TRT_DEPRECATED ITopKLayer * addTopK(ITensor &input, TopKOperation op, int32_t k, uint32_t reduceAxes) noexcept
Add a TopK layer to the network.
Definition: NvInfer.h:8481
IDequantizeLayer * addDequantize(ITensor &input, ITensor &scale, DataType outputType) noexcept
Add a dequantization layer to the network.
Definition: NvInfer.h:9269
bool unmarkOutputForShapes(ITensor &tensor) noexcept
Undo markOutputForShapes.
Definition: NvInfer.h:8879
IFillLayer * addFill(Dims const &dimensions, FillOperation op, DataType outputType) noexcept
Add a fill layer to the network.
Definition: NvInfer.h:9151
ILoop * addLoop() noexcept
Add a loop to the network.
Definition: NvInfer.h:9029
bool markUnfusedTensorsAsDebugTensors() noexcept
Mark unfused tensors as debug tensors.
Definition: NvInfer.h:8146
TRT_NODISCARD INormalizationLayer * addNormalizationV2(ITensor &input, ITensor &scale, ITensor &bias, uint32_t axesMask) noexcept
Add a normalization layer to the network.
Definition: NvInfer.h:9782
IActivationLayer * addActivation(ITensor &input, ActivationType type) noexcept
Add an activation layer to the network.
Definition: NvInfer.h:8180
TRT_DEPRECATED IFillLayer * addFill(Dims const &dimensions, FillOperation op) noexcept
Add a fill layer to the network.
Definition: NvInfer.h:9125
ISliceLayer * addSlice(ITensor &input, Dims const &start, Dims const &size, Dims const &stride) noexcept
Add a slice layer to the network.
Definition: NvInfer.h:8760
virtual ~INetworkDefinition() noexcept=default
TRT_DEPRECATED IQuantizeLayer * addQuantize(ITensor &input, ITensor &scale) noexcept
Add a quantization layer to the network.
Definition: NvInfer.h:9310
virtual IBuilder & getBuilder() const noexcept
Return the builder from which this INetworkDefinition was created.
Definition: NvInfer.h:9678
ILayer * getLayer(int32_t index) const noexcept
Get the layer specified by the given index.
Definition: NvInfer.h:8361
bool isDebugTensor(ITensor const &tensor) const noexcept
Check if a tensor is marked as debug tensor.
Definition: NvInfer.h:8124
bool getFlag(NetworkDefinitionCreationFlag networkDefinitionCreationFlag) const noexcept
Returns true if the network definition creation flag is set.
Definition: NvInfer.h:8850
IIfConditional * addIfConditional() noexcept
Add an if-then-else to the network.
Definition: NvInfer.h:9044
IErrorRecorder * getErrorRecorder() const noexcept
Get the ErrorRecorder assigned to this interface.
Definition: NvInfer.h:9225
ISqueezeLayer * addSqueeze(ITensor &input, ITensor &axes) noexcept
Add a squeeze layer to the network.
Definition: NvInfer.h:9735
TRT_DEPRECATED INMSLayer * addNMS(ITensor &boxes, ITensor &scores, ITensor &maxOutputBoxesPerClass) noexcept
Add a non-maximum suppression layer to the network.
Definition: NvInfer.h:9441
IReverseSequenceLayer * addReverseSequence(ITensor &input, ITensor &sequenceLens) noexcept
Add a ReverseSequence layer to the network.
Definition: NvInfer.h:9478
TRT_DEPRECATED IDynamicQuantizeLayer * addDynamicQuantize(ITensor &input, int32_t axis, int32_t blockSize, DataType outputType, DataType scaleType) noexcept
Add a dynamic quantization layer to the network.
Definition: NvInfer.h:9361
int32_t getNbInputs() const noexcept
Get the number of inputs in the network.
Definition: NvInfer.h:8373
NetworkDefinitionCreationFlags getFlags() const noexcept
Get the network definition creation flags for this network definition object. Defaults to 0.
Definition: NvInfer.h:8838
IQuantizeLayer * addQuantize(ITensor &input, ITensor &scale, DataType outputType) noexcept
Add a quantization layer to the network.
Definition: NvInfer.h:9334
IDynamicQuantizeLayer * addDynamicQuantizeV2(ITensor &input, Dims const &blockShape, DataType outputType, DataType scaleType) noexcept
Add a dynamic quantization layer to the network.
Definition: NvInfer.h:9385
IReduceLayer * addReduce(ITensor &input, ReduceOperation operation, uint32_t reduceAxes, bool keepDimensions) noexcept
Add a reduce layer to the network.
Definition: NvInfer.h:8445
IUnaryLayer * addUnary(ITensor &input, UnaryOperation operation) noexcept
Add a unary layer to the network.
Definition: NvInfer.h:8304
IGridSampleLayer * addGridSample(ITensor &input, ITensor &grid) noexcept
Add a GridSample layer to the network.
Definition: NvInfer.h:9419
void removeTensor(ITensor &tensor) noexcept
Remove a tensor from the network definition.
Definition: NvInfer.h:8689
bool areWeightsMarkedRefittable(char const *name) const noexcept
Whether the weight has been marked as refittable.
Definition: NvInfer.h:9716
ISelectLayer * addSelect(ITensor &condition, ITensor &thenInput, ITensor &elseInput) noexcept
Add a select layer to the network.
Definition: NvInfer.h:9083
IScatterLayer * addScatter(ITensor &data, ITensor &indices, ITensor &updates, ScatterMode mode) noexcept
Add a Scatter layer to the network with specified mode and axis=0.
Definition: NvInfer.h:9289
TRT_DEPRECATED INormalizationLayer * addNormalization(ITensor &input, ITensor &scale, ITensor &bias, uint32_t axesMask) noexcept
Add a normalization layer to the network.
Definition: NvInfer.h:9510
int32_t getNbLayers() const noexcept
Get the number of layers in the network.
Definition: NvInfer.h:8347
TRT_DEPRECATED bool hasImplicitBatchDimension() const noexcept
Query whether the network was created with an implicit batch dimension.
Definition: NvInfer.h:8828
apiv::VNetworkDefinition * mImpl
Definition: NvInfer.h:9788
IKVCacheUpdateLayer * addKVCacheUpdate(ITensor &cache, ITensor &update, ITensor &writeIndices, KVCacheMode cacheMode) noexcept
Add a KVCacheUpdate layer to the network.
Definition: NvInfer.h:9618
bool markOutputForShapes(ITensor &tensor) noexcept
Enable tensor's value to be computed by IExecutionContext::getShapeBinding.
Definition: NvInfer.h:8867
IOneHotLayer * addOneHot(ITensor &indices, ITensor &values, ITensor &depth, int32_t axis) noexcept
Add a OneHot layer to the network.
Definition: NvInfer.h:8335
IScaleLayer * addScale(ITensor &input, ScaleMode mode, Weights shift, Weights scale, Weights power) noexcept
Add a Scale layer to the network.
Definition: NvInfer.h:8225
IPluginV3Layer * addPluginV3(ITensor *const *inputs, int32_t nbInputs, ITensor *const *shapeInputs, int32_t nbShapeInputs, IPluginV3 &plugin) noexcept
Add a plugin layer implementing the IPluginV3 interface to the network.
Definition: NvInfer.h:8740
void unmarkOutput(ITensor &tensor) noexcept
Unmark a tensor as a network output.
Definition: NvInfer.h:8701
IIdentityLayer * addIdentity(ITensor &input) noexcept
Add an identity layer.
Definition: NvInfer.h:8659
IGatherLayer * addGatherV2(ITensor &data, ITensor &indices, GatherMode mode) noexcept
Add gather with specified mode, axis=0 and nbElementWiseDims=0.
Definition: NvInfer.h:8546
INonZeroLayer * addNonZero(ITensor &input, DataType indicesType) noexcept
Add a nonzero layer to the network.
Definition: NvInfer.h:8621
IElementWiseLayer * addElementWise(ITensor &input1, ITensor &input2, ElementWiseOperation op) noexcept
Add an elementwise layer to the network.
Definition: NvInfer.h:8282
IConstantLayer * addConstant(Dims const &dimensions, Weights weights) noexcept
Add a constant layer to the network.
Definition: NvInfer.h:8645
void setErrorRecorder(IErrorRecorder *recorder) noexcept
Set the ErrorRecorder for this interface.
Definition: NvInfer.h:9210
IPoolingLayer * addPoolingNd(ITensor &input, PoolingType type, Dims const &windowSize) noexcept
Add a multi-dimension pooling layer to the network.
Definition: NvInfer.h:8939
INMSLayer * addNMS(ITensor &boxes, ITensor &scores, ITensor &maxOutputBoxesPerClass, DataType indicesType) noexcept
Add a non-maximum suppression layer to the network.
Definition: NvInfer.h:9461
IRaggedSoftMaxLayer * addRaggedSoftMax(ITensor &input, ITensor &bounds) noexcept
Add a RaggedSoftMax layer to the network.
Definition: NvInfer.h:8565
IShapeLayer * addShape(ITensor &input) noexcept
Add a shape layer to the network.
Definition: NvInfer.h:8814
IGatherLayer * addGather(ITensor &data, ITensor &indices, int32_t axis) noexcept
Add gather with mode GatherMode::kDEFAULT and specified axis and nbElementWiseDims=0.
Definition: NvInfer.h:8530
IAttention * addAttention(ITensor &query, ITensor &key, ITensor &value, AttentionNormalizationOp normOp, bool causal) noexcept
Add an attention to the network.
Definition: NvInfer.h:9559
bool unmarkWeightsRefittable(char const *name) noexcept
Unmark weights as refittable when the builder flag kREFIT_INDIVIDUAL is set.
Definition: NvInfer.h:9703
bool markWeightsRefittable(char const *name) noexcept
Mark weights as refittable when the builder flag kREFIT_INDIVIDUAL is set.
Definition: NvInfer.h:9691
IRotaryEmbeddingLayer * addRotaryEmbedding(ITensor &input, ITensor &cosCache, ITensor &sinCache, bool interleaved, int32_t rotaryEmbeddingDim) noexcept
Add a Rotary Position Embedding (RoPE) layer to the network.
Definition: NvInfer.h:9584
IDeconvolutionLayer * addDeconvolutionNd(ITensor &input, int64_t nbOutputMaps, Dims kernelSize, Weights kernelWeights, Weights biasWeights) noexcept
Add a multi-dimension deconvolution layer to the network.
Definition: NvInfer.h:8961
IResizeLayer * addResize(ITensor &input) noexcept
Add a resize layer to the network.
Definition: NvInfer.h:9015
IUnsqueezeLayer * addUnsqueeze(ITensor &input, ITensor &axes) noexcept
Add an unsqueeze layer to the network.
Definition: NvInfer.h:9756
IMatrixMultiplyLayer * addMatrixMultiply(ITensor &input0, MatrixOperation op0, ITensor &input1, MatrixOperation op1) noexcept
Add a MatrixMultiply layer to the network.
Definition: NvInfer.h:8586
ISoftMaxLayer * addSoftMax(ITensor &input) noexcept
Add a SoftMax layer to the network.
Definition: NvInfer.h:8238
bool unmarkDebug(ITensor &tensor) noexcept
Unmark a tensor as a debug tensor.
Definition: NvInfer.h:8114
IEinsumLayer * addEinsum(ITensor *const *inputs, int32_t nbInputs, char const *equation) noexcept
Add an Einsum layer to the network.
Definition: NvInfer.h:9401
void markOutput(ITensor &tensor) noexcept
Mark a tensor as a network output.
Definition: NvInfer.h:8080
TRT_DEPRECATED IPluginV2Layer * addPluginV2(ITensor *const *inputs, int32_t nbInputs, IPluginV2 &plugin) noexcept
Add a plugin layer to the network using the IPluginV2 interface.
Definition: NvInfer.h:8722
IPaddingLayer * addPaddingNd(ITensor &input, Dims const &prePadding, Dims const &postPadding) noexcept
Add a padding layer to the network. Only 2D padding is currently supported.
Definition: NvInfer.h:9167
TRT_DEPRECATED IDequantizeLayer * addDequantize(ITensor &input, ITensor &scale) noexcept
Add a dequantization layer to the network.
Definition: NvInfer.h:9246
int32_t getNbOutputs() const noexcept
Get the number of outputs in the network.
Definition: NvInfer.h:8403
bool setWeightsName(Weights weights, char const *name) noexcept
Associate a name with all current uses of the given weights.
Definition: NvInfer.h:9191
TRT_NODISCARD IDistCollectiveLayer * addDistCollective(ITensor &input, CollectiveOperation distCollectiveOp, ReduceOperation reduceOp, int64_t root, int64_t *groups, int64_t groupSize) noexcept
Add a DistCollective layer to the network.
Definition: NvInfer.h:9666
IMoELayer * addMoE(ITensor &hiddenStates, ITensor &selectedExpertsForTokens, ITensor &scoresForSelectedExperts) noexcept
Add a MoE (Mixture of Experts) layer to the network.
Definition: NvInfer.h:9638
bool unmarkUnfusedTensorsAsDebugTensors() noexcept
Undo the marking of unfused tensors as debug tensors.
Definition: NvInfer.h:8160
Forward declaration of IEngineInspector for use by other interfaces.
Definition: NvInferRuntime.h:51
Definition: NvInfer.h:3872
DataType getIndicesType() const noexcept
Return the NonZero layer indices type.
Definition: NvInfer.h:3896
bool setIndicesType(DataType type) noexcept
Set the indices type for the layer.
Definition: NvInfer.h:3884
virtual ~INonZeroLayer() noexcept=default
A normalization layer in a network definition.
Definition: NvInfer.h:6586
float getEpsilon() const noexcept
Get the epsilon value used for the normalization calculation.
Definition: NvInfer.h:6605
TRT_DEPRECATED void setComputePrecision(DataType type) noexcept
Set the compute precision of this layer.
Definition: NvInfer.h:6684
uint32_t getAxes() const noexcept
Get the axes value used for the normalization calculation.
Definition: NvInfer.h:6625
virtual ~INormalizationLayer() noexcept=default
void setEpsilon(float eps) noexcept
Set the epsilon value used for the normalization calculation.
Definition: NvInfer.h:6595
TRT_NODISCARD bool isV2() const noexcept
Returns true if this layer was created through addNormalizationV2().
Definition: NvInfer.h:6706
apiv::VNormalizationLayer * mImpl
Definition: NvInfer.h:6712
int64_t getNbGroups() const noexcept
Get the number of groups used to split the channels for the normalization calculation.
Definition: NvInfer.h:6656
void setAxes(uint32_t axesMask) noexcept
Set the reduction axes for the normalization calculation.
Definition: NvInfer.h:6615
void setNbGroups(int64_t nbGroups) noexcept
Set the number of groups used to split the channels in the normalization calculation.
Definition: NvInfer.h:6646
TRT_DEPRECATED DataType getComputePrecision() const noexcept
Get the compute precision of this layer.
Definition: NvInfer.h:6696
A OneHot layer in a network definition.
Definition: NvInfer.h:6208
virtual ~IOneHotLayer() noexcept=default
apiv::VOneHotLayer * mImpl
Definition: NvInfer.h:6229
void setAxis(int32_t axis) noexcept
Set the axis parameter.
Definition: NvInfer.h:6215
int32_t getAxis() const noexcept
Get the value of the axis parameter.
Definition: NvInfer.h:6223
Optimization profile for dynamic input dimensions and shape tensors.
Definition: NvInferRuntime.h:2675
Layer that represents a padding operation.
Definition: NvInfer.h:3069
Dims getPostPaddingNd() const noexcept
Get the padding that is applied at the end of the tensor.
Definition: NvInfer.h:3118
void setPrePaddingNd(Dims const &padding) noexcept
Set the padding that is applied at the start of the tensor.
Definition: NvInfer.h:3080
virtual ~IPaddingLayer() noexcept=default
void setPostPaddingNd(Dims const &padding) noexcept
Set the padding that is applied at the end of the tensor.
Definition: NvInfer.h:3106
Dims getPrePaddingNd() const noexcept
Get the padding that is applied at the start of the tensor.
Definition: NvInfer.h:3092
apiv::VPaddingLayer * mImpl
Definition: NvInfer.h:3124
Layer that represents a parametric ReLU operation.
Definition: NvInfer.h:4087
apiv::VParametricReLULayer * mImpl
Definition: NvInfer.h:4089
virtual ~IParametricReLULayer() noexcept=default
Single registration point for all plugins in an application. It is used to find plugin implementation...
Definition: NvInferRuntimeCommon.h:56
Plugin class for user-implemented layers.
Definition: NvInferRuntimePlugin.h:139
Layer type for pluginV2.
Definition: NvInfer.h:2773
virtual ~IPluginV2Layer() noexcept=default
apiv::VPluginV2Layer * mImpl
Definition: NvInfer.h:2786
IPluginV2 & getPlugin() noexcept
Get the plugin for the layer.
Definition: NvInfer.h:2780
Layer type for V3 plugins.
Definition: NvInfer.h:2800
virtual ~IPluginV3Layer() noexcept=default
IPluginV3 & getPlugin() noexcept
Get the plugin for the layer.
Definition: NvInfer.h:2807
apiv::VPluginV3Layer * mImpl
Definition: NvInfer.h:2813
A Pooling layer in a network definition.
Definition: NvInfer.h:1535
PoolingType getPoolingType() const noexcept
Get the type of pooling to be performed.
Definition: NvInfer.h:1554
PaddingMode getPaddingMode() const noexcept
Get the padding mode.
Definition: NvInfer.h:1687
Dims getPostPadding() const noexcept
Get the padding.
Definition: NvInfer.h:1663
bool getAverageCountExcludesPadding() const noexcept
Get whether average pooling uses as a denominator the overlap area between the window and the unpadde...
Definition: NvInfer.h:1607
Dims getPrePadding() const noexcept
Get the pre-padding.
Definition: NvInfer.h:1635
void setPoolingType(PoolingType type) noexcept
Set the type of pooling to be performed.
Definition: NvInfer.h:1544
void setWindowSizeNd(Dims const &windowSize) noexcept
Set the multi-dimension window size for pooling.
Definition: NvInfer.h:1700
void setPaddingMode(PaddingMode paddingMode) noexcept
Set the padding mode.
Definition: NvInfer.h:1676
Dims getWindowSizeNd() const noexcept
Get the multi-dimension window size for pooling.
Definition: NvInfer.h:1710
void setAverageCountExcludesPadding(bool exclusive) noexcept
Set whether average pooling uses as a denominator the overlap area between the window and the unpadde...
Definition: NvInfer.h:1596
void setPaddingNd(Dims const &padding) noexcept
Set the multi-dimension padding for pooling.
Definition: NvInfer.h:1754
float getBlendFactor() const noexcept
Get the blending factor for the max_average_blend mode: max_average_blendPool = (1-blendFactor)*maxPo...
Definition: NvInfer.h:1582
void setStrideNd(Dims const &stride) noexcept
Set the multi-dimension stride for pooling.
Definition: NvInfer.h:1725
Dims getStrideNd() const noexcept
Get the multi-dimension stride for pooling.
Definition: NvInfer.h:1735
virtual ~IPoolingLayer() noexcept=default
Dims getPaddingNd() const noexcept
Get the multi-dimension padding for pooling.
Definition: NvInfer.h:1766
void setPostPadding(Dims const &padding) noexcept
Set the multi-dimension post-padding for pooling.
Definition: NvInfer.h:1653
void setPrePadding(Dims const &padding) noexcept
Set the multi-dimension pre-padding for pooling.
Definition: NvInfer.h:1625
void setBlendFactor(float blendFactor) noexcept
Set the blending factor for the max_average_blend mode: max_average_blendPool = (1-blendFactor)*maxPo...
Definition: NvInfer.h:1569
A Quantize layer in a network definition.
Definition: NvInfer.h:5584
void setToType(DataType toType) noexcept
Set the Quantize layer output type.
Definition: NvInfer.h:5645
bool setBlockShape(Dims const &blockShape) noexcept
Set the shape of the quantization block.
Definition: NvInfer.h:5618
void setAxis(int32_t axis) noexcept
Set the quantization axis.
Definition: NvInfer.h:5605
TRT_NODISCARD Dims getBlockShape() const noexcept
Get the shape of the quantization block.
Definition: NvInfer.h:5629
int32_t getAxis() const noexcept
Get the quantization axis.
Definition: NvInfer.h:5594
virtual ~IQuantizeLayer() noexcept=default
DataType getToType() const noexcept
Return the Quantize layer output type.
Definition: NvInfer.h:5657
A RaggedSoftmax layer in a network definition.
Definition: NvInfer.h:3921
apiv::VRaggedSoftMaxLayer * mImpl
Definition: NvInfer.h:3923
virtual ~IRaggedSoftMaxLayer() noexcept=default
A recurrence layer in a network definition.
Definition: NvInfer.h:4793
virtual ~IRecurrenceLayer() noexcept=default
Layer that represents a reduction across a non-bool tensor.
Definition: NvInfer.h:2989
void setKeepDimensions(bool keepDimensions) noexcept
Set the boolean that specifies whether or not to keep the reduced dimensions for the layer.
Definition: NvInfer.h:3036
void setOperation(ReduceOperation op) noexcept
Set the reduce operation for the layer.
Definition: NvInfer.h:2996
ReduceOperation getOperation() const noexcept
Get the reduce operation for the layer.
Definition: NvInfer.h:3006
virtual ~IReduceLayer() noexcept=default
uint32_t getReduceAxes() const noexcept
Get the axes over which to reduce for the layer.
Definition: NvInfer.h:3026
void setReduceAxes(uint32_t reduceAxes) noexcept
Set the axes over which to reduce.
Definition: NvInfer.h:3016
apiv::VReduceLayer * mImpl
Definition: NvInfer.h:3052
bool getKeepDimensions() const noexcept
Get the boolean that specifies whether or not to keep the reduced dimensions for the layer.
Definition: NvInfer.h:3046
A resize layer in a network definition.
Definition: NvInfer.h:4276
void setSelectorForSinglePixel(ResizeSelector selector) noexcept
Set coordinate selector function when resized to single pixel.
Definition: NvInfer.h:4437
void setNearestRounding(ResizeRoundMode value) noexcept
Set rounding mode for nearest neighbor resize.
Definition: NvInfer.h:4461
virtual ~IResizeLayer() noexcept=default
int32_t getScales(int32_t size, float *scales) const noexcept
Copies resize scales to scales[0, ..., nbScales-1], where nbScales is the number of scales that were ...
Definition: NvInfer.h:4355
void setOutputDimensions(Dims const &dimensions) noexcept
Set the output dimensions.
Definition: NvInfer.h:4296
void setCubicCoeff(float A) noexcept
Set the coefficient 'A' used in cubic interpolation.
Definition: NvInfer.h:4493
void setScales(float const *scales, int32_t nbScales) noexcept
Set the resize scales.
Definition: NvInfer.h:4336
float getCubicCoeff() const noexcept
Get the coefficient 'A' used in cubic interpolation.
Definition: NvInfer.h:4503
ResizeSelector getSelectorForSinglePixel() const noexcept
Get the coordinate selector function when resized to single pixel.
Definition: NvInfer.h:4447
InterpolationMode getResizeMode() const noexcept
Get resize mode for an input tensor.
Definition: NvInfer.h:4377
void setCoordinateTransformation(ResizeCoordinateTransformation coordTransform) noexcept
Set coordinate transformation function.
Definition: NvInfer.h:4412
void setExcludeOutside(bool excludeFlag) noexcept
Set the state for excluding outside pixels.
Definition: NvInfer.h:4516
void setResizeMode(InterpolationMode interpolationMode) noexcept
Set resize mode for an input tensor.
Definition: NvInfer.h:4367
Dims getOutputDimensions() const noexcept
Get the output dimensions.
Definition: NvInfer.h:4306
ResizeRoundMode getNearestRounding() const noexcept
Get rounding mode for nearest neighbor resize.
Definition: NvInfer.h:4471
bool getExcludeOutside() const noexcept
Get the state for excluding outside pixels.
Definition: NvInfer.h:4526
ResizeCoordinateTransformation getCoordinateTransformation() const noexcept
Get coordinate transformation function.
Definition: NvInfer.h:4422
A ReverseSequence layer in a network definition.
Definition: NvInfer.h:6514
void setSequenceAxis(int32_t sequenceAxis) noexcept
Set the sequence axis. Default is 0.
Definition: NvInfer.h:6547
int32_t getBatchAxis() const noexcept
Return the batch axis. Return 1 if no batch axis was set.
Definition: NvInfer.h:6534
apiv::VReverseSequenceLayer * mImpl
Definition: NvInfer.h:6563
int32_t getSequenceAxis() const noexcept
Return the sequence axis. Return 0 if no sequence axis was set.
Definition: NvInfer.h:6557
void setBatchAxis(int32_t batchAxis) noexcept
Set the batch axis. Default is 1.
Definition: NvInfer.h:6524
virtual ~IReverseSequenceLayer() noexcept=default
Layer that implements Rotary Position Embedding (RoPE) (https://arxiv.org/abs/2104....
Definition: NvInfer.h:7393
TRT_NODISCARD int32_t getRotaryEmbeddingDim() const noexcept
Get the number of hidden dimensions participating in RoPE. The default value is 0,...
Definition: NvInfer.h:7433
virtual ~IRotaryEmbeddingLayer() noexcept=default
void setInterleaved(bool interleaved) noexcept
Set whether the input is in interleaved format, i.e., whether the 2-d vectors rotated are taken from ...
Definition: NvInfer.h:7400
TRT_NODISCARD bool setRotaryEmbeddingDim(int32_t rotaryEmbeddingDim) noexcept
Set the number of hidden dimensions participating in RoPE. The default value is 0,...
Definition: NvInfer.h:7422
apiv::VRotaryEmbeddingLayer * mImpl
Definition: NvInfer.h:7456
TRT_NODISCARD bool getInterleaved() const noexcept
Get whether the input is in interleaved format. The default value is false.
Definition: NvInfer.h:7411
A Scale layer in a network definition.
Definition: NvInfer.h:1932
Weights getScale() const noexcept
Get the scale value.
Definition: NvInfer.h:1989
Weights getPower() const noexcept
Get the power value.
Definition: NvInfer.h:2009
void setScale(Weights scale) noexcept
Set the scale value.
Definition: NvInfer.h:1979
void setPower(Weights power) noexcept
Set the power value.
Definition: NvInfer.h:1999
ScaleMode getMode() const noexcept
Get the scale mode.
Definition: NvInfer.h:1949
void setShift(Weights shift) noexcept
Set the shift value.
Definition: NvInfer.h:1959
void setChannelAxis(int32_t channelAxis) noexcept
Set the channel axis.
Definition: NvInfer.h:2045
Weights getShift() const noexcept
Get the shift value.
Definition: NvInfer.h:1969
virtual ~IScaleLayer() noexcept=default
void setMode(ScaleMode mode) noexcept
Set the scale mode.
Definition: NvInfer.h:1939
int32_t getChannelAxis() const noexcept
Get the channel axis.
Definition: NvInfer.h:2024
A scatter layer in a network definition. Supports several kinds of scattering.
Definition: NvInfer.h:6136
void setMode(ScatterMode mode) noexcept
Set the scatter mode.
Definition: NvInfer.h:6143
apiv::VScatterLayer * mImpl
Definition: NvInfer.h:6177
void setAxis(int32_t axis) noexcept
Set the axis used by ScatterMode::kELEMENTS.
Definition: NvInfer.h:6163
int32_t getAxis() const noexcept
Get the axis.
Definition: NvInfer.h:6171
ScatterMode getMode() const noexcept
Get the scatter mode.
Definition: NvInfer.h:6153
virtual ~IScatterLayer() noexcept=default
Select elements from two data tensors based on a condition tensor.
Definition: NvInfer.h:5101
virtual ~ISelectLayer() noexcept=default
Layer type for getting shape of a tensor.
Definition: NvInfer.h:3594
virtual ~IShapeLayer() noexcept=default
apiv::VShapeLayer * mImpl
Definition: NvInfer.h:3596
Layer type for shuffling data.
Definition: NvInfer.h:3157
apiv::VShuffleLayer * mImpl
Definition: NvInfer.h:3315
void setFirstTranspose(Permutation permutation) noexcept
Set the permutation applied by the first transpose operation.
Definition: NvInfer.h:3168
void setSecondTranspose(Permutation permutation) noexcept
Set the permutation applied by the second transpose operation.
Definition: NvInfer.h:3268
Dims getReshapeDimensions() const noexcept
Get the reshaped dimensions.
Definition: NvInfer.h:3221
void setReshapeDimensions(Dims const &dimensions) noexcept
Set the reshaped dimensions.
Definition: NvInfer.h:3208
Permutation getFirstTranspose() const noexcept
Get the permutation applied by the first transpose operation.
Definition: NvInfer.h:3180
virtual ~IShuffleLayer() noexcept=default
Permutation getSecondTranspose() const noexcept
Get the permutation applied by the second transpose operation.
Definition: NvInfer.h:3280
bool getZeroIsPlaceholder() const noexcept
Get meaning of 0 in reshape dimensions.
Definition: NvInfer.h:3309
void setZeroIsPlaceholder(bool zeroIsPlaceholder) noexcept
Set meaning of 0 in reshape dimensions.
Definition: NvInfer.h:3296
Slices an input tensor into an output tensor based on the offset and strides.
Definition: NvInfer.h:3409
void setStride(Dims const &stride) noexcept
Set the stride for computing the output slice data.
Definition: NvInfer.h:3478
apiv::VSliceLayer * mImpl
Definition: NvInfer.h:3577
virtual ~ISliceLayer() noexcept=default
void setSize(Dims const &size) noexcept
Set the dimensions of the output slice.
Definition: NvInfer.h:3449
void setAxes(Dims const &axes) noexcept
Set the axes for this ISliceLayer.
Definition: NvInfer.h:3556
void setStart(Dims const &start) noexcept
Set the start offset that the slice layer uses to create the output slice.
Definition: NvInfer.h:3420
Dims getStart() const noexcept
Get the start offset for the slice layer.
Definition: NvInfer.h:3435
void setMode(SampleMode mode) noexcept
Set the slice mode.
Definition: NvInfer.h:3503
Dims getSize() const noexcept
Get dimensions of the output slice.
Definition: NvInfer.h:3464
SampleMode getMode() const noexcept
Get the slice mode.
Definition: NvInfer.h:3513
Dims getStride() const noexcept
Get the stride for the output slice.
Definition: NvInfer.h:3493
Dims getAxes() const noexcept
Get the axes for this ISliceLayer.
Definition: NvInfer.h:3571
A Softmax layer in a network definition.
Definition: NvInfer.h:2076
void setAxes(uint32_t axes) noexcept
Set the axis along which softmax is computed. Currently, only one axis can be set.
Definition: NvInfer.h:2098
uint32_t getAxes() const noexcept
Get the axis along which softmax occurs.
Definition: NvInfer.h:2108
virtual ~ISoftMaxLayer() noexcept=default
Layer that represents a squeeze operation, removing unit dimensions of the first input tensor on a se...
Definition: NvInfer.h:6726
virtual ~ISqueezeLayer() noexcept=default
apiv::VSqueezeLayer * mImpl
Definition: NvInfer.h:6743
A tensor in a network definition.
Definition: NvInfer.h:189
void setAllowedFormats(TensorFormats formats) noexcept
Set allowed formats for an input or output tensor. By default all formats are allowed....
Definition: NvInfer.h:459
TensorLocation getLocation() const noexcept
Get the storage location of a tensor.
Definition: NvInfer.h:378
void setDimensions(Dims const &dimensions) noexcept
Set the dimensions of a tensor.
Definition: NvInfer.h:237
void resetDynamicRange() noexcept
Undo effect of setDynamicRange.
Definition: NvInfer.h:417
void setName(char const *name) noexcept
Set the tensor name.
Definition: NvInfer.h:206
bool isExecutionTensor() const noexcept
Whether the tensor is an execution tensor.
Definition: NvInfer.h:524
TRT_DEPRECATED bool dynamicRangeIsSet() const noexcept
Query whether dynamic range is set.
Definition: NvInfer.h:409
char const * getName() const noexcept
Get the tensor name.
Definition: NvInfer.h:218
bool isShapeTensor() const noexcept
Whether the tensor is a shape tensor.
Definition: NvInfer.h:503
float getDynamicRangeMax() const noexcept
Get maximum of dynamic range.
Definition: NvInfer.h:437
bool isNetworkInput() const noexcept
Whether the tensor is a network input.
Definition: NvInfer.h:327
TRT_DEPRECATED void setBroadcastAcrossBatch(bool broadcastAcrossBatch) noexcept
Set whether to enable broadcast of tensor across the implicit batch dimension.
Definition: NvInfer.h:352
TRT_DEPRECATED bool setDynamicRange(float min, float max) noexcept
Set dynamic range for the tensor.
Definition: NvInfer.h:319
TRT_DEPRECATED void setType(DataType type) noexcept
Set the data type of a tensor.
Definition: NvInfer.h:287
TRT_DEPRECATED bool getBroadcastAcrossBatch() const noexcept
Check if tensor is broadcast across the implicit batch dimension.
Definition: NvInfer.h:366
bool isNetworkOutput() const noexcept
Whether the tensor is a network output.
Definition: NvInfer.h:335
DataType getType() const noexcept
Get the data type of a tensor.
Definition: NvInfer.h:302
apiv::VTensor * mImpl
Definition: NvInfer.h:571
float getDynamicRangeMin() const noexcept
Get minimum of dynamic range.
Definition: NvInfer.h:427
virtual ~ITensor() noexcept=default
void setDimensionName(int32_t index, char const *name) noexcept
Name a dimension of an input tensor.
Definition: NvInfer.h:550
char const * getDimensionName(int32_t index) const noexcept
Get the name of an input dimension.
Definition: NvInfer.h:565
TRT_DEPRECATED void setLocation(TensorLocation location) noexcept
Set the storage location of a tensor.
Definition: NvInfer.h:397
Dims getDimensions() const noexcept
Get the dimensions of a tensor.
Definition: NvInfer.h:251
TensorFormats getAllowedFormats() const noexcept
Get a bitmask of TensorFormat values that the tensor supports. For a shape tensor,...
Definition: NvInfer.h:472
Class to handle tactic timing info collected from builder.
Definition: NvInfer.h:10719
int64_t queryKeys(TimingCacheKey *keyBuffer, int64_t capacity) const noexcept
Query cache keys from Timing Cache.
Definition: NvInfer.h:10785
bool combine(ITimingCache const &inputCache, bool ignoreMismatch) noexcept
Combine input timing cache into local instance.
Definition: NvInfer.h:10756
TimingCacheValue query(TimingCacheKey const &key) const noexcept
Query value in a cache entry.
Definition: NvInfer.h:10802
virtual ~ITimingCache() noexcept=default
bool update(TimingCacheKey const &key, TimingCacheValue const &value) noexcept
Update values in a cache entry.
Definition: NvInfer.h:10824
apiv::VTimingCache * mImpl
Definition: NvInfer.h:10830
bool reset() noexcept
Empty the timing cache.
Definition: NvInfer.h:10766
Layer that represents a TopK reduction.
Definition: NvInfer.h:3634
void setK(int32_t k) noexcept
Set the static k value for the layer.
Definition: NvInfer.h:3665
void setReduceAxes(uint32_t reduceAxes) noexcept
Set which axes to reduce for the layer.
Definition: NvInfer.h:3689
TopKOperation getOperation() const noexcept
Get the operation for the layer.
Definition: NvInfer.h:3651
apiv::VTopKLayer * mImpl
Definition: NvInfer.h:3748
void setOperation(TopKOperation op) noexcept
Set the operation for the layer.
Definition: NvInfer.h:3641
bool setIndicesType(DataType type) noexcept
Set the indices type for the layer.
Definition: NvInfer.h:3730
int32_t getK() const noexcept
Get the k value for the layer.
Definition: NvInfer.h:3679
uint32_t getReduceAxes() const noexcept
Get the axes to reduce for the layer.
Definition: NvInfer.h:3699
virtual ~ITopKLayer() noexcept=default
DataType getIndicesType() const noexcept
Return the TopK layer indices type.
Definition: NvInfer.h:3742
A layer that represents a trip-count limiter.
Definition: NvInfer.h:4914
TripLimit getTripLimit() const noexcept
Get a trip limiter type.
Definition: NvInfer.h:4919
virtual ~ITripLimitLayer() noexcept=default
Layer that represents an unary operation.
Definition: NvInfer.h:2881
void setOperation(UnaryOperation op) noexcept
Set the unary operation for the layer.
Definition: NvInfer.h:2890
apiv::VUnaryLayer * mImpl
Definition: NvInfer.h:2906
UnaryOperation getOperation() const noexcept
Get the unary operation for the layer.
Definition: NvInfer.h:2900
virtual ~IUnaryLayer() noexcept=default
Layer that represents an unsqueeze operation, which reshapes the first input tensor by inserting unit...
Definition: NvInfer.h:6756
virtual ~IUnsqueezeLayer() noexcept=default
apiv::VUnsqueezeLayer * mImpl
Definition: NvInfer.h:6774
An Interface class for version control.
Definition: NvInferRuntimeBase.h:279
Version information associated with a TRT interface.
Definition: NvInferRuntimeBase.h:244
An array of weights used as a layer parameter.
Definition: NvInferRuntime.h:124
Definition: NvInfer.h:10310
virtual int32_t selectAlgorithms(IAlgorithmContext const &context, IAlgorithm const *const *choices, int32_t nbChoices, int32_t *selection) noexcept=0
Select Algorithms for a layer from the given list of algorithm choices.
virtual void reportAlgorithms(IAlgorithmContext const *const *algoContexts, IAlgorithm const *const *algoChoices, int32_t nbAlgorithms) noexcept=0
Called by TensorRT to report choices it made.
InterfaceInfo getInterfaceInfo() const noexcept override
Return version information associated with this interface. Applications must not override this method...
Definition: NvInfer.h:10315
virtual ~IAlgorithmSelector() noexcept=default
Definition: NvInferRuntimeBase.h:416
Definition: NvInferRuntime.h:1656
~IInt8EntropyCalibrator2() noexcept override=default
CalibrationAlgoType getAlgorithm() noexcept override
Definition: NvInfer.h:9950
InterfaceInfo getInterfaceInfo() const noexcept override
Return version information associated with this interface. Applications must not override this method...
Definition: NvInfer.h:9942
Definition: NvInfer.h:9897
CalibrationAlgoType getAlgorithm() noexcept override
Definition: NvInfer.h:9910
InterfaceInfo getInterfaceInfo() const noexcept override
Return version information associated with this interface. Applications must not override this method...
Definition: NvInfer.h:9902
~IInt8EntropyCalibrator() noexcept override=default
Definition: NvInfer.h:10016
CalibrationAlgoType getAlgorithm() noexcept override
Definition: NvInfer.h:10029
InterfaceInfo getInterfaceInfo() const noexcept override
Return version information associated with this interface. Applications must not override this method...
Definition: NvInfer.h:10021
virtual double getQuantile() const noexcept=0
The quantile (between 0 and 1) that will be used to select the region maximum when the quantile metho...
Definition: NvInfer.h:9977
~IInt8MinMaxCalibrator() noexcept override=default
CalibrationAlgoType getAlgorithm() noexcept override
Definition: NvInfer.h:9990
InterfaceInfo getInterfaceInfo() const noexcept override
Return version information associated with this interface. Applications must not override this method...
Definition: NvInfer.h:9982
Definition: NvInferPluginBase.h:206
Definition: NvInfer.h:11055
virtual bool stepComplete(char const *phaseName, int32_t step) noexcept=0
Signal that a step of an optimizer phase has finished.
virtual ~IProgressMonitor() noexcept=default
virtual void phaseFinish(char const *phaseName) noexcept=0
Signal that a phase of the optimizer has finished.
virtual void phaseStart(char const *phaseName, char const *parentPhase, int32_t nbSteps) noexcept=0
Signal that a phase of the optimizer has started.
Definition: NvInferRuntime.h:666
IBuilder * createInferBuilder(ILogger &logger) noexcept
Create an instance of an IBuilder class.
Definition: NvInfer.h:12509
The TensorRT API version 1 namespace.
Definition: NvInferSafePlugin.h:33
uint32_t TacticSources
Represents a collection of one or more TacticSource values combined using bitwise-OR operations.
Definition: NvInferRuntime.h:2961
ResizeSelector
The coordinate selector when resizing to a single-pixel output.
Definition: NvInfer.h:4181
@ kFORMULA
Use formula to map the original index.
@ kUPPER
Select the upper left pixel.
EngineCapability
List of supported engine capability flows.
Definition: NvInferRuntime.h:76
MemoryPoolType
The type for memory pools used by TensorRT.
Definition: NvInfer.h:10841
TENSORRTAPI bool setInternalLibraryPath(AsciiChar const *path) noexcept
Set a custom directory path for loading internal TensorRT libraries when building engines.
ScaleMode
Controls how shift, scale and power are applied in a Scale layer.
Definition: NvInfer.h:1889
@ kUNIFORM
Identical coefficients across all elements of the tensor.
@ kCHANNEL
Per-channel coefficients.
RuntimePlatform
Describes the intended runtime platform (operating system and CPU architecture) for the execution of ...
Definition: NvInfer.h:10421
uint32_t QuantizationFlags
Represents one or more QuantizationFlag values using binary OR operations.
Definition: NvInfer.h:10373
HardwareCompatibilityLevel
Describes requirements of compatibility with GPU architectures other than that of the GPU on which th...
Definition: NvInfer.h:10969
CumulativeOperation
Enumerates the cumulative operations that may be performed by a Cumulative layer.
Definition: NvInfer.h:6790
BoundingBoxFormat
Representation of bounding box data used for the Boxes input tensor in INMSLayer.
Definition: NvInfer.h:6328
@ kCENTER_SIZES
(x_center, y_center, width, height) where (x_center, y_center) is the center point of the box
@ kCORNER_PAIRS
(x1, y1, x2, y2) where (x1, y1) and (x2, y2) are any pair of diagonal corners
constexpr int32_t EnumMax< BuilderFlag >() noexcept
Definition: NvInfer.h:10659
constexpr int32_t EnumMax< LayerType >() noexcept
Definition: NvInfer.h:124
@ kFP4
FP4 field type.
@ kINT8
INT8 field type.
@ kFP8
FP8 field type.
@ kBF16
BF16 field type.
@ kINT4
INT4 field type.
constexpr int32_t EnumMax< CalibrationAlgoType >() noexcept
Definition: NvInfer.h:9812
UnaryOperation
Enumerates the unary operations that may be performed by a Unary layer.
Definition: NvInfer.h:2834
@ kISINF
Return true if input value equals +/- infinity for floating-point data type.
@ kCOSH
Hyperbolic cosine.
@ kACOSH
Inverse hyperbolic cosine.
@ kERF
Gauss error function.
@ kISNAN
Return true if input value is a NaN for floating-point data type.
@ kACOS
Inverse cosine.
@ kABS
Absolute value.
@ kSINH
Hyperbolic sine.
@ kROUND
Round to nearest even for floating-point data type.
@ kATANH
Inverse hyperbolic tangent.
@ kASINH
Inverse hyperbolic sine.
@ kSIGN
Sign: if input > 0, output 1; if input < 0, output -1; if input == 0, output 0.
@ kEXP
Exponentiation.
@ kATAN
Inverse tangent.
constexpr int32_t EnumMax< ReduceOperation >() noexcept
Definition: NvInfer.h:2949
constexpr int32_t EnumMax< TripLimit >() noexcept
Definition: NvInfer.h:4582
ActivationType
Enumerates the types of activation to perform in an activation layer.
Definition: NvInfer.h:143
@ kSELU
Selu activation: x>0 ? beta * x : beta * (alpha*exp(x) - alpha)
@ kTANH
TanH activation.
@ kSCALED_TANH
Scaled tanh activation: alpha*tanh(beta*x)
@ kRELU
Rectified linear activation.
@ kELU
Elu activation: x>=0 ? x : alpha * (exp(x) - 1).
@ kLEAKY_RELU
LeakyRelu activation: x>=0 ? x : alpha * x.
@ kSOFTSIGN
Softsign activation: x / (1+|x|)
@ kHARD_SIGMOID
Hard sigmoid activation: max(0, min(1, alpha*x+beta))
@ kTHRESHOLDED_RELU
Thresholded ReLU activation: x>alpha ? x : 0.
@ kSIGMOID
Sigmoid activation.
@ kCLIP
Clip activation: max(alpha, min(beta, x))
@ kGELU_TANH
GELU tanh activation: 0.5 * x * (1 + tanh(sqrt(2/pi) * (0.044715F * pow(x, 3) + x)))
@ kGELU_ERF
GELU erf activation: 0.5 * x * (1 + erf(sqrt(0.5) * x))
@ kSOFTPLUS
Parametric softplus activation: alpha*log(exp(beta*x)+1)
FillOperation
Enumerates the tensor fill operations that may be performed by a fill layer.
Definition: NvInfer.h:5162
ResizeRoundMode
The rounding mode for nearest neighbor resize.
Definition: NvInfer.h:4211
@ kHALF_UP
Round half up.
@ kHALF_DOWN
Round half down.
char_t AsciiChar
Definition: NvInferRuntimeBase.h:116
PaddingMode
Enumerates the modes of padding to perform in convolution, deconvolution and pooling layer,...
Definition: NvInfer.h:1067
@ kSAME_LOWER
Use SAME padding, with prePadding >= postPadding.
@ kEXPLICIT_ROUND_DOWN
Use explicit padding, rounding output size down.
@ kEXPLICIT_ROUND_UP
Use explicit padding, rounding output size up.
@ kSAME_UPPER
Use SAME padding, with prePadding <= postPadding.
TripLimit
Enum that describes kinds of trip limits.
Definition: NvInfer.h:4570
@ kWHILE
Tensor is a scalar of type kBOOL. Loop terminates when value is false.
@ kCOUNT
Tensor is a scalar of type kINT32 or kINT64 that contains the trip count.
uint32_t NetworkDefinitionCreationFlags
Represents one or more NetworkDefinitionCreationFlag flags using binary OR operations....
Definition: NvInfer.h:12103
PreviewFeature
Define preview features.
Definition: NvInfer.h:10916
TilingOptimizationLevel
Define the optimization levels for Tiling.
Definition: NvInfer.h:11022
@ kFAST
Use a fast algorithm and heuristic based strategy. Slightly increases engine build time.
@ kFULL
Increase search space even wider. Significantly increases engine build time.
constexpr int32_t EnumMax< GatherMode >() noexcept
Definition: NvInfer.h:2593
DataType
The type of weights and tensors. The datatypes other than kBOOL, kINT32, and kINT64 are "activation d...
Definition: NvInferRuntimeBase.h:146
uint32_t BuilderFlags
Represents one or more BuilderFlag values using binary OR operations, e.g., 1U << BuilderFlag::kFP16 ...
Definition: NvInfer.h:10453
DeviceType
The device that this layer/network will execute on.
Definition: NvInferRuntime.h:1350
constexpr int32_t EnumMax< ScaleMode >() noexcept
Definition: NvInfer.h:1901
CalibrationAlgoType
Version of calibration algorithm to use.
Definition: NvInfer.h:9799
@ kENTROPY_CALIBRATION_2
Entropy calibration.
@ kLEGACY_CALIBRATION
Legacy calibration.
@ kENTROPY_CALIBRATION
Legacy entropy calibration.
@ kMINMAX_CALIBRATION
Minmax calibration.
LayerType
The type values of layer classes.
Definition: NvInfer.h:58
@ kGRID_SAMPLE
Grid sample layer.
@ kRAGGED_SOFTMAX
Ragged softmax layer.
@ kDECONVOLUTION
Deconvolution layer.
@ kREDUCE
Reduce layer.
@ kASSERTION
Assertion layer.
@ kTOPK
TopK layer.
@ kRESIZE
Resize Layer.
@ kCAST
Cast layer.
@ kPADDING
Padding layer.
@ kSQUEEZE
Squeeze Layer.
@ kATTENTION_INPUT
Attention Input.
@ kMATRIX_MULTIPLY
Matrix multiply layer.
@ kCONDITION
Condition layer.
@ kCUMULATIVE
Cumulative layer.
@ kCONDITIONAL_INPUT
Conditional Input layer.
@ kIDENTITY
Identity layer.
@ kNORMALIZATION
Normalization layer.
@ kQUANTIZE
Quantize layer.
@ kSCATTER
Scatter layer.
@ kCONVOLUTION
Convolution layer.
@ kPARAMETRIC_RELU
Parametric ReLU layer.
@ kATTENTION_OUTPUT
Attention Output.
@ kUNSQUEEZE
Unsqueeze Layer.
@ kCONCATENATION
Concatenation layer.
@ kONE_HOT
OneHot layer.
@ kREVERSE_SEQUENCE
Reverse sequence layer.
@ kSLICE
Slice layer.
@ kEINSUM
Einsum layer.
@ kSOFTMAX
SoftMax layer.
@ kSHAPE
Shape layer.
@ kROTARY_EMBEDDING
Rotary Embedding layer.
@ kRECURRENCE
Loop Recurrence layer.
@ kDEQUANTIZE
Dequantize layer.
@ kSHUFFLE
Shuffle layer.
@ kPLUGIN_V3
PluginV3 layer.
@ kITERATOR
Loop Iterator layer.
@ kPOOLING
Pooling layer.
@ kTRIP_LIMIT
Loop Trip limit layer.
@ kSCALE
Scale layer.
@ kDYNAMIC_QUANTIZE
Dynamic Quantize layer.
@ kGATHER
Gather layer.
@ kUNARY
UnaryOp operation Layer.
@ kACTIVATION
Activation layer.
@ kELEMENTWISE
Elementwise layer.
@ kSELECT
Select layer.
@ kPLUGIN_V2
PluginV2 layer.
@ kLOOP_OUTPUT
Loop output layer.
@ kCONDITIONAL_OUTPUT
Conditional Output layer.
@ kCONSTANT
Constant layer.
@ kNON_ZERO
NonZero layer.
@ kFILL
Fill layer.
@ kKVCACHE_UPDATE
KV Cache Update layer.
@ kPLUGIN
Plugin layer.
@ kDIST_COLLECTIVE
DistCollective layer.
constexpr int32_t EnumMax< QuantizationFlag >() noexcept
Definition: NvInfer.h:10398
SampleMode
Controls how ISliceLayer and IGridSample handle out-of-bounds coordinates.
Definition: NvInfer.h:3325
@ kCLAMP
Out of bounds indices are clamped to bounds.
@ kSTRICT_BOUNDS
Fail with error when the coordinates are out of bounds.
@ kWRAP
Coordinates wrap around periodically.
GatherMode
Control form of IGatherLayer.
Definition: NvInfer.h:2581
@ kDEFAULT
Similar to ONNX Gather.
@ kELEMENT
Similar to ONNX GatherElements.
@ kND
Similar to ONNX GatherND.
MoEActType
Enumerates the activation type for the MoE layer.
Definition: NvInfer.h:7554
uint32_t TensorFormats
It is capable of representing one or more TensorFormat by binary OR operations, e....
Definition: NvInfer.h:135
ProfilingVerbosity
List of verbosity levels of layer information exposed in NVTX annotations and in IEngineInspector.
Definition: NvInferRuntime.h:2973
NetworkDefinitionCreationFlag
List of immutable network properties expressed at network creation time. NetworkDefinitionCreationFla...
Definition: NvInfer.h:12114
ElementWiseOperation
Enumerates the binary operations that may be performed by an ElementWise layer.
Definition: NvInfer.h:2491
@ kSUB
Subtract the second element from the first.
@ kSUM
Sum of the two elements.
@ kPROD
Product of the two elements.
@ kFLOOR_DIV
Floor division of the first element by the second.
@ kEQUAL
Check if two elements are equal.
@ kAND
Logical AND of two elements.
@ kOR
Logical OR of two elements.
@ kMIN
Minimum of the two elements.
@ kPOW
The first element to the power of the second element.
@ kLESS
Check if element in first tensor is less than corresponding element in second tensor.
@ kGREATER
Check if element in first tensor is greater than corresponding element in second tensor.
@ kXOR
Logical XOR of two elements.
@ kDIV
Divide the first element by the second.
QuantizationFlag
List of valid flags for quantizing the network to int8.
Definition: NvInfer.h:10385
CollectiveOperation
Enumerates the collective operations that may be performed by a DistCollective layer.
Definition: NvInfer.h:2962
@ kREDUCE_SCATTER
Reduce scatter.
constexpr int32_t EnumMax< SampleMode >() noexcept
Definition: NvInfer.h:3341
InterpolationMode
Enumerates various modes of interpolation.
Definition: NvInfer.h:4099
@ kNEAREST
ND (0 < N <= 8) nearest neighbor resizing.
@ kCUBIC
Supports bicubic (2D) interpolation.
@ kLINEAR
Supports linear (1D), bilinear (2D), and trilinear (3D) interpolation.
BuilderFlag
List of valid modes that the builder can enable when creating an engine from a network definition.
Definition: NvInfer.h:10463
@ kWEIGHT_STREAMING
Enable weight streaming for the current engine.
@ kGPU_FALLBACK
Enable layers marked to execute on GPU if layer cannot execute on DLA.
@ kSPARSE_WEIGHTS
Allow the builder to examine weights and use optimized functions when weights have suitable sparsity.
@ kEDITABLE_TIMING_CACHE
Enable editable timing cache.
@ kSTRIP_PLAN
Strip the refittable weights from the engine plan file.
@ kMONITOR_MEMORY
Enable memory monitor during build time.
@ kDISABLE_TIMING_CACHE
Disable reuse of timing information across identical layers.
@ kREFIT
Enable building a refittable engine.
constexpr int32_t EnumMax< TopKOperation >() noexcept
Definition: NvInfer.h:3617
TENSORRTAPI nvinfer1::IPluginRegistry * getBuilderPluginRegistry(nvinfer1::EngineCapability capability) noexcept
Return the plugin registry for building a Standard engine, or nullptr if no registry exists.
constexpr int32_t EnumMax< MemoryPoolType >() noexcept
Definition: NvInfer.h:10902
TopKOperation
Enumerates the operations that may be performed by a TopK layer.
Definition: NvInfer.h:3606
ReduceOperation
Enumerates the reduce operations that may be performed by a Reduce layer.
Definition: NvInfer.h:2934
@ kAVG
Average of the elements.
constexpr int32_t EnumMax< LoopOutput >() noexcept
Definition: NvInfer.h:4559
constexpr int32_t EnumMax< NetworkDefinitionCreationFlag >() noexcept
Definition: NvInfer.h:12142
TRT_DEPRECATED_API nvinfer1::safe::IPluginRegistry * getBuilderSafePluginRegistry(nvinfer1::EngineCapability capability) noexcept
Return the plugin registry for building a Safety engine, or nullptr if no registry exists.
ScatterMode
Control form of IScatterLayer.
Definition: NvInfer.h:6062
MatrixOperation
Enumerates the operations that may be performed on a tensor by IMatrixMultiplyLayer before multiplica...
Definition: NvInfer.h:3759
@ kTRANSPOSE
Like kNONE, but transpose the matrix dimensions.
ResizeCoordinateTransformation
The resize coordinate transformation function.
Definition: NvInfer.h:4127
constexpr int32_t EnumMax< UnaryOperation >() noexcept
Definition: NvInfer.h:2868
LoopOutput
Enum that describes kinds of loop outputs.
Definition: NvInfer.h:4542
@ kLAST_VALUE
Output value is value of tensor for last iteration.
@ kCONCATENATE
Output value is concatenation of values of tensor for each iteration, in forward order.
@ kREVERSE
Output value is concatenation of values of tensor for each iteration, in reverse order.
constexpr int32_t EnumMax< BoundingBoxFormat >() noexcept
Definition: NvInfer.h:6341
constexpr int32_t EnumMax< MatrixOperation >() noexcept
Definition: NvInfer.h:3787
KVCacheMode
Enumerates the KVCache modes that may be performed by a KVCacheUpdate layer.
Definition: NvInfer.h:7466
PoolingType
The type of pooling to perform in a pooling layer.
Definition: NvInfer.h:1503
@ kAVERAGE
Average over elements. If the tensor is padded, the count includes the padding.
@ kMAX
Maximum over elements.
@ kMAX_AVERAGE_BLEND
Blending between max and average pooling: (1-blendFactor)*maxPool + blendFactor*avgPool.
v_1_0::IProgressMonitor IProgressMonitor
Definition: NvInfer.h:11138
constexpr int32_t EnumMax< FillOperation >() noexcept
Definition: NvInfer.h:5196
TensorLocation
The location for tensor data storage, device or host.
Definition: NvInferRuntime.h:204
OptProfileSelector
When setting or querying optimization profile parameters (such as shape tensor inputs or dynamic dime...
Definition: NvInferRuntime.h:2635
AttentionNormalizationOp
Enumerates the operations that may be performed by the normalization in the attention subgraph.
Definition: NvInfer.h:6925
constexpr int32_t EnumMax< ScatterMode >() noexcept
Definition: NvInfer.h:6073
Represents a permutation of dimensions.
Definition: NvInfer.h:3134
Declaration of EnumMaxImpl struct to store maximum number of elements in an enumeration type.
Definition: NvInferRuntimeBase.h:129
The key to retrieve timing cache entries.
Definition: NvInfer.h:10683
Definition: NvInfer.h:10695
uint64_t tacticHash
Hash of the selected tactic.
Definition: NvInfer.h:10697
float timingMSec
Timing of this tactic in milliseconds. Negative numbers and NaN are invalid values.
Definition: NvInfer.h:10699

  Copyright © 2024 NVIDIA Corporation
  Privacy Policy | Manage My Privacy | Do Not Sell or Share My Data | Terms of Service | Accessibility | Corporate Policies | Product Security | Contact