TensorRT for RTX 1.2.0 — NvInferRuntime.h (annotated source listing; Doxygen navigation chrome removed)
1/*
2 * SPDX-FileCopyrightText: Copyright (c) 1993-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
3 * SPDX-License-Identifier: Apache-2.0
4 *
5 * Licensed under the Apache License, Version 2.0 (the "License");
6 * you may not use this file except in compliance with the License.
7 * You may obtain a copy of the License at
8 *
9 * http://www.apache.org/licenses/LICENSE-2.0
10 *
11 * Unless required by applicable law or agreed to in writing, software
12 * distributed under the License is distributed on an "AS IS" BASIS,
13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 * See the License for the specific language governing permissions and
15 * limitations under the License.
16 */
17
18#ifndef NV_INFER_RUNTIME_H
19#define NV_INFER_RUNTIME_H
20
26
27#include "NvInferImpl.h" // IWYU pragma: export
28#define NV_INFER_INTERNAL_INCLUDE 1
29#include "NvInferPluginBase.h" // IWYU pragma: export
30#undef NV_INFER_INTERNAL_INCLUDE
31#include "NvInferRuntimeCommon.h" // IWYU pragma: export
32
33namespace nvinfer1
34{
35
36class IExecutionContext;
37class ICudaEngine;
38class IPluginFactory;
39class IEngineInspector;
40
49
51{
52protected:
53 INoCopy() = default;
54 virtual ~INoCopy() = default;
55 INoCopy(INoCopy const& other) = delete;
56 INoCopy& operator=(INoCopy const& other) = delete;
57 INoCopy(INoCopy&& other) = delete;
58 INoCopy& operator=(INoCopy&& other) = delete;
59};
60
75enum class EngineCapability : int32_t
76{
81 kSTANDARD = 0,
82
89 kSAFETY = 1,
90
97};
98
99namespace impl
100{
102template <>
104{
105 static constexpr int32_t kVALUE = 3;
106};
107} // namespace impl
108
124{
125public:
127 void const* values;
128 int64_t count;
129};
130
//!
//! \class IHostMemory
//!
//! \brief Handle to library-allocated host memory that is accessible to the user.
//!
//! Thin pimpl wrapper: all queries forward to the opaque implementation object
//! (mImpl), which is set by the library. Typically returned by serialization
//! APIs (e.g. ICudaEngine::serialize() — confirm against TensorRT docs).
//! Non-copyable and non-movable via the INoCopy base.
//!
class IHostMemory : public INoCopy
{
public:
    virtual ~IHostMemory() noexcept = default;

    //! \brief A pointer to the raw, library-owned data.
    void* data() const noexcept
    {
        return mImpl->data();
    }

    //! \brief The size in bytes of the data that was allocated.
    std::size_t size() const noexcept
    {
        return mImpl->size();
    }

    //! \brief The type of the memory that was allocated.
    DataType type() const noexcept
    {
        return mImpl->type();
    }

protected:
    apiv::VHostMemory* mImpl; //!< Opaque implementation pointer; owned/set by the library.
};
167
//!
//! \enum DimensionOperation
//!
//! \brief Binary operations applicable to IDimensionExpr operands when building
//! dimension expressions (see IExprBuilder::operation()).
//!
//! NOTE(review): per-value semantics below are inferred from the enumerator
//! names; confirm against the NVIDIA TensorRT API reference.
//!
enum class DimensionOperation : int32_t
{
    kSUM = 0,       //!< Sum of the two operands.
    kPROD = 1,      //!< Product of the two operands.
    kMAX = 2,       //!< Maximum of the two operands.
    kMIN = 3,       //!< Minimum of the two operands.
    kSUB = 4,       //!< First operand minus second operand.
    kEQUAL = 5,     //!< 1 if operands are equal, 0 otherwise.
    kLESS = 6,      //!< 1 if first operand is less than second, 0 otherwise.
    kFLOOR_DIV = 7, //!< Floor division of first operand by second.
    kCEIL_DIV = 8   //!< Ceiling division of first operand by second.
};
190
//! Maximum number of elements in DimensionOperation enum. \see DimensionOperation
template <>
constexpr inline int32_t EnumMax<DimensionOperation>() noexcept
{
    return 9;
}
197
//!
//! \enum TensorLocation
//!
//! \brief The memory space in which a tensor's data resides.
//!
enum class TensorLocation : int32_t
{
    kDEVICE = 0, //!< Data is in device (GPU) memory.
    kHOST = 1,   //!< Data is in host (CPU) memory.
};
208
209namespace impl
210{
212template <>
214{
215 static constexpr int32_t kVALUE = 2;
216};
217} // namespace impl
218
232{
233public:
237 bool isConstant() const noexcept
238 {
239 return mImpl->isConstant();
240 }
241
248 int64_t getConstantValue() const noexcept
249 {
250 return mImpl->getConstantValue();
251 }
252
253protected:
254 apiv::VDimensionExpr* mImpl;
255 virtual ~IDimensionExpr() noexcept = default;
256
257public:
263 bool isSizeTensor() const noexcept
264 {
265 return mImpl->isSizeTensor();
266 }
267};
268
286class IExprBuilder : public INoCopy
287{
288public:
292 IDimensionExpr const* constant(int64_t value) noexcept
293 {
294 return mImpl->constant(value);
295 }
296
304 DimensionOperation op, IDimensionExpr const& first, IDimensionExpr const& second) noexcept
305 {
306 return mImpl->operation(op, first, second);
307 }
308
309protected:
310 apiv::VExprBuilder* mImpl;
311 virtual ~IExprBuilder() noexcept = default;
312
313public:
338 IDimensionExpr const* declareSizeTensor(int32_t outputIndex, IDimensionExpr const& opt, IDimensionExpr const& upper)
339 {
340 return mImpl->declareSizeTensor(outputIndex, opt, upper);
341 }
342};
343
350{
351public:
352 int32_t nbDims;
354};
355
362{
365
368
371
374};
375
407{
408public:
409 IPluginV2DynamicExt* clone() const noexcept override = 0;
410
435 virtual DimsExprs getOutputDimensions(
436 int32_t outputIndex, DimsExprs const* inputs, int32_t nbInputs, IExprBuilder& exprBuilder) noexcept = 0;
437
441 static constexpr int32_t kFORMAT_COMBINATION_LIMIT = 100;
442
475 virtual bool supportsFormatCombination(
476 int32_t pos, PluginTensorDesc const* inOut, int32_t nbInputs, int32_t nbOutputs) noexcept = 0;
477
515 virtual void configurePlugin(DynamicPluginTensorDesc const* in, int32_t nbInputs,
516 DynamicPluginTensorDesc const* out, int32_t nbOutputs) noexcept = 0;
517
527 virtual size_t getWorkspaceSize(PluginTensorDesc const* inputs, int32_t nbInputs, PluginTensorDesc const* outputs,
528 int32_t nbOutputs) const noexcept = 0;
529
542 virtual int32_t enqueue(PluginTensorDesc const* inputDesc, PluginTensorDesc const* outputDesc,
543 void const* const* inputs, void* const* outputs, void* workspace, cudaStream_t stream) noexcept = 0;
544
545protected:
553 int32_t getTensorRTVersion() const noexcept override
554 {
555 return (static_cast<int32_t>(PluginVersion::kV2_DYNAMICEXT) << 24 | (NV_TENSORRT_VERSION & 0xFFFFFF));
556 }
557
558 virtual ~IPluginV2DynamicExt() noexcept {}
559
560private:
561 // Following are obsolete base class methods, and must not be implemented or used.
562
566 void configurePlugin(Dims const*, int32_t, Dims const*, int32_t, DataType const*, DataType const*, bool const*,
567 bool const*, PluginFormat, int32_t) noexcept override final
568 {
569 }
570
574 bool supportsFormat(DataType, PluginFormat) const noexcept override final
575 {
576 return false;
577 }
578
582 Dims getOutputDimensions(int32_t, Dims const*, int32_t) noexcept override final
583 {
584 return Dims{-1, {}};
585 }
586
594 TRT_DEPRECATED bool isOutputBroadcastAcrossBatch(int32_t, bool const*, int32_t) const noexcept override final
595 {
596 return false;
597 }
598
606 TRT_DEPRECATED bool canBroadcastInputAcrossBatch(int32_t) const noexcept override final
607 {
608 return true;
609 }
610
614 size_t getWorkspaceSize(int32_t) const noexcept override final
615 {
616 return 0;
617 }
618
622 int32_t enqueue(int32_t, void const* const*, void* const*, void*, cudaStream_t) noexcept override final
623 {
624 return 1;
625 }
626};
627
628namespace v_1_0
629{
631{
632public:
637 ~IStreamReader() override = default;
638 IStreamReader() = default;
639
643 InterfaceInfo getInterfaceInfo() const noexcept override
644 {
645 return InterfaceInfo{"IStreamReader", 1, 0};
646 }
647
656 virtual int64_t read(void* destination, int64_t nbBytes) = 0;
657
658protected:
659 IStreamReader(IStreamReader const&) = default;
663};
664
666{
667public:
672 ~IStreamWriter() override = default;
673 IStreamWriter() = default;
674
678 InterfaceInfo getInterfaceInfo() const noexcept final
679 {
680 return InterfaceInfo{"IStreamWriter", 1, 0};
681 }
682
692 virtual int64_t write(void const* data, int64_t nbBytes) = 0;
693
694protected:
695 IStreamWriter(IStreamWriter const&) = default;
699};
700} // namespace v_1_0
701
711
721
//!
//! \enum SeekPosition
//!
//! \brief Reference position for IStreamReaderV2::seek(); analogous to the C
//! SEEK_SET / SEEK_CUR / SEEK_END origins (presumed from names — confirm).
//!
enum class SeekPosition : int32_t
{
    //! Seek relative to the beginning of the stream.
    kSET = 0,

    //! Seek relative to the current position.
    kCUR = 1,

    //! Seek relative to the end of the stream.
    kEND = 2,
};
737
738namespace v_1_0
739{
741{
742public:
747 ~IStreamReaderV2() override = default;
748 IStreamReaderV2() = default;
749
753 InterfaceInfo getInterfaceInfo() const noexcept override
754 {
755 return InterfaceInfo{"IStreamReaderV2", 1, 0};
756 }
757
768 virtual int64_t read(void* destination, int64_t nbBytes, cudaStream_t stream) noexcept = 0;
769
778 virtual bool seek(int64_t offset, SeekPosition where) noexcept = 0;
779
780protected:
785};
786} // namespace v_1_0
787
798
813{
814public:
819 virtual IGpuAllocator* getGpuAllocator() const noexcept = 0;
820
825 virtual IErrorRecorder* getErrorRecorder() const noexcept = 0;
826 virtual ~IPluginResourceContext() noexcept = default;
827
828protected:
832 IPluginResourceContext& operator=(IPluginResourceContext const&) & = default;
834};
835
836namespace v_1_0
837{
839{
840public:
844 InterfaceInfo getInterfaceInfo() const noexcept override
845 {
846 return InterfaceInfo{"PLUGIN_V3ONE_CORE", 1, 0};
847 }
848
857 virtual AsciiChar const* getPluginName() const noexcept = 0;
858
867 virtual AsciiChar const* getPluginVersion() const noexcept = 0;
868
878 virtual AsciiChar const* getPluginNamespace() const noexcept = 0;
879};
880
882{
883public:
889 static constexpr int32_t kDEFAULT_FORMAT_COMBINATION_LIMIT = 100;
890
894 InterfaceInfo getInterfaceInfo() const noexcept override
895 {
896 return InterfaceInfo{"PLUGIN_V3ONE_BUILD", 1, 0};
897 }
898
918 virtual int32_t configurePlugin(DynamicPluginTensorDesc const* in, int32_t nbInputs,
919 DynamicPluginTensorDesc const* out, int32_t nbOutputs) noexcept = 0;
920
937 virtual int32_t getOutputDataTypes(
938 DataType* outputTypes, int32_t nbOutputs, const DataType* inputTypes, int32_t nbInputs) const noexcept = 0;
939
961 virtual int32_t getOutputShapes(DimsExprs const* inputs, int32_t nbInputs, DimsExprs const* shapeInputs,
962 int32_t nbShapeInputs, DimsExprs* outputs, int32_t nbOutputs, IExprBuilder& exprBuilder) noexcept = 0;
963
999 int32_t pos, DynamicPluginTensorDesc const* inOut, int32_t nbInputs, int32_t nbOutputs) noexcept = 0;
1000
1006 virtual int32_t getNbOutputs() const noexcept = 0;
1007
1017 virtual size_t getWorkspaceSize(DynamicPluginTensorDesc const* inputs, int32_t nbInputs,
1018 DynamicPluginTensorDesc const* outputs, int32_t nbOutputs) const noexcept
1019 {
1020 return 0;
1021 }
1022
1054 virtual int32_t getValidTactics(int32_t* tactics, int32_t nbTactics) noexcept
1055 {
1056 return 0;
1057 }
1058
1062 virtual int32_t getNbTactics() noexcept
1063 {
1064 return 0;
1065 }
1066
1078 virtual char const* getTimingCacheID() noexcept
1079 {
1080 return nullptr;
1081 }
1082
1086 virtual int32_t getFormatCombinationLimit() noexcept
1087 {
1088 return kDEFAULT_FORMAT_COMBINATION_LIMIT;
1089 }
1090
1097 virtual char const* getMetadataString() noexcept
1098 {
1099 return nullptr;
1100 }
1101};
1102
1104{
1105public:
1109 InterfaceInfo getInterfaceInfo() const noexcept override
1110 {
1111 return InterfaceInfo{"PLUGIN_V3ONE_RUNTIME", 1, 0};
1112 }
1113
1121 virtual int32_t setTactic(int32_t tactic) noexcept
1122 {
1123 return 0;
1124 }
1125
1144 virtual int32_t onShapeChange(
1145 PluginTensorDesc const* in, int32_t nbInputs, PluginTensorDesc const* out, int32_t nbOutputs) noexcept = 0;
1146
1160 virtual int32_t enqueue(PluginTensorDesc const* inputDesc, PluginTensorDesc const* outputDesc,
1161 void const* const* inputs, void* const* outputs, void* workspace, cudaStream_t stream) noexcept = 0;
1162
1182 virtual IPluginV3* attachToContext(IPluginResourceContext* context) noexcept = 0;
1183
1189
1193 virtual PluginFieldCollection const* getFieldsToSerialize() noexcept = 0;
1194};
1195} // namespace v_1_0
1196
1197namespace v_2_0
1198{
1199
1201{
1202public:
1203 InterfaceInfo getInterfaceInfo() const noexcept override
1204 {
1205 return InterfaceInfo{"PLUGIN_V3ONE_BUILD", 2, 0};
1206 }
1207
1237 virtual int32_t getAliasedInput(int32_t outputIndex) noexcept
1238 {
1239 return -1;
1240 }
1241};
1242
1243} // namespace v_2_0
1244
1255
1267
1279
1288
1289namespace v_1_0
1290{
1292{
1293public:
1301 virtual void reportLayerTime(char const* layerName, float ms) noexcept = 0;
1302
1303 virtual ~IProfiler() noexcept {}
1304};
1305} // namespace v_1_0
1306
1319
//!
//! \enum WeightsRole
//!
//! \brief Role played by a set of weights within a layer, used when refitting
//! (see IRefitter::setWeights()). Descriptions inferred from enumerator names;
//! confirm against the TensorRT refitter documentation.
//!
enum class WeightsRole : int32_t
{
    kKERNEL = 0,   //!< Kernel weights (e.g. convolution / matrix-multiply kernels).
    kBIAS = 1,     //!< Bias weights.
    kSHIFT = 2,    //!< Shift parameter of a scale operation.
    kSCALE = 3,    //!< Scale parameter of a scale operation.
    kCONSTANT = 4, //!< Weights of a constant layer.
    kANY = 5,      //!< Weights in any of the above roles.
};
1336
//! Maximum number of elements in WeightsRole enum. \see WeightsRole
template <>
constexpr inline int32_t EnumMax<WeightsRole>() noexcept
{
    return 6;
}
1343
//!
//! \enum DeviceType
//!
//! \brief The device on which a layer executes.
//!
enum class DeviceType : int32_t
{
    kGPU = 0, //!< Execute on the GPU.
    kDLA = 1, //!< Execute on the DLA (Deep Learning Accelerator — confirm expansion).
};
1354
//! Maximum number of elements in DeviceType enum. \see DeviceType
template <>
constexpr inline int32_t EnumMax<DeviceType>() noexcept
{
    return 2;
}
1361
1372enum class TempfileControlFlag : int32_t
1373{
1376
1381};
1382
//! Maximum number of elements in TempfileControlFlag enum. \see TempfileControlFlag
template <>
constexpr inline int32_t EnumMax<TempfileControlFlag>() noexcept
{
    return 2;
}
1389
1396using TempfileControlFlags = uint32_t;
1397
//!
//! \enum TensorFormat
//!
//! \brief Memory layouts for input/output tensors.
//!
//! NOTE(review): the per-format layout descriptions below are inferred from the
//! enumerator names (C=channels, H=height, W=width, D=depth; a trailing number
//! denotes channel vectorization width). Confirm details — including padding and
//! per-datatype applicability — against the NVIDIA TensorRT format documentation.
//!
enum class TensorFormat : int32_t
{
    //! Row-major linear (non-vectorized) layout.
    kLINEAR = 0,

    //! Channel-vectorized layout, 2 channels per vector.
    kCHW2 = 1,

    //! Channels-last layout with channels padded/vectorized to 8.
    kHWC8 = 2,

    //! Channel-vectorized layout, 4 channels per vector.
    kCHW4 = 3,

    //! Channel-vectorized layout, 16 channels per vector.
    kCHW16 = 4,

    //! Channel-vectorized layout, 32 channels per vector.
    kCHW32 = 5,

    //! 3-D (depth) channels-last layout, channels vectorized to 8.
    kDHWC8 = 6,

    //! 3-D channel-vectorized layout, 32 channels per vector.
    kCDHW32 = 7,

    //! Channels-last (HWC) layout, non-vectorized.
    kHWC = 8,

    //! DLA-specific linear layout.
    kDLA_LINEAR = 9,

    //! DLA-specific layout with channels vectorized to 4.
    kDLA_HWC4 = 10,

    //! Channels-last layout with channels padded/vectorized to 16.
    kHWC16 = 11,

    //! 3-D channels-last (DHWC) layout, non-vectorized.
    kDHWC = 12
};
1536
1537namespace impl
1538{
1540template <>
1542{
1544 static constexpr int32_t kVALUE = 13;
1545};
1546} // namespace impl
1547
//!
//! \enum AllocatorFlag
//!
//! \brief Flags describing allocation properties requested from IGpuAllocator.
//!
enum class AllocatorFlag : int32_t
{
    //! The allocation may later be resized (see IGpuAllocator::reallocate()).
    kRESIZABLE = 0,
};
1558
1559namespace impl
1560{
1562template <>
1564{
1566 static constexpr int32_t kVALUE = 1;
1567};
1568} // namespace impl
1569
1570using AllocatorFlags = uint32_t;
1571
1574
1588{
1589public:
    //!
    //! \enum Severity
    //!
    //! \brief Severity levels for messages passed to log(), ordered from most
    //! to least severe (lower numeric value = more severe).
    //!
    enum class Severity : int32_t
    {
        //! An internal error has occurred.
        kINTERNAL_ERROR = 0,
        //! An application error has occurred.
        kERROR = 1,
        //! A warning: an application error that the library may have recovered from.
        kWARNING = 2,
        //! Informational message.
        kINFO = 3,
        //! Verbose / debugging-level message.
        kVERBOSE = 4,
    };
1608
1627 virtual void log(Severity severity, AsciiChar const* msg) noexcept = 0;
1628
1629 ILogger() = default;
1630 virtual ~ILogger() = default;
1631
1632protected:
1633 // @cond SuppressDoxyWarnings
1634 ILogger(ILogger const&) = default;
1635 ILogger(ILogger&&) = default;
1636 ILogger& operator=(ILogger const&) & = default;
1637 ILogger& operator=(ILogger&&) & = default;
1638 // @endcond
1639};
1640
1641namespace impl
1642{
//! Maximum number of elements in ILogger::Severity enum. \see ILogger::Severity
template <>
struct EnumMaxImpl<ILogger::Severity>
{
    //! Number of distinct severity values.
    static constexpr int32_t kVALUE = 5;
};
1650} // namespace impl
1651
1652namespace v_1_0
1653{
1654
1656{
1657public:
1683 uint64_t const size, uint64_t const alignment, AllocatorFlags const flags) noexcept = 0;
1684
1685 ~IGpuAllocator() override = default;
1686 IGpuAllocator() = default;
1687
1725 virtual void* reallocate(void* const /*baseAddr*/, uint64_t /*alignment*/, uint64_t /*newSize*/) noexcept
1726 {
1727 return nullptr;
1728 }
1729
1748 TRT_DEPRECATED virtual bool deallocate(void* const memory) noexcept = 0;
1749
1778 virtual void* allocateAsync(
1779 uint64_t const size, uint64_t const alignment, AllocatorFlags const flags, cudaStream_t /*stream*/) noexcept
1780 {
1781 return allocate(size, alignment, flags);
1782 }
1811 virtual bool deallocateAsync(void* const memory, cudaStream_t /*stream*/) noexcept
1812 {
1813 return deallocate(memory);
1814 }
1815
1819 InterfaceInfo getInterfaceInfo() const noexcept override
1820 {
1821 return {"IGpuAllocator", 1, 0};
1822 }
1823
1824protected:
1825 // @cond SuppressDoxyWarnings
1826 IGpuAllocator(IGpuAllocator const&) = default;
1827 IGpuAllocator(IGpuAllocator&&) = default;
1828 IGpuAllocator& operator=(IGpuAllocator const&) & = default;
1829 IGpuAllocator& operator=(IGpuAllocator&&) & = default;
1830 // @endcond
1831};
1832
1833} // namespace v_1_0
1834
1856
//!
//! \enum EngineValidity
//!
//! \brief Result of IRuntime::getEngineValidity(): whether a serialized engine
//! can be deserialized/run on the current system. Per-value descriptions
//! inferred from names — confirm against TensorRT docs.
//!
enum class EngineValidity : int32_t
{
    //! Engine is valid for this system.
    kVALID = 0,

    //! Engine is usable but may not run optimally on this system.
    kSUBOPTIMAL = 1,

    //! Engine cannot be used on this system; see EngineInvalidityDiagnostics.
    kINVALID = 2,
};
1873
1875namespace impl
1876{
1877template <>
1879{
1880 static constexpr int32_t kVALUE = 3;
1881};
1882} // namespace impl
1883
//!
//! \enum EngineInvalidityDiagnostics
//!
//! \brief Bit flags (combinable in a uint64_t bitmask) reporting why an engine
//! was diagnosed as suboptimal or invalid; written to the \p diagnostics
//! out-parameter of IRuntime::getEngineValidity(). Per-flag descriptions
//! inferred from names — confirm against TensorRT docs.
//!
enum class EngineInvalidityDiagnostics : uint64_t
{
    //! Engine was serialized by an incompatible library version.
    kVERSION_MISMATCH = 1ULL << 0,

    //! GPU compute capability is not supported by the engine.
    kUNSUPPORTED_CC = 1ULL << 1,

    //! Installed CUDA driver is too old.
    kOLD_CUDA_DRIVER = 1ULL << 2,

    //! Installed CUDA runtime is too old.
    kOLD_CUDA_RUNTIME = 1ULL << 3,

    //! Insufficient GPU memory to use the engine.
    kINSUFFICIENT_GPU_MEMORY = 1ULL << 4,

    //! Engine blob is corrupt or malformed.
    kMALFORMED_ENGINE = 1ULL << 5,

    //! A CUDA error occurred while validating the engine.
    kCUDA_ERROR = 1ULL << 6,
};
1911
1912
1920class IRuntime : public INoCopy
1921{
1922public:
1923 virtual ~IRuntime() noexcept = default;
1924
1936 void setDLACore(int32_t dlaCore) noexcept
1937 {
1938 mImpl->setDLACore(dlaCore);
1939 }
1940
1946 int32_t getDLACore() const noexcept
1947 {
1948 return mImpl->getDLACore();
1949 }
1950
1954 int32_t getNbDLACores() const noexcept
1955 {
1956 return mImpl->getNbDLACores();
1957 }
1958
1970 void setGpuAllocator(IGpuAllocator* allocator) noexcept
1971 {
1972 mImpl->setGpuAllocator(allocator);
1973 }
1974
1986 //
1989 void setErrorRecorder(IErrorRecorder* recorder) noexcept
1990 {
1991 mImpl->setErrorRecorder(recorder);
1992 }
1993
2005 {
2006 return mImpl->getErrorRecorder();
2007 }
2008
    //!
    //! \brief Deserialize an engine from a host-memory blob.
    //!
    //! \param blob Pointer to the serialized engine data.
    //! \param size Size of the blob in bytes.
    //!
    //! \return The deserialized engine (presumably nullptr on failure — confirm
    //! against the TensorRT API reference). Forwards to the pimpl.
    //!
    ICudaEngine* deserializeCudaEngine(void const* blob, std::size_t size) noexcept
    {
        return mImpl->deserializeCudaEngine(blob, size);
    }
2026
2046 {
2047 return mImpl->deserializeCudaEngineV2(streamReader);
2048 }
2049
2055 ILogger* getLogger() const noexcept
2056 {
2057 return mImpl->getLogger();
2058 }
2059
2070 bool setMaxThreads(int32_t maxThreads) noexcept
2071 {
2072 return mImpl->setMaxThreads(maxThreads);
2073 }
2074
2084 int32_t getMaxThreads() const noexcept
2085 {
2086 return mImpl->getMaxThreads();
2087 }
2088
2119 void setTemporaryDirectory(char const* path) noexcept
2120 {
2121 return mImpl->setTemporaryDirectory(path);
2122 }
2123
2130 char const* getTemporaryDirectory() const noexcept
2131 {
2132 return mImpl->getTemporaryDirectory();
2133 }
2134
2147 {
2148 return mImpl->setTempfileControlFlags(flags);
2149 }
2150
2159 {
2160 return mImpl->getTempfileControlFlags();
2161 }
2162
2169 {
2170 return mImpl->getPluginRegistry();
2171 }
2172
2186 IRuntime* loadRuntime(char const* path) noexcept
2187 {
2188 return mImpl->loadRuntime(path);
2189 }
2190
2198 void setEngineHostCodeAllowed(bool allowed) noexcept
2199 {
2200 return mImpl->setEngineHostCodeAllowed(allowed);
2201 }
2202
2208 bool getEngineHostCodeAllowed() const noexcept
2209 {
2210 return mImpl->getEngineHostCodeAllowed();
2211 }
2212
2220 int64_t getEngineHeaderSize() const noexcept {
2221 return mImpl->getEngineHeaderSize();
2222 }
2223
    //!
    //! \brief Check whether a serialized engine blob is usable on this system.
    //!
    //! \param blob Pointer to the serialized engine data.
    //! \param blobSize Size of the blob in bytes.
    //! \param diagnostics Out-parameter receiving a bitmask of
    //!        EngineInvalidityDiagnostics values explaining any problem
    //!        (semantics inferred from the enum — confirm nullability in docs).
    //!
    //! \return The engine's validity classification. Forwards to the pimpl.
    //!
    EngineValidity getEngineValidity(void const* blob, int64_t blobSize, uint64_t* diagnostics) const noexcept {
        return mImpl->getEngineValidity(blob, blobSize, diagnostics);
    }
2250
2251
2252protected:
2253 apiv::VRuntime* mImpl;
2254};
2255
2263class IRefitter : public INoCopy
2264{
2265public:
2266 virtual ~IRefitter() noexcept = default;
2267
2283 bool setWeights(char const* layerName, WeightsRole role, Weights weights) noexcept
2284 {
2285 return mImpl->setWeights(layerName, role, weights);
2286 }
2287
2300 bool refitCudaEngine() noexcept
2301 {
2302 return mImpl->refitCudaEngine();
2303 }
2304
2321 int32_t getMissing(int32_t size, char const** layerNames, WeightsRole* roles) noexcept
2322 {
2323 return mImpl->getMissing(size, layerNames, roles);
2324 }
2325
2338 int32_t getAll(int32_t size, char const** layerNames, WeightsRole* roles) noexcept
2339 {
2340 return mImpl->getAll(size, layerNames, roles);
2341 }
2342
2354 //
2357 void setErrorRecorder(IErrorRecorder* recorder) noexcept
2358 {
2359 mImpl->setErrorRecorder(recorder);
2360 }
2361
2373 {
2374 return mImpl->getErrorRecorder();
2375 }
2376
2397 bool setNamedWeights(char const* name, Weights weights) noexcept
2398 {
2399 return mImpl->setNamedWeights(name, weights);
2400 }
2401
2417 int32_t getMissingWeights(int32_t size, char const** weightsNames) noexcept
2418 {
2419 return mImpl->getMissingWeights(size, weightsNames);
2420 }
2421
2433 int32_t getAllWeights(int32_t size, char const** weightsNames) noexcept
2434 {
2435 return mImpl->getAllWeights(size, weightsNames);
2436 }
2437
2443 ILogger* getLogger() const noexcept
2444 {
2445 return mImpl->getLogger();
2446 }
2447
2459 bool setMaxThreads(int32_t maxThreads) noexcept
2460 {
2461 return mImpl->setMaxThreads(maxThreads);
2462 }
2463
2473 int32_t getMaxThreads() const noexcept
2474 {
2475 return mImpl->getMaxThreads();
2476 }
2477
2500 bool setNamedWeights(char const* name, Weights weights, TensorLocation location) noexcept
2501 {
2502 return mImpl->setNamedWeightsWithLocation(name, weights, location);
2503 }
2504
2516 Weights getNamedWeights(char const* weightsName) const noexcept
2517 {
2518 return mImpl->getNamedWeights(weightsName);
2519 }
2520
2532 TensorLocation getWeightsLocation(char const* weightsName) const noexcept
2533 {
2534 return mImpl->getWeightsLocation(weightsName);
2535 }
2536
2548 bool unsetNamedWeights(char const* weightsName) noexcept
2549 {
2550 return mImpl->unsetNamedWeights(weightsName);
2551 }
2552
2564 void setWeightsValidation(bool weightsValidation) noexcept
2565 {
2566 return mImpl->setWeightsValidation(weightsValidation);
2567 }
2568
2572 bool getWeightsValidation() const noexcept
2573 {
2574 return mImpl->getWeightsValidation();
2575 }
2576
2594 bool refitCudaEngineAsync(cudaStream_t stream) noexcept
2595 {
2596 return mImpl->refitCudaEngineAsync(stream);
2597 }
2598
2612 Weights getWeightsPrototype(char const* weightsName) const noexcept
2613 {
2614 return mImpl->getWeightsPrototype(weightsName);
2615 }
2616
2617protected:
2618 apiv::VRefitter* mImpl;
2619};
2620
//!
//! \enum OptProfileSelector
//!
//! \brief Selects which value of an optimization-profile entry is being
//! set or queried: the minimum, the optimum, or the maximum.
//!
enum class OptProfileSelector : int32_t
{
    kMIN = 0, //!< Minimum of the permitted range.
    kOPT = 1, //!< Value for which the engine is optimized.
    kMAX = 2  //!< Maximum of the permitted range.
};
2637
//! Number of different values of OptProfileSelector. \see OptProfileSelector
template <>
constexpr inline int32_t EnumMax<OptProfileSelector>() noexcept
{
    return 3;
}
2648
2672{
2673public:
2701 bool setDimensions(char const* inputName, OptProfileSelector select, Dims const& dims) noexcept
2702 {
2703 return mImpl->setDimensions(inputName, select, dims);
2704 }
2705
2713 Dims getDimensions(char const* inputName, OptProfileSelector select) const noexcept
2714 {
2715 return mImpl->getDimensions(inputName, select);
2716 }
2717
2766 char const* inputName, OptProfileSelector select, int32_t const* values, int32_t nbValues) noexcept
2767 {
2768 return mImpl->setShapeValues(inputName, select, values, nbValues);
2769 }
2770
2779 int32_t getNbShapeValues(char const* inputName) const noexcept
2780 {
2781 return mImpl->getNbShapeValues(inputName);
2782 }
2783
2793 TRT_DEPRECATED int32_t const* getShapeValues(char const* inputName, OptProfileSelector select) const noexcept
2794 {
2795 return mImpl->getShapeValues(inputName, select);
2796 }
2797
2811 bool setExtraMemoryTarget(float target) noexcept
2812 {
2813 return mImpl->setExtraMemoryTarget(target);
2814 }
2815
2823 float getExtraMemoryTarget() const noexcept
2824 {
2825 return mImpl->getExtraMemoryTarget();
2826 }
2827
2840 bool isValid() const noexcept
2841 {
2842 return mImpl->isValid();
2843 }
2844
2891 char const* inputName, OptProfileSelector select, int64_t const* values, int32_t nbValues) noexcept
2892 {
2893 return mImpl->setShapeValuesV2(inputName, select, values, nbValues);
2894 }
2895
2903 int64_t const* getShapeValuesV2(char const* inputName, OptProfileSelector select) const noexcept
2904 {
2905 return mImpl->getShapeValuesV2(inputName, select);
2906 }
2907
2908protected:
2909 apiv::VOptimizationProfile* mImpl;
2910 virtual ~IOptimizationProfile() noexcept = default;
2911};
2912
2920enum class TacticSource : int32_t
2921{
2926
2930
2935
2940
2944};
2945
//! Maximum number of elements in TacticSource enum. \see TacticSource
template <>
constexpr inline int32_t EnumMax<TacticSource>() noexcept
{
    return 5;
}
2951
2958using TacticSources = uint32_t;
2959
//!
//! \enum ProfilingVerbosity
//!
//! \brief Amount of layer information exposed in NVTX annotations / the engine
//! inspector (inferred from names — confirm against TensorRT docs).
//!
enum class ProfilingVerbosity : int32_t
{
    kLAYER_NAMES_ONLY = 0, //!< Expose layer names only.
    kNONE = 1,             //!< Expose no layer information.
    kDETAILED = 2,         //!< Expose detailed layer information.
};
2975
//! Maximum number of elements in ProfilingVerbosity enum. \see ProfilingVerbosity
template <>
constexpr inline int32_t EnumMax<ProfilingVerbosity>() noexcept
{
    return 3;
}
2982
2989using SerializationFlags = uint32_t;
2990
//!
//! \enum SerializationFlag
//!
//! \brief Options controlling engine serialization (see ISerializationConfig).
//! NOTE(review): value 1 is absent from this listing — likely a removed or
//! doc-stripped flag; confirm against the shipped header.
//!
enum class SerializationFlag : int32_t
{
    kEXCLUDE_WEIGHTS = 0, //!< Serialize the engine without its weights (inferred from name).
    kINCLUDE_REFIT = 2,   //!< Include information needed for refitting (inferred from name).
};
3004
//! Maximum number of elements in SerializationFlag enum. \see SerializationFlag
template <>
constexpr inline int32_t EnumMax<SerializationFlag>() noexcept
{
    return 3;
}
3011
3020{
3021public:
3022 virtual ~ISerializationConfig() noexcept = default;
3023
3035 bool setFlags(SerializationFlags serializationFlags) noexcept
3036 {
3037 return mImpl->setFlags(serializationFlags);
3038 }
3039
3048 {
3049 return mImpl->getFlags();
3050 }
3051
3059 bool clearFlag(SerializationFlag serializationFlag) noexcept
3060 {
3061 return mImpl->clearFlag(serializationFlag);
3062 }
3063
3071 bool setFlag(SerializationFlag serializationFlag) noexcept
3072 {
3073 return mImpl->setFlag(serializationFlag);
3074 }
3075
3083 bool getFlag(SerializationFlag serializationFlag) const noexcept
3084 {
3085 return mImpl->getFlag(serializationFlag);
3086 }
3087
3088protected:
3089 apiv::VSerializationConfig* mImpl;
3090};
3091
3104{
3105 kSTATIC = 0,
3106 kON_PROFILE_CHANGE = 1,
3107 kUSER_MANAGED = 2,
3108};
3109
//! Maximum number of elements in ExecutionContextAllocationStrategy enum.
//! \see ExecutionContextAllocationStrategy
template <>
constexpr inline int32_t EnumMax<ExecutionContextAllocationStrategy>() noexcept
{
    return 3;
}
3120
3129{
3130public:
3131 virtual ~IRuntimeCache() noexcept = default;
3132
3138 IHostMemory* serialize() const noexcept
3139 {
3140 return mImpl->serialize();
3141 }
3142
3149 bool deserialize(void const* blob, size_t size) noexcept
3150 {
3151 return mImpl->deserialize(blob, size);
3152 }
3153
3157 bool reset() noexcept
3158 {
3159 return mImpl->reset();
3160 }
3161
3162protected:
3163 apiv::VRuntimeCache* mImpl;
3164};
3165
3178{
3184 kLAZY = 0,
3185
3190 kEAGER = 1,
3191
3195 kNONE = 2,
3196};
3197
3203template <>
3205{
3206 return 3;
3207}
3208
3216enum class CudaGraphStrategy : int32_t
3217{
3221 kDISABLED = 0,
3222
3231};
3232
//! Maximum number of elements in CudaGraphStrategy enum. \see CudaGraphStrategy
template <>
constexpr inline int32_t EnumMax<CudaGraphStrategy>() noexcept
{
    return 2;
}
3243
3244
3252{
3253public:
3254 virtual ~IRuntimeConfig() noexcept = default;
3255
3261 void setExecutionContextAllocationStrategy(ExecutionContextAllocationStrategy strategy) noexcept
3262 {
3263 return mImpl->setExecutionContextAllocationStrategy(strategy);
3264 }
3265
3272 {
3273 return mImpl->getExecutionContextAllocationStrategy();
3274 }
3275
3282 {
3283 return mImpl->createRuntimeCache();
3284 }
3285
3291 bool setRuntimeCache(IRuntimeCache const& cache) noexcept
3292 {
3293 return mImpl->setRuntimeCache(cache);
3294 }
3295
3302 {
3303 return mImpl->getRuntimeCache();
3304 }
3305
3312 DynamicShapesKernelSpecializationStrategy dynamicShapesKernelSpecializationStrategy) noexcept
3313 {
3314 return mImpl->setDynamicShapesKernelSpecializationStrategy(dynamicShapesKernelSpecializationStrategy);
3315 }
3316
3323 {
3324 return mImpl->getDynamicShapesKernelSpecializationStrategy();
3325 }
3326
3347 {
3348 return mImpl->setCudaGraphStrategy(strategy);
3349 }
3350
3357 {
3358 return mImpl->getCudaGraphStrategy();
3359 }
3360
3361protected:
3362 apiv::VRuntimeConfig* mImpl;
3363}; // class IRuntimeConfig
3364
3373enum class EngineStat : int32_t
3374{
3377
3380};
3381
//! Maximum number of elements in EngineStat enum. \see EngineStat
template <>
constexpr inline int32_t EnumMax<EngineStat>() noexcept
{
    return 2;
}
3392
3400class ICudaEngine : public INoCopy
3401{
3402public:
3403 virtual ~ICudaEngine() noexcept = default;
3404
3415 Dims getTensorShape(char const* tensorName) const noexcept
3416 {
3417 return mImpl->getTensorShape(tensorName);
3418 }
3419
3430 DataType getTensorDataType(char const* tensorName) const noexcept
3431 {
3432 return mImpl->getTensorDataType(tensorName);
3433 }
3434
3444 int32_t getNbLayers() const noexcept
3445 {
3446 return mImpl->getNbLayers();
3447 }
3448
3458 IHostMemory* serialize() const noexcept
3459 {
3460 return mImpl->serialize();
3461 }
3462
3477 {
3478 return mImpl->createExecutionContext(strategy);
3479 }
3480
3493 TensorLocation getTensorLocation(char const* tensorName) const noexcept
3494 {
3495 return mImpl->getTensorLocation(tensorName);
3496 }
3497
3513 bool isShapeInferenceIO(char const* tensorName) const noexcept
3514 {
3515 return mImpl->isShapeInferenceIO(tensorName);
3516 }
3517
3527 TensorIOMode getTensorIOMode(char const* tensorName) const noexcept
3528 {
3529 return mImpl->getTensorIOMode(tensorName);
3530 }
3531
3540 {
3541 return mImpl->createExecutionContextWithRuntimeConfig(runtimeConfig);
3542 }
3543
3553 {
3554 return mImpl->createRuntimeConfig();
3555 }
3556
3568 int64_t getDeviceMemorySizeV2() const noexcept
3569 {
3570 return mImpl->getDeviceMemorySizeV2();
3571 }
3572
3584 int64_t getDeviceMemorySizeForProfileV2(int32_t profileIndex) const noexcept
3585 {
3586 return mImpl->getDeviceMemorySizeForProfileV2(profileIndex);
3587 }
3588
3594 bool isRefittable() const noexcept
3595 {
3596 return mImpl->isRefittable();
3597 }
3598
3615 int32_t getTensorBytesPerComponent(char const* tensorName) const noexcept
3616 {
3617 return mImpl->getTensorBytesPerComponent(tensorName);
3618 }
3619
3633 int32_t getTensorBytesPerComponent(char const* tensorName, int32_t profileIndex) const noexcept
3634 {
3635 return mImpl->getTensorBytesPerComponentV2(tensorName, profileIndex);
3636 }
3637
3654 int32_t getTensorComponentsPerElement(char const* tensorName) const noexcept
3655 {
3656 return mImpl->getTensorComponentsPerElement(tensorName);
3657 }
3658
3672 int32_t getTensorComponentsPerElement(char const* tensorName, int32_t profileIndex) const noexcept
3673 {
3674 return mImpl->getTensorComponentsPerElementV2(tensorName, profileIndex);
3675 }
3676
3687 TensorFormat getTensorFormat(char const* tensorName) const noexcept
3688 {
3689 return mImpl->getTensorFormat(tensorName);
3690 }
3691
3701 TensorFormat getTensorFormat(char const* tensorName, int32_t profileIndex) const noexcept
3702 {
3703 return mImpl->getTensorFormatV2(tensorName, profileIndex);
3704 }
3705
3725 char const* getTensorFormatDesc(char const* tensorName) const noexcept
3726 {
3727 return mImpl->getTensorFormatDesc(tensorName);
3728 }
3729
3748 char const* getTensorFormatDesc(char const* tensorName, int32_t profileIndex) const noexcept
3749 {
3750 return mImpl->getTensorFormatDescV2(tensorName, profileIndex);
3751 }
3752
3765 int32_t getTensorVectorizedDim(char const* tensorName) const noexcept
3766 {
3767 return mImpl->getTensorVectorizedDim(tensorName);
3768 }
3769
3781 int32_t getTensorVectorizedDim(char const* tensorName, int32_t profileIndex) const noexcept
3782 {
3783 return mImpl->getTensorVectorizedDimV2(tensorName, profileIndex);
3784 }
3785
3796 char const* getName() const noexcept
3797 {
3798 return mImpl->getName();
3799 }
3800
3807 int32_t getNbOptimizationProfiles() const noexcept
3808 {
3809 return mImpl->getNbOptimizationProfiles();
3810 }
3811
3827 Dims getProfileShape(char const* tensorName, int32_t profileIndex, OptProfileSelector select) const noexcept
3828 {
3829 return mImpl->getProfileShape(tensorName, profileIndex, select);
3830 }
3831
3843 {
3844 return mImpl->getEngineCapability();
3845 }
3846
3861 void setErrorRecorder(IErrorRecorder* recorder) noexcept
3862 {
3863 return mImpl->setErrorRecorder(recorder);
3864 }
3865
3877 {
3878 return mImpl->getErrorRecorder();
3879 }
3880
3891 {
3892 return mImpl->hasImplicitBatchDimension();
3893 }
3894
3907 {
3908 return mImpl->getTacticSources();
3909 }
3910
3919 {
3920 return mImpl->getProfilingVerbosity();
3921 }
3922
3929 {
3930 return mImpl->createEngineInspector();
3931 }
3932
3941 int32_t getNbIOTensors() const noexcept
3942 {
3943 return mImpl->getNbIOTensors();
3944 }
3945
3953 char const* getIOTensorName(int32_t index) const noexcept
3954 {
3955 return mImpl->getIOTensorName(index);
3956 }
3957
3965 {
3966 return mImpl->getHardwareCompatibilityLevel();
3967 }
3968
3979 int32_t getNbAuxStreams() const noexcept
3980 {
3981 return mImpl->getNbAuxStreams();
3982 }
3983
3990 {
3991 return mImpl->createSerializationConfig();
3992 }
3993
4010 {
4011 return mImpl->serializeWithConfig(config);
4012 }
4013
4025 int64_t getStreamableWeightsSize() const noexcept
4026 {
4027 return mImpl->getStreamableWeightsSize();
4028 }
4029
4067 bool setWeightStreamingBudgetV2(int64_t gpuMemoryBudget) noexcept
4068 {
4069 return mImpl->setWeightStreamingBudgetV2(gpuMemoryBudget);
4070 }
4071
4085 int64_t getWeightStreamingBudgetV2() const noexcept
4086 {
4087 return mImpl->getWeightStreamingBudgetV2();
4088 }
4089
4110 int64_t getWeightStreamingAutomaticBudget() const noexcept
4111 {
4112 return mImpl->getWeightStreamingAutomaticBudget();
4113 }
4114
4139 {
4140 return mImpl->getWeightStreamingScratchMemorySize();
4141 }
4142
4152 bool isDebugTensor(char const* name) const noexcept
4153 {
4154 return mImpl->isDebugTensor(name);
4155 }
4156
4177 char const* tensorName, int32_t profileIndex, OptProfileSelector select) const noexcept
4178 {
4179 return mImpl->getProfileTensorValuesV2(tensorName, profileIndex, select);
4180 }
4181
4205 int64_t getEngineStat(EngineStat stat) const noexcept
4206 {
4207 return mImpl->getEngineStat(stat);
4208 }
4209
4210protected:
4211 apiv::VCudaEngine* mImpl;
4212};
4213
4214namespace v_1_0
4215{
4217{
4218public:
    //!
    //! \brief Return version information associated with this interface: "IOutputAllocator" 1.0.
    //!
    InterfaceInfo getInterfaceInfo() const noexcept override
    {
        return {"IOutputAllocator", 1, 0};
    }
4226
4250 char const* tensorName, void* currentMemory, uint64_t size, uint64_t alignment, cudaStream_t /*stream*/)
4251 {
4252 return nullptr;
4253 }
4254
4263 virtual void notifyShape(char const* tensorName, Dims const& dims) noexcept = 0;
4264};
4265} // namespace v_1_0
4266
4275
4276namespace v_1_0
4277{
4279{
4280public:
4284 InterfaceInfo getInterfaceInfo() const noexcept override
4285 {
4286 return {"IDebugListener", 1, 0};
4287 }
4288
4302 virtual bool processDebugTensor(void const* addr, TensorLocation location, DataType type, Dims const& shape,
4303 char const* name, cudaStream_t stream)
4304 = 0;
4305
4306 ~IDebugListener() override = default;
4307};
4308} // namespace v_1_0
4309
4316
4328{
4329public:
4330 virtual ~IExecutionContext() noexcept = default;
4331
    //!
    //! \brief Set the debug sync flag.
    //!
    //! \param sync New value of the flag.
    //!
    void setDebugSync(bool sync) noexcept
    {
        mImpl->setDebugSync(sync);
    }
4344
    //!
    //! \brief Get the debug sync flag.
    //!
    //! \see setDebugSync()
    //!
    bool getDebugSync() const noexcept
    {
        return mImpl->getDebugSync();
    }
4354
    //!
    //! \brief Set the profiler.
    //!
    //! \param profiler Application-implemented profiler; ownership stays with the caller.
    //!
    void setProfiler(IProfiler* profiler) noexcept
    {
        mImpl->setProfiler(profiler);
    }
4364
    //!
    //! \brief Get the profiler previously set with setProfiler().
    //!
    IProfiler* getProfiler() const noexcept
    {
        return mImpl->getProfiler();
    }
4374
    //!
    //! \brief Get the engine associated with this execution context.
    //!
    ICudaEngine const& getEngine() const noexcept
    {
        return mImpl->getEngine();
    }
4384
    //!
    //! \brief Set the name of this execution context.
    //!
    //! \param name Zero-terminated name string.
    //!
    void setName(char const* name) noexcept
    {
        mImpl->setName(name);
    }
4398
    //!
    //! \brief Return the name of this execution context.
    //!
    //! \see setName()
    //!
    char const* getName() const noexcept
    {
        return mImpl->getName();
    }
4408
    //!
    //! \brief Set the device memory for use by this execution context.
    //!
    //! \param memory Start address of device memory; ownership stays with the caller.
    //!
    //! \see setDeviceMemoryV2() which additionally passes the size of the memory block.
    //!
    void setDeviceMemory(void* memory) noexcept
    {
        mImpl->setDeviceMemory(memory);
    }
4435
4453 void setDeviceMemoryV2(void* memory, int64_t size) noexcept
4454 {
4455 return mImpl->setDeviceMemoryV2(memory, size);
4456 }
4457
    //!
    //! \brief Return the strides of the buffer for the given tensor name.
    //!
    //! \param tensorName Name of an input or output tensor.
    //!
    Dims getTensorStrides(char const* tensorName) const noexcept
    {
        return mImpl->getTensorStrides(tensorName);
    }
4478
4479public:
    //!
    //! \brief Get the index of the currently selected optimization profile.
    //!
    int32_t getOptimizationProfile() const noexcept
    {
        return mImpl->getOptimizationProfile();
    }
4493
    //!
    //! \brief Set the shape of the given input tensor.
    //!
    //! \param tensorName Name of the input tensor.
    //! \param dims Dimensions to set.
    //!
    //! \return true on success, false otherwise.
    //!
    bool setInputShape(char const* tensorName, Dims const& dims) noexcept
    {
        return mImpl->setInputShape(tensorName, dims);
    }
4511
    //!
    //! \brief Return the shape of the given input or output tensor.
    //!
    //! \param tensorName Name of an input or output tensor.
    //!
    Dims getTensorShape(char const* tensorName) const noexcept
    {
        return mImpl->getTensorShape(tensorName);
    }
4548
    //!
    //! \brief Report whether all dynamic dimensions of input tensors have been specified.
    //!
    bool allInputDimensionsSpecified() const noexcept
    {
        return mImpl->allInputDimensionsSpecified();
    }
4564
    //!
    //! \brief Set the ErrorRecorder for this interface.
    //!
    //! \param recorder Application-implemented error recorder; ownership stays with the caller.
    //!
    void setErrorRecorder(IErrorRecorder* recorder) noexcept
    {
        mImpl->setErrorRecorder(recorder);
    }
4583
4595 {
4596 return mImpl->getErrorRecorder();
4597 }
4598
    //!
    //! \brief Synchronously execute the network.
    //!
    //! \param bindings Array of pointers to input and output buffers.
    //!
    //! \return true on success, false otherwise.
    //!
    bool executeV2(void* const* bindings) noexcept
    {
        return mImpl->executeV2(bindings);
    }
4615
    //!
    //! \brief Select an optimization profile for this context with async semantics.
    //!
    //! \param profileIndex Index of the profile to select.
    //! \param stream CUDA stream on which the profile switch is enqueued.
    //!
    //! \return true on success, false otherwise.
    //!
    bool setOptimizationProfileAsync(int32_t profileIndex, cudaStream_t stream) noexcept
    {
        return mImpl->setOptimizationProfileAsync(profileIndex, stream);
    }
4659
    //!
    //! \brief Set whether enqueue emits layer timing to the profiler.
    //!
    //! \param enqueueEmitsProfile New value of the flag.
    //!
    void setEnqueueEmitsProfile(bool enqueueEmitsProfile) noexcept
    {
        mImpl->setEnqueueEmitsProfile(enqueueEmitsProfile);
    }
4675
    //!
    //! \brief Get the enqueueEmitsProfile state.
    //!
    //! \see setEnqueueEmitsProfile()
    //!
    bool getEnqueueEmitsProfile() const noexcept
    {
        return mImpl->getEnqueueEmitsProfile();
    }
4687
    //!
    //! \brief Calculate layer timing info for the current optimization profile and update the profiler.
    //!
    //! \return true on success, false otherwise.
    //!
    bool reportToProfiler() const noexcept
    {
        return mImpl->reportToProfiler();
    }
4717
    //!
    //! \brief Set the memory address for the given input or output tensor.
    //!
    //! \param tensorName Name of an input or output tensor.
    //! \param data Address of the buffer; ownership stays with the caller.
    //!
    //! \return true on success, false otherwise.
    //!
    bool setTensorAddress(char const* tensorName, void* data) noexcept
    {
        return mImpl->setTensorAddress(tensorName, data);
    }
4761
    //!
    //! \brief Get the memory address bound to the given input or output tensor,
    //! or nullptr if the name does not map to one.
    //!
    //! \param tensorName Name of an input or output tensor.
    //!
    void const* getTensorAddress(char const* tensorName) const noexcept
    {
        return mImpl->getTensorAddress(tensorName);
    }
4778
    //!
    //! \brief Set the memory address for a given output tensor.
    //!
    //! \param tensorName Name of an output tensor.
    //! \param data Address of the output buffer; ownership stays with the caller.
    //!
    //! \return true on success, false otherwise.
    //!
    bool setOutputTensorAddress(char const* tensorName, void* data) noexcept
    {
        return mImpl->setOutputTensorAddress(tensorName, data);
    }
4801
    //!
    //! \brief Set the memory address for a given input tensor.
    //!
    //! \param tensorName Name of an input tensor.
    //! \param data Address of the input buffer; ownership stays with the caller.
    //!
    //! \return true on success, false otherwise.
    //!
    bool setInputTensorAddress(char const* tensorName, void const* data) noexcept
    {
        return mImpl->setInputTensorAddress(tensorName, data);
    }
4823
    //!
    //! \brief Get the memory address for the given output tensor.
    //!
    //! \param tensorName Name of an output tensor.
    //!
    void* getOutputTensorAddress(char const* tensorName) const noexcept
    {
        return mImpl->getOutputTensorAddress(tensorName);
    }
4842
    //!
    //! \brief Run shape calculations.
    //!
    //! \param nbMaxNames Capacity of the tensorNames output array.
    //! \param tensorNames Output array receiving tensor names; may be written by the implementation.
    //!
    int32_t inferShapes(int32_t nbMaxNames, char const** tensorNames) noexcept
    {
        return mImpl->inferShapes(nbMaxNames, tensorNames);
    }
4875
4889 {
4890 return mImpl->updateDeviceMemorySizeForShapes();
4891 }
4892
    //!
    //! \brief Mark input as consumed by associating a CUDA event with it.
    //!
    //! \param event CUDA event signaled when input buffers may be reused.
    //!
    //! \return true on success, false otherwise.
    //!
    bool setInputConsumedEvent(cudaEvent_t event) noexcept
    {
        return mImpl->setInputConsumedEvent(event);
    }
4908
    //!
    //! \brief Return the event associated with consuming the input.
    //!
    //! \see setInputConsumedEvent()
    //!
    cudaEvent_t getInputConsumedEvent() const noexcept
    {
        return mImpl->getInputConsumedEvent();
    }
4918
    //!
    //! \brief Set the output allocator to use for the output tensor of the given name.
    //! Pass nullptr to unset a previously set allocator.
    //!
    //! \param tensorName Name of an output tensor.
    //! \param outputAllocator Allocator implementation; ownership stays with the caller.
    //!
    //! \return true on success, false otherwise.
    //!
    bool setOutputAllocator(char const* tensorName, IOutputAllocator* outputAllocator) noexcept
    {
        return mImpl->setOutputAllocator(tensorName, outputAllocator);
    }
4937
    //!
    //! \brief Get the output allocator associated with the output tensor of the given name,
    //! or nullptr if none is set.
    //!
    //! \param tensorName Name of an output tensor.
    //!
    IOutputAllocator* getOutputAllocator(char const* tensorName) const noexcept
    {
        return mImpl->getOutputAllocator(tensorName);
    }
4950
    //!
    //! \brief Get an upper bound on the output tensor's size, in bytes, based on
    //! the current optimization profile and input shapes.
    //!
    //! \param tensorName Name of an output tensor.
    //!
    int64_t getMaxOutputSize(char const* tensorName) const noexcept
    {
        return mImpl->getMaxOutputSize(tensorName);
    }
4968
4985 {
4986 return mImpl->setTemporaryStorageAllocator(allocator);
4987 }
4988
4995 {
4996 return mImpl->getTemporaryStorageAllocator();
4997 }
4998
    //!
    //! \brief Enqueue inference on a CUDA stream.
    //!
    //! \param stream Stream on which the inference kernels are enqueued.
    //!
    //! \return true on success, false otherwise.
    //!
    bool enqueueV3(cudaStream_t stream) noexcept
    {
        return mImpl->enqueueV3(stream);
    }
5022
    //!
    //! \brief Set the maximum size for persistent cache usage.
    //!
    //! \param size Limit in bytes.
    //!
    void setPersistentCacheLimit(size_t size) noexcept
    {
        mImpl->setPersistentCacheLimit(size);
    }
5038
    //!
    //! \brief Get the maximum size for persistent cache usage.
    //!
    //! \see setPersistentCacheLimit()
    //!
    size_t getPersistentCacheLimit() const noexcept
    {
        return mImpl->getPersistentCacheLimit();
    }
5049
    //!
    //! \brief Set the verbosity of the NVTX markers in this execution context.
    //!
    //! \param verbosity Desired verbosity level.
    //!
    //! \return true on success, false otherwise.
    //!
    bool setNvtxVerbosity(ProfilingVerbosity verbosity) noexcept
    {
        return mImpl->setNvtxVerbosity(verbosity);
    }
5073
5082 {
5083 return mImpl->getNvtxVerbosity();
5084 }
5085
    //!
    //! \brief Set the auxiliary streams that TensorRT should launch kernels on
    //! in the next enqueueV3() call.
    //!
    //! \param auxStreams Array of CUDA streams; ownership stays with the caller.
    //! \param nbStreams Number of entries in auxStreams.
    //!
    void setAuxStreams(cudaStream_t* auxStreams, int32_t nbStreams) noexcept
    {
        mImpl->setAuxStreams(auxStreams, nbStreams);
    }
5116
    //!
    //! \brief Set the DebugListener for this execution context.
    //!
    //! \param listener Application-implemented listener; ownership stays with the caller.
    //!
    //! \return true on success, false otherwise.
    //!
    bool setDebugListener(IDebugListener* listener) noexcept
    {
        return mImpl->setDebugListener(listener);
    }
5128
5135 {
5136 return mImpl->getDebugListener();
5137 }
5138
    //!
    //! \brief Set the debug state of a tensor given its name.
    //!
    //! \param name Name of the tensor.
    //! \param flag New debug state.
    //!
    //! \return true on success, false otherwise.
    //!
    bool setTensorDebugState(char const* name, bool flag) noexcept
    {
        return mImpl->setTensorDebugState(name, flag);
    }
5157
    //!
    //! \brief Get the debug state of the named tensor.
    //!
    //! \param name Name of the tensor.
    //!
    bool getDebugState(char const* name) const noexcept
    {
        return mImpl->getDebugState(name);
    }
5169
5176 {
5177 return mImpl->getRuntimeConfig();
5178 }
5179
    //!
    //! \brief Turn the debug state of all debug tensors on or off.
    //!
    //! \param flag New debug state for all debug tensors.
    //!
    //! \return true on success, false otherwise.
    //!
    bool setAllTensorsDebugState(bool flag) noexcept
    {
        return mImpl->setAllTensorsDebugState(flag);
    }
5192
    //!
    //! \brief Turn the debug state of unfused tensors on or off.
    //!
    //! \param flag New debug state for unfused tensors.
    //!
    //! \return true on success, false otherwise.
    //!
    bool setUnfusedTensorsDebugState(bool flag) noexcept
    {
        return mImpl->setUnfusedTensorsDebugState(flag);
    }
5208
    //!
    //! \brief Get the debug state of unfused tensors.
    //!
    //! \see setUnfusedTensorsDebugState()
    //!
    bool getUnfusedTensorsDebugState() const noexcept
    {
        return mImpl->getUnfusedTensorsDebugState();
    }
5218#if ENABLE_FEATURE_DISABLE_RUNTIME_ALLOCATION
5237 bool isStreamCapturable(cudaStream_t stream) const noexcept {
5238 return mImpl->isStreamCapturable(stream);
5239 }
5240#endif // ENABLE_FEATURE_DISABLE_RUNTIME_ALLOCATION
5241
5242protected:
5243 apiv::VExecutionContext* mImpl;
5244}; // class IExecutionContext
5245
//!
//! \brief The format in which layer information is printed by IEngineInspector.
//!
enum class LayerInformationFormat : int32_t
{
    kONELINE = 0, //!< One line per layer.
    kJSON = 1,    //!< JSON format.
};
5258
//! Maximum number of elements in the LayerInformationFormat enum. \see LayerInformationFormat
template <>
constexpr inline int32_t EnumMax<LayerInformationFormat>() noexcept
{
    return 2; // kONELINE and kJSON
}
5266
5283{
5284public:
5285 virtual ~IEngineInspector() noexcept = default;
5286
    //!
    //! \brief Set the execution context to be inspected.
    //!
    //! \param context Context to inspect; ownership stays with the caller.
    //!
    //! \return true on success, false otherwise.
    //!
    bool setExecutionContext(IExecutionContext const* context) noexcept
    {
        return mImpl->setExecutionContext(context);
    }
5303
5312 {
5313 return mImpl->getExecutionContext();
5314 }
5315
    //!
    //! \brief Get a string describing the information about a specific layer in
    //! the current engine or execution context.
    //!
    //! \param layerIndex Index of the layer.
    //! \param format Output format (one line per layer or JSON).
    //!
    char const* getLayerInformation(int32_t layerIndex, LayerInformationFormat format) const noexcept
    {
        return mImpl->getLayerInformation(layerIndex, format);
    }
5340
    //!
    //! \brief Get a string describing the information about all the layers in
    //! the current engine or execution context.
    //!
    //! \param format Output format (one line per layer or JSON).
    //!
    char const* getEngineInformation(LayerInformationFormat format) const noexcept
    {
        return mImpl->getEngineInformation(format);
    }
5363
    //!
    //! \brief Set the ErrorRecorder for this interface.
    //!
    //! \param recorder Application-implemented error recorder; ownership stays with the caller.
    //!
    void setErrorRecorder(IErrorRecorder* recorder) noexcept
    {
        mImpl->setErrorRecorder(recorder);
    }
5382
5394 {
5395 return mImpl->getErrorRecorder();
5396 }
5397
5398protected:
5399 apiv::VEngineInspector* mImpl;
5400}; // class IEngineInspector
5401
5402} // namespace nvinfer1
5403
5408extern "C" TENSORRTAPI void* createInferRuntime_INTERNAL(void* logger, int32_t version) noexcept;
5409
5414extern "C" TENSORRTAPI void* createInferRefitter_INTERNAL(void* engine, void* logger, int32_t version) noexcept;
5415
5420
5426extern "C" TENSORRTAPI nvinfer1::ILogger* getLogger() noexcept;
5427
5428namespace nvinfer1
5429{
5430namespace // unnamed namespace avoids linkage surprises when linking objects built with different versions of this
5431 // header.
5432{
5438inline IRuntime* createInferRuntime(ILogger& logger) noexcept
5439{
5440 return static_cast<IRuntime*>(createInferRuntime_INTERNAL(&logger, NV_TENSORRT_VERSION));
5441}
5442
5449inline IRefitter* createInferRefitter(ICudaEngine& engine, ILogger& logger) noexcept
5450{
5451 return static_cast<IRefitter*>(createInferRefitter_INTERNAL(&engine, &logger, NV_TENSORRT_VERSION));
5452}
5453
5454} // namespace
5455
5467template <typename T>
5469{
5470public:
5472 {
5473 getPluginRegistry()->registerCreator(instance, "");
5474 }
5475
5476private:
5478 T instance{};
5479};
5480
5481} // namespace nvinfer1
5482
//!
//! \brief Register a plugin creator type with the plugin registry by defining a
//! file-local static PluginRegistrar instance, whose constructor performs the
//! registration during static initialization.
//!
#define REGISTER_TENSORRT_PLUGIN(name) \
    static nvinfer1::PluginRegistrar<name> pluginRegistrar##name {}
5485
5486namespace nvinfer1
5487{
5497{
5498public:
5506 virtual ILogger* findLogger() = 0;
5507
5508protected:
5509 virtual ~ILoggerFinder() = default;
5510};
5511
5514namespace v_1_0
5515{
5516
5518{
5519public:
5521 ~IGpuAsyncAllocator() override = default;
5522
5552 void* allocateAsync(uint64_t const size, uint64_t const alignment, AllocatorFlags const flags,
5553 cudaStream_t /*stream*/) noexcept override = 0;
5554
5580 bool deallocateAsync(void* const memory, cudaStream_t /*stream*/) noexcept override = 0;
5581
5606 uint64_t const size, uint64_t const alignment, AllocatorFlags const flags) noexcept override
5607 {
5608 return allocateAsync(size, alignment, flags, nullptr);
5609 }
5610
    //!
    //! \brief Deprecated synchronous deallocation; forwards to deallocateAsync()
    //! with a null (default) stream.
    //!
    //! \param memory Pointer previously returned by this allocator.
    //!
    //! \return true on success, false otherwise.
    //!
    TRT_DEPRECATED bool deallocate(void* const memory) noexcept override
    {
        return deallocateAsync(memory, nullptr);
    }
5633
    //!
    //! \brief Return version information associated with this interface: "IGpuAllocator" 1.0.
    //!
    InterfaceInfo getInterfaceInfo() const noexcept override
    {
        return {"IGpuAllocator", 1, 0};
    }
5641};
5642
5644{
5645public:
    //!
    //! \brief Return version information associated with this interface: "PLUGIN CREATOR_V3ONE" 1.0.
    //!
    InterfaceInfo getInterfaceInfo() const noexcept override
    {
        return InterfaceInfo{"PLUGIN CREATOR_V3ONE", 1, 0};
    }
5653
5671 AsciiChar const* name, PluginFieldCollection const* fc, TensorRTPhase phase) noexcept = 0;
5672
5679 virtual PluginFieldCollection const* getFieldNames() noexcept = 0;
5680
5687 virtual AsciiChar const* getPluginName() const noexcept = 0;
5688
5695 virtual AsciiChar const* getPluginVersion() const noexcept = 0;
5696
5703 virtual AsciiChar const* getPluginNamespace() const noexcept = 0;
5704
5706 virtual ~IPluginCreatorV3One() = default;
5707
5708protected:
5711 IPluginCreatorV3One& operator=(IPluginCreatorV3One const&) & = default;
5712 IPluginCreatorV3One& operator=(IPluginCreatorV3One&&) & = default;
5713};
5714
5715} // namespace v_1_0
5716
5731
5741
5742} // namespace nvinfer1
5743
5747extern "C" TENSORRTAPI int32_t getInferLibMajorVersion() noexcept;
5751extern "C" TENSORRTAPI int32_t getInferLibMinorVersion() noexcept;
5755extern "C" TENSORRTAPI int32_t getInferLibPatchVersion() noexcept;
5759extern "C" TENSORRTAPI int32_t getInferLibBuildVersion() noexcept;
5760
5761#endif // NV_INFER_RUNTIME_H
TENSORRTAPI nvinfer1::IPluginRegistry * getPluginRegistry() noexcept
Return the plugin registry.
TENSORRTAPI nvinfer1::ILogger * getLogger() noexcept
Return the logger object.
TENSORRTAPI int32_t getInferLibMinorVersion() noexcept
Return the library minor version number.
TENSORRTAPI int32_t getInferLibMajorVersion() noexcept
Return the library major version number.
TENSORRTAPI int32_t getInferLibPatchVersion() noexcept
Return the library patch version number.
TENSORRTAPI int32_t getInferLibBuildVersion() noexcept
Return the library build version number.
#define TENSORRTAPI
Definition: NvInferRuntimeBase.h:69
#define NV_TENSORRT_VERSION
Definition: NvInferRuntimeBase.h:101
#define TRT_DEPRECATED
Definition: NvInferRuntimeBase.h:42
#define TRT_DEPRECATED_ENUM
Definition: NvInferRuntimeBase.h:43
Structure to define the dimensions of a tensor.
Definition: NvInferRuntimeBase.h:218
static constexpr int32_t MAX_DIMS
The maximum rank (number of dimensions) supported for a tensor.
Definition: NvInferRuntimeBase.h:221
Analog of class Dims with expressions instead of constants for the dimensions.
Definition: NvInferRuntime.h:350
IDimensionExpr const * d[Dims::MAX_DIMS]
The extent of each dimension.
Definition: NvInferRuntime.h:353
int32_t nbDims
The number of dimensions.
Definition: NvInferRuntime.h:352
An engine for executing inference on a built network, with functionally unsafe features.
Definition: NvInferRuntime.h:3401
int32_t getTensorBytesPerComponent(char const *tensorName) const noexcept
Return the number of bytes per component of an element, or -1 if the tensor is not vectorized or prov...
Definition: NvInferRuntime.h:3615
ISerializationConfig * createSerializationConfig() noexcept
Create a serialization configuration object.
Definition: NvInferRuntime.h:3989
char const * getIOTensorName(int32_t index) const noexcept
Return name of an IO tensor.
Definition: NvInferRuntime.h:3953
int64_t getWeightStreamingBudgetV2() const noexcept
Returns the current weight streaming device memory budget in bytes.
Definition: NvInferRuntime.h:4085
EngineCapability getEngineCapability() const noexcept
Determine what execution capability this engine has.
Definition: NvInferRuntime.h:3842
IErrorRecorder * getErrorRecorder() const noexcept
Get the ErrorRecorder assigned to this interface.
Definition: NvInferRuntime.h:3876
TensorFormat getTensorFormat(char const *tensorName, int32_t profileIndex) const noexcept
Return the tensor format of given profile, or TensorFormat::kLINEAR if the provided name does not map...
Definition: NvInferRuntime.h:3701
int64_t const * getProfileTensorValuesV2(char const *tensorName, int32_t profileIndex, OptProfileSelector select) const noexcept
Get the minimum / optimum / maximum values (not dimensions) for an input tensor given its name under ...
Definition: NvInferRuntime.h:4176
TRT_DEPRECATED bool hasImplicitBatchDimension() const noexcept
Query whether the engine was built with an implicit batch dimension.
Definition: NvInferRuntime.h:3890
apiv::VCudaEngine * mImpl
Definition: NvInferRuntime.h:4211
IExecutionContext * createExecutionContext(ExecutionContextAllocationStrategy strategy=ExecutionContextAllocationStrategy::kSTATIC) noexcept
Create an execution context and specify the strategy for allocating internal activation memory.
Definition: NvInferRuntime.h:3475
char const * getTensorFormatDesc(char const *tensorName) const noexcept
Return the human readable description of the tensor format, or empty string if the provided name does...
Definition: NvInferRuntime.h:3725
Dims getProfileShape(char const *tensorName, int32_t profileIndex, OptProfileSelector select) const noexcept
Get the minimum / optimum / maximum dimensions for an input tensor given its name under an optimizati...
Definition: NvInferRuntime.h:3827
bool setWeightStreamingBudgetV2(int64_t gpuMemoryBudget) noexcept
Limit the maximum amount of GPU memory usable for network weights in bytes.
Definition: NvInferRuntime.h:4067
IExecutionContext * createExecutionContext(IRuntimeConfig *runtimeConfig) noexcept
Create an execution context with TensorRT JIT runtime config.
Definition: NvInferRuntime.h:3539
int32_t getNbAuxStreams() const noexcept
Return the number of auxiliary streams used by this engine.
Definition: NvInferRuntime.h:3979
int64_t getStreamableWeightsSize() const noexcept
Get the total size in bytes of all streamable weights.
Definition: NvInferRuntime.h:4025
DataType getTensorDataType(char const *tensorName) const noexcept
Determine the required data type for a buffer from its tensor name.
Definition: NvInferRuntime.h:3430
void setErrorRecorder(IErrorRecorder *recorder) noexcept
Set the ErrorRecorder for this interface.
Definition: NvInferRuntime.h:3861
TacticSources getTacticSources() const noexcept
return the tactic sources required by this engine.
Definition: NvInferRuntime.h:3906
IHostMemory * serializeWithConfig(ISerializationConfig &config) const noexcept
Serialize the network to a stream with the provided SerializationConfig.
Definition: NvInferRuntime.h:4009
virtual ~ICudaEngine() noexcept=default
int64_t getWeightStreamingAutomaticBudget() const noexcept
TensorRT automatically determines a device memory budget for the model to run. The budget is close to...
Definition: NvInferRuntime.h:4110
bool isDebugTensor(char const *name) const noexcept
Check if a tensor is marked as a debug tensor.
Definition: NvInferRuntime.h:4152
int32_t getTensorVectorizedDim(char const *tensorName, int32_t profileIndex) const noexcept
Return the dimension index that the buffer is vectorized of given profile, or -1 if the provided name...
Definition: NvInferRuntime.h:3781
char const * getName() const noexcept
Returns the name of the network associated with the engine.
Definition: NvInferRuntime.h:3796
ProfilingVerbosity getProfilingVerbosity() const noexcept
Return the ProfilingVerbosity the builder config was set to when the engine was built.
Definition: NvInferRuntime.h:3918
bool isShapeInferenceIO(char const *tensorName) const noexcept
True if tensor is required as input for shape calculations or is output from shape calculations.
Definition: NvInferRuntime.h:3513
int64_t getWeightStreamingScratchMemorySize() const noexcept
Returns the size of the scratch memory required by the current weight streaming budget.
Definition: NvInferRuntime.h:4138
int64_t getDeviceMemorySizeV2() const noexcept
Return the maximum device memory required by the context over all profiles.
Definition: NvInferRuntime.h:3568
int32_t getTensorVectorizedDim(char const *tensorName) const noexcept
Return the dimension index that the buffer is vectorized, or -1 if the provided name does not map to ...
Definition: NvInferRuntime.h:3765
int32_t getTensorComponentsPerElement(char const *tensorName, int32_t profileIndex) const noexcept
Return the number of components included in one element of given profile, or -1 if tensor is not vect...
Definition: NvInferRuntime.h:3672
int64_t getDeviceMemorySizeForProfileV2(int32_t profileIndex) const noexcept
Return the maximum device memory required by the context for a profile.
Definition: NvInferRuntime.h:3584
IRuntimeConfig * createRuntimeConfig() noexcept
Create a runtime config for TensorRT JIT. The caller is responsible for ownership of the returned IRu...
Definition: NvInferRuntime.h:3552
TensorFormat getTensorFormat(char const *tensorName) const noexcept
Return the tensor format, or TensorFormat::kLINEAR if the provided name does not map to an input or o...
Definition: NvInferRuntime.h:3687
IHostMemory * serialize() const noexcept
Serialize the network to a stream.
Definition: NvInferRuntime.h:3458
int64_t getEngineStat(EngineStat stat) const noexcept
Get engine statistics according to the given enum value.
Definition: NvInferRuntime.h:4205
TensorLocation getTensorLocation(char const *tensorName) const noexcept
Get whether an input or output tensor must be on GPU or CPU.
Definition: NvInferRuntime.h:3493
IEngineInspector * createEngineInspector() const noexcept
Create a new engine inspector which prints the layer information in an engine or an execution context...
Definition: NvInferRuntime.h:3928
int32_t getTensorBytesPerComponent(char const *tensorName, int32_t profileIndex) const noexcept
Return the number of bytes per component of an element given of given profile, or -1 if the tensor is...
Definition: NvInferRuntime.h:3633
HardwareCompatibilityLevel getHardwareCompatibilityLevel() const noexcept
Return the hardware compatibility level of this engine.
Definition: NvInferRuntime.h:3964
int32_t getNbOptimizationProfiles() const noexcept
Get the number of optimization profiles defined for this engine.
Definition: NvInferRuntime.h:3807
char const * getTensorFormatDesc(char const *tensorName, int32_t profileIndex) const noexcept
Return the human readable description of the tensor format of given profile, or empty string if the p...
Definition: NvInferRuntime.h:3748
TensorIOMode getTensorIOMode(char const *tensorName) const noexcept
Determine whether a tensor is an input or output tensor.
Definition: NvInferRuntime.h:3527
int32_t getNbLayers() const noexcept
Get the number of layers in the network.
Definition: NvInferRuntime.h:3444
int32_t getNbIOTensors() const noexcept
Return number of IO tensors.
Definition: NvInferRuntime.h:3941
int32_t getTensorComponentsPerElement(char const *tensorName) const noexcept
Return the number of components included in one element, or -1 if tensor is not vectorized or if the ...
Definition: NvInferRuntime.h:3654
bool isRefittable() const noexcept
Return true if an engine can be refit.
Definition: NvInferRuntime.h:3594
An IDimensionExpr represents an integer expression constructed from constants, input dimensions,...
Definition: NvInferRuntime.h:232
bool isConstant() const noexcept
Return true if expression is a build-time constant.
Definition: NvInferRuntime.h:237
bool isSizeTensor() const noexcept
Return true if this denotes the value of a size tensor.
Definition: NvInferRuntime.h:263
virtual ~IDimensionExpr() noexcept=default
apiv::VDimensionExpr * mImpl
Definition: NvInferRuntime.h:254
int64_t getConstantValue() const noexcept
Get the value of the constant.
Definition: NvInferRuntime.h:248
An engine inspector which prints out the layer information of an engine or an execution context.
Definition: NvInferRuntime.h:5283
char const * getLayerInformation(int32_t layerIndex, LayerInformationFormat format) const noexcept
Get a string describing the information about a specific layer in the current engine or the execution...
Definition: NvInferRuntime.h:5336
IErrorRecorder * getErrorRecorder() const noexcept
Get the ErrorRecorder assigned to this interface.
Definition: NvInferRuntime.h:5393
void setErrorRecorder(IErrorRecorder *recorder) noexcept
Set the ErrorRecorder for this interface.
Definition: NvInferRuntime.h:5378
IExecutionContext const * getExecutionContext() const noexcept
Get the context currently being inspected.
Definition: NvInferRuntime.h:5311
apiv::VEngineInspector * mImpl
Definition: NvInferRuntime.h:5399
virtual ~IEngineInspector() noexcept=default
char const * getEngineInformation(LayerInformationFormat format) const noexcept
Get a string describing the information about all the layers in the current engine or the execution c...
Definition: NvInferRuntime.h:5359
Context for executing inference using an engine, with functionally unsafe features.
Definition: NvInferRuntime.h:4328
IOutputAllocator * getOutputAllocator(char const *tensorName) const noexcept
Get output allocator associated with output tensor of given name, or nullptr if the provided name doe...
Definition: NvInferRuntime.h:4946
IErrorRecorder * getErrorRecorder() const noexcept
Get the ErrorRecorder assigned to this interface.
Definition: NvInferRuntime.h:4594
bool reportToProfiler() const noexcept
Calculate layer timing info for the current optimization profile in IExecutionContext and update the ...
Definition: NvInferRuntime.h:4713
virtual ~IExecutionContext() noexcept=default
void setDeviceMemory(void *memory) noexcept
Set the device memory for use by this execution context.
Definition: NvInferRuntime.h:4431
bool setTensorDebugState(char const *name, bool flag) noexcept
Set debug state of tensor given the tensor name.
Definition: NvInferRuntime.h:5153
char const * getName() const noexcept
Return the name of the execution context.
Definition: NvInferRuntime.h:4404
IGpuAllocator * getTemporaryStorageAllocator() const noexcept
Get allocator set by setTemporaryStorageAllocator.
Definition: NvInferRuntime.h:4994
void setEnqueueEmitsProfile(bool enqueueEmitsProfile) noexcept
Set whether enqueue emits layer timing to the profiler.
Definition: NvInferRuntime.h:4671
bool setUnfusedTensorsDebugState(bool flag) noexcept
Turn the debug state of unfused tensors on or off.
Definition: NvInferRuntime.h:5204
Dims getTensorShape(char const *tensorName) const noexcept
Return the shape of the given input or output.
Definition: NvInferRuntime.h:4544
bool getDebugState(char const *name) const noexcept
Get the debug state.
Definition: NvInferRuntime.h:5165
bool setInputShape(char const *tensorName, Dims const &dims) noexcept
Set shape of given input.
Definition: NvInferRuntime.h:4507
bool executeV2(void *const *bindings) noexcept
Synchronously execute a network.
Definition: NvInferRuntime.h:4611
bool getEnqueueEmitsProfile() const noexcept
Get the enqueueEmitsProfile state.
Definition: NvInferRuntime.h:4683
void const * getTensorAddress(char const *tensorName) const noexcept
Get memory address bound to given input or output tensor, or nullptr if the provided name does not ma...
Definition: NvInferRuntime.h:4774
bool setOutputAllocator(char const *tensorName, IOutputAllocator *outputAllocator) noexcept
Set output allocator to use for output tensor of given name. Pass nullptr to outputAllocator to unset...
Definition: NvInferRuntime.h:4933
bool setOptimizationProfileAsync(int32_t profileIndex, cudaStream_t stream) noexcept
Select an optimization profile for the current context with async semantics.
Definition: NvInferRuntime.h:4655
apiv::VExecutionContext * mImpl
Definition: NvInferRuntime.h:5243
bool setOutputTensorAddress(char const *tensorName, void *data) noexcept
Set the memory address for a given output tensor.
Definition: NvInferRuntime.h:4797
void setPersistentCacheLimit(size_t size) noexcept
Set the maximum size for persistent cache usage.
Definition: NvInferRuntime.h:5034
size_t getPersistentCacheLimit() const noexcept
Get the maximum size for persistent cache usage.
Definition: NvInferRuntime.h:5045
bool setAllTensorsDebugState(bool flag) noexcept
Turn the debug state of all debug tensors on or off.
Definition: NvInferRuntime.h:5188
ICudaEngine const & getEngine() const noexcept
Get the associated engine.
Definition: NvInferRuntime.h:4380
ProfilingVerbosity getNvtxVerbosity() const noexcept
Get the NVTX verbosity of the execution context.
Definition: NvInferRuntime.h:5081
size_t updateDeviceMemorySizeForShapes() noexcept
Recompute the internal activation buffer sizes based on the current input shapes, and return the tota...
Definition: NvInferRuntime.h:4888
void setAuxStreams(cudaStream_t *auxStreams, int32_t nbStreams) noexcept
Set the auxiliary streams that TensorRT should launch kernels on in the next enqueueV3() call.
Definition: NvInferRuntime.h:5112
int64_t getMaxOutputSize(char const *tensorName) const noexcept
Get upper bound on an output tensor's size, in bytes, based on the current optimization profile and i...
Definition: NvInferRuntime.h:4964
int32_t inferShapes(int32_t nbMaxNames, char const **tensorNames) noexcept
Run shape calculations.
Definition: NvInferRuntime.h:4871
bool setDebugListener(IDebugListener *listener) noexcept
Set DebugListener for this execution context.
Definition: NvInferRuntime.h:5124
bool setTensorAddress(char const *tensorName, void *data) noexcept
Set memory address for given input or output tensor.
Definition: NvInferRuntime.h:4757
bool setTemporaryStorageAllocator(IGpuAllocator *allocator) noexcept
Specify allocator to use for internal temporary storage.
Definition: NvInferRuntime.h:4984
void * getOutputTensorAddress(char const *tensorName) const noexcept
Get memory address for given output.
Definition: NvInferRuntime.h:4838
bool enqueueV3(cudaStream_t stream) noexcept
Enqueue inference on a stream.
Definition: NvInferRuntime.h:5018
IDebugListener * getDebugListener() noexcept
Get the DebugListener of this execution context.
Definition: NvInferRuntime.h:5134
int32_t getOptimizationProfile() const noexcept
Get the index of the currently selected optimization profile.
Definition: NvInferRuntime.h:4489
bool setInputTensorAddress(char const *tensorName, void const *data) noexcept
Set memory address for given input.
Definition: NvInferRuntime.h:4819
bool getDebugSync() const noexcept
Get the debug sync flag.
Definition: NvInferRuntime.h:4350
bool setInputConsumedEvent(cudaEvent_t event) noexcept
Mark input as consumed.
Definition: NvInferRuntime.h:4904
Dims getTensorStrides(char const *tensorName) const noexcept
Return the strides of the buffer for the given tensor name.
Definition: NvInferRuntime.h:4474
bool setNvtxVerbosity(ProfilingVerbosity verbosity) noexcept
Set the verbosity of the NVTX markers in the execution context.
Definition: NvInferRuntime.h:5069
IProfiler * getProfiler() const noexcept
Get the profiler.
Definition: NvInferRuntime.h:4370
void setErrorRecorder(IErrorRecorder *recorder) noexcept
Set the ErrorRecorder for this interface.
Definition: NvInferRuntime.h:4579
void setDeviceMemoryV2(void *memory, int64_t size) noexcept
Set the device memory and its corresponding size for use by this execution context.
Definition: NvInferRuntime.h:4453
bool allInputDimensionsSpecified() const noexcept
Whether all dynamic dimensions of input tensors have been specified.
Definition: NvInferRuntime.h:4560
bool getUnfusedTensorsDebugState() const noexcept
Get the debug state of unfused tensors.
Definition: NvInferRuntime.h:5214
void setProfiler(IProfiler *profiler) noexcept
Set the profiler.
Definition: NvInferRuntime.h:4360
void setName(char const *name) noexcept
Set the name of the execution context.
Definition: NvInferRuntime.h:4394
cudaEvent_t getInputConsumedEvent() const noexcept
The event associated with consuming the input.
Definition: NvInferRuntime.h:4914
IRuntimeConfig * getRuntimeConfig() const noexcept
Get the runtime config object used during execution context creation.
Definition: NvInferRuntime.h:5175
Object for constructing IDimensionExpr.
Definition: NvInferRuntime.h:287
IDimensionExpr const * operation(DimensionOperation op, IDimensionExpr const &first, IDimensionExpr const &second) noexcept
Get the operation.
Definition: NvInferRuntime.h:303
virtual ~IExprBuilder() noexcept=default
IDimensionExpr const * constant(int64_t value) noexcept
Return pointer to IDimensionExpr for given value.
Definition: NvInferRuntime.h:292
apiv::VExprBuilder * mImpl
Definition: NvInferRuntime.h:310
IDimensionExpr const * declareSizeTensor(int32_t outputIndex, IDimensionExpr const &opt, IDimensionExpr const &upper)
Declare a size tensor at the given output index, with the specified auto-tuning formula and upper bou...
Definition: NvInferRuntime.h:338
Class to handle library allocated memory that is accessible to the user.
Definition: NvInferRuntime.h:142
void * data() const noexcept
A pointer to the raw data that is owned by the library.
Definition: NvInferRuntime.h:147
DataType type() const noexcept
The type of the memory that was allocated.
Definition: NvInferRuntime.h:159
std::size_t size() const noexcept
The size in bytes of the data that was allocated.
Definition: NvInferRuntime.h:153
apiv::VHostMemory * mImpl
Definition: NvInferRuntime.h:165
virtual ~IHostMemory() noexcept=default
A virtual base class to find a logger. Allows a plugin to find an instance of a logger if it needs to...
Definition: NvInferRuntime.h:5497
virtual ILogger * findLogger()=0
Get the logger used by the engine or execution context which called the plugin method.
virtual ~ILoggerFinder()=default
Application-implemented logging interface for the builder, refitter and runtime.
Definition: NvInferRuntime.h:1588
virtual ~ILogger()=default
Severity
The severity corresponding to a log message.
Definition: NvInferRuntime.h:1596
virtual void log(Severity severity, AsciiChar const *msg) noexcept=0
A callback implemented by the application to handle logging messages;.
Forward declaration of IEngineInspector for use by other interfaces.
Definition: NvInferRuntime.h:51
INoCopy & operator=(INoCopy &&other)=delete
INoCopy(INoCopy const &other)=delete
INoCopy(INoCopy &&other)=delete
virtual ~INoCopy()=default
INoCopy & operator=(INoCopy const &other)=delete
Optimization profile for dynamic input dimensions and shape tensors.
Definition: NvInferRuntime.h:2672
TRT_DEPRECATED int32_t const * getShapeValues(char const *inputName, OptProfileSelector select) const noexcept
Get the minimum / optimum / maximum values for an input shape tensor.
Definition: NvInferRuntime.h:2793
apiv::VOptimizationProfile * mImpl
Definition: NvInferRuntime.h:2909
Dims getDimensions(char const *inputName, OptProfileSelector select) const noexcept
Get the minimum / optimum / maximum dimensions for a dynamic input tensor.
Definition: NvInferRuntime.h:2713
TRT_DEPRECATED bool setShapeValues(char const *inputName, OptProfileSelector select, int32_t const *values, int32_t nbValues) noexcept
Set the minimum / optimum / maximum values for an input shape tensor.
Definition: NvInferRuntime.h:2765
virtual ~IOptimizationProfile() noexcept=default
float getExtraMemoryTarget() const noexcept
Get the extra memory target that has been defined for this profile.
Definition: NvInferRuntime.h:2823
bool setExtraMemoryTarget(float target) noexcept
Set a target for extra GPU memory that may be used by this profile.
Definition: NvInferRuntime.h:2811
bool setDimensions(char const *inputName, OptProfileSelector select, Dims const &dims) noexcept
Set the minimum / optimum / maximum dimensions for a dynamic input tensor.
Definition: NvInferRuntime.h:2701
bool isValid() const noexcept
Check whether the optimization profile can be passed to an IBuilderConfig object.
Definition: NvInferRuntime.h:2840
int64_t const * getShapeValuesV2(char const *inputName, OptProfileSelector select) const noexcept
Get the minimum / optimum / maximum values for an input shape tensor.
Definition: NvInferRuntime.h:2903
bool setShapeValuesV2(char const *inputName, OptProfileSelector select, int64_t const *values, int32_t nbValues) noexcept
Set the minimum / optimum / maximum values for an input shape tensor.
Definition: NvInferRuntime.h:2890
int32_t getNbShapeValues(char const *inputName) const noexcept
Get the number of values for an input shape tensor.
Definition: NvInferRuntime.h:2779
Single registration point for all plugins in an application. It is used to find plugin implementation...
Definition: NvInferRuntimeCommon.h:56
virtual TRT_DEPRECATED bool registerCreator(IPluginCreator &creator, AsciiChar const *const pluginNamespace) noexcept=0
Register a plugin creator implementing IPluginCreator. Returns false if any plugin creator with the s...
Interface for plugins to access per context resources provided by TensorRT.
Definition: NvInferRuntime.h:813
virtual IErrorRecorder * getErrorRecorder() const noexcept=0
Get the error recorder associated with the resource context.
IPluginResourceContext & operator=(IPluginResourceContext const &) &=default
virtual IGpuAllocator * getGpuAllocator() const noexcept=0
Get the GPU allocator associated with the resource context.
Similar to IPluginV2Ext, but with support for dynamic shapes.
Definition: NvInferRuntime.h:407
IPluginV2DynamicExt * clone() const noexcept override=0
Clone the plugin object. This copies over internal plugin parameters as well and returns a new plugin...
virtual ~IPluginV2DynamicExt() noexcept
Definition: NvInferRuntime.h:558
Plugin class for user-implemented layers.
Definition: NvInferRuntimePlugin.h:474
Updates weights in an engine.
Definition: NvInferRuntime.h:2264
bool refitCudaEngineAsync(cudaStream_t stream) noexcept
Enqueue weights refitting of the associated engine on the given stream.
Definition: NvInferRuntime.h:2594
int32_t getMaxThreads() const noexcept
Get the maximum number of threads that can be used by the refitter.
Definition: NvInferRuntime.h:2473
TensorLocation getWeightsLocation(char const *weightsName) const noexcept
Get location for the weights associated with the given name.
Definition: NvInferRuntime.h:2532
bool setNamedWeights(char const *name, Weights weights) noexcept
Specify new weights of given name.
Definition: NvInferRuntime.h:2397
int32_t getAllWeights(int32_t size, char const **weightsNames) noexcept
Get names of all weights that could be refit.
Definition: NvInferRuntime.h:2433
ILogger * getLogger() const noexcept
get the logger with which the refitter was created
Definition: NvInferRuntime.h:2443
bool refitCudaEngine() noexcept
Refits associated engine.
Definition: NvInferRuntime.h:2300
int32_t getMissingWeights(int32_t size, char const **weightsNames) noexcept
Get names of missing weights.
Definition: NvInferRuntime.h:2417
int32_t getMissing(int32_t size, char const **layerNames, WeightsRole *roles) noexcept
Get description of missing weights.
Definition: NvInferRuntime.h:2321
Weights getNamedWeights(char const *weightsName) const noexcept
Get weights associated with the given name.
Definition: NvInferRuntime.h:2516
bool unsetNamedWeights(char const *weightsName) noexcept
Unset weights associated with the given name.
Definition: NvInferRuntime.h:2548
Weights getWeightsPrototype(char const *weightsName) const noexcept
Get the Weights prototype associated with the given name.
Definition: NvInferRuntime.h:2612
bool setMaxThreads(int32_t maxThreads) noexcept
Set the maximum number of threads.
Definition: NvInferRuntime.h:2459
bool setNamedWeights(char const *name, Weights weights, TensorLocation location) noexcept
Specify new weights on a specified device of given name.
Definition: NvInferRuntime.h:2500
void setWeightsValidation(bool weightsValidation) noexcept
Set whether to validate weights during refitting.
Definition: NvInferRuntime.h:2564
apiv::VRefitter * mImpl
Definition: NvInferRuntime.h:2618
int32_t getAll(int32_t size, char const **layerNames, WeightsRole *roles) noexcept
Get description of all weights that could be refit.
Definition: NvInferRuntime.h:2338
virtual ~IRefitter() noexcept=default
bool getWeightsValidation() const noexcept
Get whether to validate weights values during refitting.
Definition: NvInferRuntime.h:2572
void setErrorRecorder(IErrorRecorder *recorder) noexcept
Set the ErrorRecorder for this interface.
Definition: NvInferRuntime.h:2357
IErrorRecorder * getErrorRecorder() const noexcept
Get the ErrorRecorder assigned to this interface.
Definition: NvInferRuntime.h:2372
A class for Runtime cache currently used for TensorRT JIT compilation. This cache can be serialized a...
Definition: NvInferRuntime.h:3129
virtual ~IRuntimeCache() noexcept=default
bool deserialize(void const *blob, size_t size) noexcept
Deserialize the Runtime cache from a stream that contains serialized Runtime cache.
Definition: NvInferRuntime.h:3149
bool reset() noexcept
Reset the Runtime cache. Clears all content within the cache.
Definition: NvInferRuntime.h:3157
apiv::VRuntimeCache * mImpl
Definition: NvInferRuntime.h:3163
A class for runtime configuration. This class is used during execution context creation.
Definition: NvInferRuntime.h:3252
bool setCudaGraphStrategy(CudaGraphStrategy strategy) noexcept
Set the strategy used for CUDA graphs for JIT (Just-In-Time) inference. Default is kDISABLED.
Definition: NvInferRuntime.h:3346
DynamicShapesKernelSpecializationStrategy getDynamicShapesKernelSpecializationStrategy() const noexcept
Return the dynamic shape specialization strategy of this config.
Definition: NvInferRuntime.h:3322
virtual ~IRuntimeConfig() noexcept=default
apiv::VRuntimeConfig * mImpl
Definition: NvInferRuntime.h:3362
IRuntimeCache * createRuntimeCache() const noexcept
Create an empty Runtime cache.
Definition: NvInferRuntime.h:3281
ExecutionContextAllocationStrategy getExecutionContextAllocationStrategy() const noexcept
Get the execution context allocation strategy.
Definition: NvInferRuntime.h:3271
bool setRuntimeCache(IRuntimeCache const &cache) noexcept
Set Runtime cache to the runtime config. Enables Runtime caching.
Definition: NvInferRuntime.h:3291
void setDynamicShapesKernelSpecializationStrategy(DynamicShapesKernelSpecializationStrategy dynamicShapesKernelSpecializationStrategy) noexcept
Set the dynamic shape kernel specialization strategy for this config.
Definition: NvInferRuntime.h:3311
IRuntimeCache * getRuntimeCache() const noexcept
Get the Runtime cache from the runtime config.
Definition: NvInferRuntime.h:3301
CudaGraphStrategy getCudaGraphStrategy() const noexcept
Return the strategy used for CUDA graphs for JIT (Just-In-Time) inference.
Definition: NvInferRuntime.h:3356
Allows a serialized functionally unsafe engine to be deserialized.
Definition: NvInferRuntime.h:1921
bool setMaxThreads(int32_t maxThreads) noexcept
Set the maximum number of threads.
Definition: NvInferRuntime.h:2070
IRuntime * loadRuntime(char const *path) noexcept
Load IRuntime from the file.
Definition: NvInferRuntime.h:2186
bool getEngineHostCodeAllowed() const noexcept
Get whether the runtime is allowed to deserialize engines with host executable code.
Definition: NvInferRuntime.h:2208
int64_t getEngineHeaderSize() const noexcept
Get size of engine header in bytes.
Definition: NvInferRuntime.h:2220
TempfileControlFlags getTempfileControlFlags() const noexcept
Get the tempfile control flags for this runtime.
Definition: NvInferRuntime.h:2158
void setEngineHostCodeAllowed(bool allowed) noexcept
Set whether the runtime is allowed to deserialize engines with host executable code.
Definition: NvInferRuntime.h:2198
virtual ~IRuntime() noexcept=default
void setTemporaryDirectory(char const *path) noexcept
Set the directory that will be used by this runtime for temporary files.
Definition: NvInferRuntime.h:2119
IPluginRegistry & getPluginRegistry() noexcept
Get the local plugin registry that can be used by the runtime.
Definition: NvInferRuntime.h:2168
apiv::VRuntime * mImpl
Definition: NvInferRuntime.h:2253
int32_t getNbDLACores() const noexcept
Returns number of DLA hardware cores accessible or 0 if DLA is unavailable.
Definition: NvInferRuntime.h:1954
EngineValidity getEngineValidity(void const *blob, int64_t blobSize, uint64_t *diagnostics) const noexcept
Check for engine validity by inspecting the serialized engine header.
Definition: NvInferRuntime.h:2247
ICudaEngine * deserializeCudaEngine(void const *blob, std::size_t size) noexcept
Deserialize an engine from host memory.
Definition: NvInferRuntime.h:2022
void setTempfileControlFlags(TempfileControlFlags flags) noexcept
Set the tempfile control flags for this runtime.
Definition: NvInferRuntime.h:2146
int32_t getDLACore() const noexcept
Get the DLA core that the engine executes on.
Definition: NvInferRuntime.h:1946
void setGpuAllocator(IGpuAllocator *allocator) noexcept
Set the GPU allocator.
Definition: NvInferRuntime.h:1970
IErrorRecorder * getErrorRecorder() const noexcept
get the ErrorRecorder assigned to this interface.
Definition: NvInferRuntime.h:2004
ICudaEngine * deserializeCudaEngine(IStreamReaderV2 &streamReader)
Deserialize an engine from a stream. IStreamReaderV2 is expected to support reading to both host and ...
Definition: NvInferRuntime.h:2045
ILogger * getLogger() const noexcept
get the logger with which the runtime was created
Definition: NvInferRuntime.h:2055
int32_t getMaxThreads() const noexcept
Get the maximum number of threads that can be used by the runtime.
Definition: NvInferRuntime.h:2084
char const * getTemporaryDirectory() const noexcept
Get the directory that will be used by this runtime for temporary files.
Definition: NvInferRuntime.h:2130
void setErrorRecorder(IErrorRecorder *recorder) noexcept
Set the ErrorRecorder for this interface.
Definition: NvInferRuntime.h:1989
Holds properties for configuring an engine to serialize the binary.
Definition: NvInferRuntime.h:3020
virtual ~ISerializationConfig() noexcept=default
bool clearFlag(SerializationFlag serializationFlag) noexcept
clear a serialization flag.
Definition: NvInferRuntime.h:3059
bool setFlag(SerializationFlag serializationFlag) noexcept
Set a serialization flag.
Definition: NvInferRuntime.h:3071
SerializationFlags getFlags() const noexcept
Get the serialization flags for this config.
Definition: NvInferRuntime.h:3047
bool getFlag(SerializationFlag serializationFlag) const noexcept
Returns true if the serialization flag is set.
Definition: NvInferRuntime.h:3083
apiv::VSerializationConfig * mImpl
Definition: NvInferRuntime.h:3089
An Interface class for version control.
Definition: NvInferRuntimeBase.h:278
Version information associated with a TRT interface.
Definition: NvInferRuntimeBase.h:243
Register the plugin creator to the registry The static registry object will be instantiated when the ...
Definition: NvInferRuntime.h:5469
PluginRegistrar()
Definition: NvInferRuntime.h:5471
An array of weights used as a layer parameter.
Definition: NvInferRuntime.h:124
DataType type
The type of the weights.
Definition: NvInferRuntime.h:126
int64_t count
The number of weights in the array.
Definition: NvInferRuntime.h:128
void const * values
The weight values, in a contiguous array.
Definition: NvInferRuntime.h:127
Definition: NvInferRuntime.h:4279
virtual bool processDebugTensor(void const *addr, TensorLocation location, DataType type, Dims const &shape, char const *name, cudaStream_t stream)=0
Callback function that is called when a debug tensor’s value is updated and the debug state of the te...
InterfaceInfo getInterfaceInfo() const noexcept override
Return version information associated with this interface. Applications must not override this method...
Definition: NvInferRuntime.h:4284
~IDebugListener() override=default
Definition: NvInferRuntimeBase.h:415
Definition: NvInferRuntime.h:1656
virtual void * allocateAsync(uint64_t const size, uint64_t const alignment, AllocatorFlags const flags, cudaStream_t) noexcept
A thread-safe callback implemented by the application to handle stream-ordered acquisition of GPU mem...
Definition: NvInferRuntime.h:1778
InterfaceInfo getInterfaceInfo() const noexcept override
Return version information associated with this interface. Applications must not override this method...
Definition: NvInferRuntime.h:1819
virtual TRT_DEPRECATED bool deallocate(void *const memory) noexcept=0
A thread-safe callback implemented by the application to handle release of GPU memory.
~IGpuAllocator() override=default
virtual void * reallocate(void *const, uint64_t, uint64_t) noexcept
A thread-safe callback implemented by the application to resize an existing allocation.
Definition: NvInferRuntime.h:1725
virtual TRT_DEPRECATED void * allocate(uint64_t const size, uint64_t const alignment, AllocatorFlags const flags) noexcept=0
A thread-safe callback implemented by the application to handle acquisition of GPU memory.
virtual bool deallocateAsync(void *const memory, cudaStream_t) noexcept
A thread-safe callback implemented by the application to handle stream-ordered release of GPU memory.
Definition: NvInferRuntime.h:1811
Definition: NvInferRuntime.h:5518
bool deallocateAsync(void *const memory, cudaStream_t) noexcept override=0
A thread-safe callback implemented by the application to handle stream-ordered asynchronous release o...
void * allocateAsync(uint64_t const size, uint64_t const alignment, AllocatorFlags const flags, cudaStream_t) noexcept override=0
A thread-safe callback implemented by the application to handle stream-ordered asynchronous acquisiti...
TRT_DEPRECATED void * allocate(uint64_t const size, uint64_t const alignment, AllocatorFlags const flags) noexcept override
A thread-safe callback implemented by the application to handle acquisition of GPU memory.
Definition: NvInferRuntime.h:5605
TRT_DEPRECATED bool deallocate(void *const memory) noexcept override
A thread-safe callback implemented by the application to handle release of GPU memory.
Definition: NvInferRuntime.h:5629
InterfaceInfo getInterfaceInfo() const noexcept override
Return version information associated with this interface. Applications must not override this method...
Definition: NvInferRuntime.h:5637
~IGpuAsyncAllocator() override=default
Definition: NvInferRuntime.h:4217
InterfaceInfo getInterfaceInfo() const noexcept override
Return version information associated with this interface. Applications must not override this method...
Definition: NvInferRuntime.h:4222
virtual void * reallocateOutputAsync(char const *tensorName, void *currentMemory, uint64_t size, uint64_t alignment, cudaStream_t)
Return a pointer to memory for an output tensor, or nullptr if memory cannot be allocated....
Definition: NvInferRuntime.h:4249
virtual void notifyShape(char const *tensorName, Dims const &dims) noexcept=0
Called by TensorRT when the shape of the output tensor is known.
Definition: NvInferPluginBase.h:141
Definition: NvInferPluginBase.h:193
Definition: NvInferRuntime.h:5644
virtual PluginFieldCollection const * getFieldNames() noexcept=0
Return a list of fields that need to be passed to createPlugin() when creating a plugin for use in th...
InterfaceInfo getInterfaceInfo() const noexcept override
Return version information associated with this interface. Applications must not override this method...
Definition: NvInferRuntime.h:5649
virtual IPluginV3 * createPlugin(AsciiChar const *name, PluginFieldCollection const *fc, TensorRTPhase phase) noexcept=0
Return a plugin object. Return nullptr in case of error.
Definition: NvInferPluginBase.h:206
Definition: NvInferRuntime.h:882
virtual int32_t getFormatCombinationLimit() noexcept
Return the maximum number of format combinations that will be timed by TensorRT during the build phas...
Definition: NvInferRuntime.h:1086
virtual int32_t getNbOutputs() const noexcept=0
Get the number of outputs from the plugin.
virtual int32_t configurePlugin(DynamicPluginTensorDesc const *in, int32_t nbInputs, DynamicPluginTensorDesc const *out, int32_t nbOutputs) noexcept=0
Configure the plugin.
virtual int32_t getNbTactics() noexcept
Query for the number of custom tactics the plugin intends to use.
Definition: NvInferRuntime.h:1062
virtual char const * getMetadataString() noexcept
Query for a string representing the configuration of the plugin. May be called anytime after plugin c...
Definition: NvInferRuntime.h:1097
virtual char const * getTimingCacheID() noexcept
Called to query the suffix to use for the timing cache ID. May be called anytime after plugin creatio...
Definition: NvInferRuntime.h:1078
virtual bool supportsFormatCombination(int32_t pos, DynamicPluginTensorDesc const *inOut, int32_t nbInputs, int32_t nbOutputs) noexcept=0
Return true if plugin supports the format and datatype for the input/output indexed by pos.
virtual int32_t getOutputDataTypes(DataType *outputTypes, int32_t nbOutputs, const DataType *inputTypes, int32_t nbInputs) const noexcept=0
Provide the data types of the plugin outputs if the input tensors have the data types provided.
InterfaceInfo getInterfaceInfo() const noexcept override
Return version information associated with this interface. Applications must not override this method...
Definition: NvInferRuntime.h:894
virtual int32_t getOutputShapes(DimsExprs const *inputs, int32_t nbInputs, DimsExprs const *shapeInputs, int32_t nbShapeInputs, DimsExprs *outputs, int32_t nbOutputs, IExprBuilder &exprBuilder) noexcept=0
Provide expressions for computing dimensions of the output tensors from dimensions of the input tenso...
virtual int32_t getValidTactics(int32_t *tactics, int32_t nbTactics) noexcept
Query for any custom tactics that the plugin intends to use.
Definition: NvInferRuntime.h:1054
Definition: NvInferRuntime.h:839
InterfaceInfo getInterfaceInfo() const noexcept override
Return version information associated with this interface. Applications must not override this method...
Definition: NvInferRuntime.h:844
virtual AsciiChar const * getPluginName() const noexcept=0
Return the plugin name. Should match the plugin name returned by the corresponding plugin creator.
Definition: NvInferRuntime.h:1104
InterfaceInfo getInterfaceInfo() const noexcept override
Return version information associated with this interface. Applications must not override this method...
Definition: NvInferRuntime.h:1109
virtual int32_t onShapeChange(PluginTensorDesc const *in, int32_t nbInputs, PluginTensorDesc const *out, int32_t nbOutputs) noexcept=0
Called when a plugin is being prepared for execution for specific dimensions. This could happen multi...
virtual PluginFieldCollection const * getFieldsToSerialize() noexcept=0
Get the plugin fields which should be serialized.
virtual int32_t setTactic(int32_t tactic) noexcept
Set the tactic to be used in the subsequent call to enqueue(). If no custom tactics were advertised,...
Definition: NvInferRuntime.h:1121
virtual int32_t enqueue(PluginTensorDesc const *inputDesc, PluginTensorDesc const *outputDesc, void const *const *inputs, void *const *outputs, void *workspace, cudaStream_t stream) noexcept=0
Execute the layer.
virtual IPluginV3 * attachToContext(IPluginResourceContext *context) noexcept=0
Clone the plugin, attach the cloned plugin object to a execution context and grant the cloned plugin ...
Definition: NvInferRuntime.h:1292
virtual ~IProfiler() noexcept
Definition: NvInferRuntime.h:1303
virtual void reportLayerTime(char const *layerName, float ms) noexcept=0
Layer time reporting callback.
Definition: NvInferRuntime.h:631
~IStreamReader() override=default
IStreamReader & operator=(IStreamReader const &) &=default
IStreamReader & operator=(IStreamReader &&) &=default
virtual int64_t read(void *destination, int64_t nbBytes)=0
Read the next number of bytes in the stream.
IStreamReader(IStreamReader &&)=default
IStreamReader(IStreamReader const &)=default
InterfaceInfo getInterfaceInfo() const noexcept override
Return version information associated with this interface. Applications must not override this method...
Definition: NvInferRuntime.h:643
Definition: NvInferRuntime.h:741
IStreamReaderV2 & operator=(IStreamReaderV2 const &) &=default
InterfaceInfo getInterfaceInfo() const noexcept override
Return version information associated with this interface. Applications must not override this method...
Definition: NvInferRuntime.h:753
IStreamReaderV2(IStreamReaderV2 &&)=default
~IStreamReaderV2() override=default
virtual int64_t read(void *destination, int64_t nbBytes, cudaStream_t stream) noexcept=0
Read the next number of bytes in the stream asynchronously.
IStreamReaderV2(IStreamReaderV2 const &)=default
virtual bool seek(int64_t offset, SeekPosition where) noexcept=0
Sets the position of the stream to the given offset.
IStreamReaderV2 & operator=(IStreamReaderV2 &&) &=default
Definition: NvInferRuntime.h:666
IStreamWriter & operator=(IStreamWriter const &) &=default
IStreamWriter(IStreamWriter &&)=default
virtual int64_t write(void const *data, int64_t nbBytes)=0
write nbBytes of data into the stream.
IStreamWriter(IStreamWriter const &)=default
InterfaceInfo getInterfaceInfo() const noexcept final
Return version information associated with this interface. Applications must not override this method...
Definition: NvInferRuntime.h:678
IStreamWriter & operator=(IStreamWriter &&) &=default
~IStreamWriter() override=default
Definition: NvInferRuntime.h:1201
virtual int32_t getAliasedInput(int32_t outputIndex) noexcept
Communicates to TensorRT that the output at the specified output index is aliased to the input at the...
Definition: NvInferRuntime.h:1237
InterfaceInfo getInterfaceInfo() const noexcept override
Return version information associated with this interface. Applications must not override this method...
Definition: NvInferRuntime.h:1203
IRefitter * createInferRefitter(ICudaEngine &engine, ILogger &logger) noexcept
Create an instance of an IRefitter class.
Definition: NvInferRuntime.h:5449
IRuntime * createInferRuntime(ILogger &logger) noexcept
Create an instance of an IRuntime class.
Definition: NvInferRuntime.h:5438
The TensorRT-RTX API version 1 namespace.
uint32_t TacticSources
Represents a collection of one or more TacticSource values combined using bitwise-OR operations.
Definition: NvInferRuntime.h:2958
v_1_0::IOutputAllocator IOutputAllocator
Definition: NvInferRuntime.h:4274
EngineCapability
List of supported engine capability flows.
Definition: NvInferRuntime.h:76
DimensionOperation
An operation on two IDimensionExpr, which represent integer expressions used in dimension computation...
Definition: NvInferRuntime.h:179
@ kSUB
Subtract the second element from the first.
@ kSUM
Sum of the two operands.
@ kPROD
Product of the two operands.
@ kFLOOR_DIV
Floor division of the first element by the second.
@ kEQUAL
1 if operands are equal, 0 otherwise.
@ kMIN
Minimum of the two operands.
@ kLESS
1 if first operand is less than second operand, 0 otherwise.
@ kMAX
Maximum of the two operands.
@ kCEIL_DIV
Division rounding up.
v_1_0::IPluginV3OneCore IPluginV3OneCore
Definition: NvInferRuntime.h:1254
EngineInvalidityDiagnostics
Bitmask indicating the reason(s) why an engine is invalid.
Definition: NvInferRuntime.h:1889
@ kUNSUPPORTED_CC
Unsupported compute capability on current system.
@ kMALFORMED_ENGINE
Serialized engine does not conform to the expected format.
@ kINSUFFICIENT_GPU_MEMORY
Insufficient GPU memory to hold all engine weights.
@ kCUDA_ERROR
Incorrect installation of the CUDA driver or runtime.
@ kOLD_CUDA_DRIVER
CUDA driver too old (driver downgrade compared to when engine was built).
@ kOLD_CUDA_RUNTIME
CUDA runtime too old (runtime downgrade compared to when engine was built).
@ kVERSION_MISMATCH
TensorRT-RTX version mismatch to when engine was built.
TensorIOMode
Definition of tensor IO Mode.
Definition: NvInferRuntimeBase.h:658
HardwareCompatibilityLevel
Describes requirements of compatibility with GPU architectures other than that of the GPU on which th...
Definition: NvInfer.h:9096
SerializationFlag
List of valid flags that the engine can enable when serializing the bytes.
Definition: NvInferRuntime.h:2999
@ kEXCLUDE_WEIGHTS
Exclude the weights that can be refitted.
@ kINCLUDE_REFIT
Remain refittable if originally so.
constexpr int32_t EnumMax< DynamicShapesKernelSpecializationStrategy >() noexcept
Maximum number of dynamic shape specialization strategies in DynamicShapesKernelSpecializationStrateg...
Definition: NvInferRuntime.h:3204
v_1_0::IStreamWriter IStreamWriter
Definition: NvInferRuntime.h:720
v_1_0::IProfiler IProfiler
Definition: NvInferRuntime.h:1318
DynamicShapesKernelSpecializationStrategy
Different kernel specialization strategies for dynamic shapes.
Definition: NvInferRuntime.h:3178
SeekPosition
Controls the seek mode of IStreamReaderV2.
Definition: NvInferRuntime.h:727
@ kSET
From the beginning of the file.
@ kCUR
From the current position of the file.
@ kEND
From the tail of the file.
v_1_0::IStreamReaderV2 IStreamReaderV2
Definition: NvInferRuntime.h:797
uint32_t TempfileControlFlags
Represents a collection of one or more TempfileControlFlag values combined using bitwise-OR operation...
Definition: NvInferRuntime.h:1396
EngineStat
The kind of engine statistics that queried from the ICudaEngine.
Definition: NvInferRuntime.h:3374
@ kTOTAL_WEIGHTS_SIZE
Return the total weight size in bytes.
@ kSTRIPPED_WEIGHTS_SIZE
Return the stripped weight size in bytes for engines built with BuilderFlag::kSTRIP_PLAN.
CudaGraphStrategy
Strategies available for CUDA graphs optimizations for JIT (Just-In-Time) inference.
Definition: NvInferRuntime.h:3217
v_1_0::IGpuAllocator IGpuAllocator
Definition: NvInferRuntime.h:1855
EngineValidity
Whether a TensorRT-RTX engine is likely to be valid on the current system.
Definition: NvInferRuntime.h:1863
@ kINVALID
Engine is invalid on the current system.
@ kSUBOPTIMAL
Engine is likely to be valid on the current system, but may show reduced performance.
@ kVALID
Engine is likely to be valid on the current system, based on the information in the header.
constexpr int32_t EnumMax< WeightsRole >() noexcept
Maximum number of elements in WeightsRole enum.
Definition: NvInferRuntime.h:1339
char_t AsciiChar
Definition: NvInferRuntimeBase.h:115
TensorRTPhase
Indicates a phase of operation of TensorRT.
Definition: NvInferPluginBase.h:116
@ kV2_DYNAMICEXT
IPluginV2DynamicExt.
constexpr int32_t EnumMax< EngineStat >() noexcept
Maximum number of engine statistic kinds in EngineStat enum.
Definition: NvInferRuntime.h:3388
constexpr int32_t EnumMax< LayerInformationFormat >() noexcept
Definition: NvInferRuntime.h:5262
DataType
The type of weights and tensors. The datatypes other than kBOOL, kINT32, and kINT64 are "activation d...
Definition: NvInferRuntimeBase.h:145
DeviceType
The device that this layer/network will execute on.
Definition: NvInferRuntime.h:1350
@ kSCALE
Scale layer.
@ kCONSTANT
Constant layer.
v_1_0::IDebugListener IDebugListener
Definition: NvInferRuntime.h:4315
TempfileControlFlag
Flags used to control TensorRT's behavior when creating executable temporary files.
Definition: NvInferRuntime.h:1373
@ kALLOW_IN_MEMORY_FILES
Allow creating and loading files in-memory (or unnamed files).
constexpr int32_t EnumMax< OptProfileSelector >() noexcept
Number of different values of OptProfileSelector enum.
Definition: NvInferRuntime.h:2644
WeightsRole
How a layer uses particular Weights.
Definition: NvInferRuntime.h:1328
@ kSHIFT
shift part of IScaleLayer
@ kANY
Any other weights role.
@ kBIAS
bias for IConvolutionLayer or IDeconvolutionLayer
@ kKERNEL
kernel for IConvolutionLayer or IDeconvolutionLayer
constexpr int32_t EnumMax< ProfilingVerbosity >() noexcept
Maximum number of profile verbosity levels in ProfilingVerbosity enum.
Definition: NvInferRuntime.h:2978
ProfilingVerbosity
List of verbosity levels of layer information exposed in NVTX annotations and in IEngineInspector.
Definition: NvInferRuntime.h:2970
@ kLAYER_NAMES_ONLY
Print only the layer names. This is the default setting.
@ kDETAILED
Print detailed layer information including layer names and layer parameters.
TacticSource
List of tactic sources for TensorRT.
Definition: NvInferRuntime.h:2921
TensorFormat PluginFormat
PluginFormat is reserved for backward compatibility.
Definition: NvInferRuntimePlugin.h:54
v_1_0::IPluginV3OneRuntime IPluginV3OneRuntime
Definition: NvInferRuntime.h:1278
@ kMIN
Minimum of the two elements.
constexpr int32_t EnumMax< TempfileControlFlag >() noexcept
Maximum number of elements in TempfileControlFlag enum.
Definition: NvInferRuntime.h:1385
uint32_t SerializationFlags
Represents one or more SerializationFlag values using binary OR operations, e.g., 1U << Serialization...
Definition: NvInferRuntime.h:2989
@ kLINEAR
Supports linear (1D), bilinear (2D), and trilinear (3D) interpolation.
v_1_0::IPluginV3OneBuild IPluginV3OneBuild
Definition: NvInferRuntime.h:1266
TensorFormat
Format of the input/output tensors.
Definition: NvInferRuntime.h:1430
ExecutionContextAllocationStrategy
Different memory allocation behaviors for IExecutionContext.
Definition: NvInferRuntime.h:3104
@ kSTATIC
Default static allocation with the maximum size across all profiles.
@ kUSER_MANAGED
The user supplies custom allocation to the execution context.
@ kON_PROFILE_CHANGE
Reallocate for a profile when it's selected.
constexpr int32_t EnumMax< CudaGraphStrategy >() noexcept
Maximum number of CUDA graph strategies in CudaGraphStrategy enum.
Definition: NvInferRuntime.h:3239
constexpr int32_t EnumMax< TacticSource >() noexcept
Maximum number of tactic sources in TacticSource enum.
Definition: NvInferRuntime.h:2947
LayerInformationFormat
The format in which the IEngineInspector prints the layer information.
Definition: NvInferRuntime.h:5254
@ kJSON
Print layer information in JSON format.
@ kONELINE
Print layer information in one line per layer.
v_1_0::IStreamReader IStreamReader
Definition: NvInferRuntime.h:710
AllocatorFlag
Allowed type of memory allocation.
Definition: NvInferRuntime.h:1554
@ kRESIZABLE
TensorRT may call realloc() on this allocation.
@ kMAX
Maximum over elements.
constexpr int32_t EnumMax< DeviceType >() noexcept
Maximum number of elements in DeviceType enum.
Definition: NvInferRuntime.h:1357
constexpr int32_t EnumMax< DimensionOperation >() noexcept
Maximum number of elements in DimensionOperation enum.
Definition: NvInferRuntime.h:193
constexpr int32_t EnumMax< ExecutionContextAllocationStrategy >() noexcept
Maximum number of memory allocation strategies in ExecutionContextAllocationStrategy enum.
Definition: NvInferRuntime.h:3116
constexpr int32_t EnumMax< SerializationFlag >() noexcept
Maximum number of serialization flags in SerializationFlag enum.
Definition: NvInferRuntime.h:3007
TensorLocation
The location for tensor data storage, device or host.
Definition: NvInferRuntime.h:204
@ kHOST
Data stored on host.
@ kDEVICE
Data stored on device.
OptProfileSelector
When setting or querying optimization profile parameters (such as shape tensor inputs or dynamic dime...
Definition: NvInferRuntime.h:2632
@ kOPT
This is used to set or get the value that is used in the optimization (kernel selection).
uint32_t AllocatorFlags
Definition: NvInferRuntime.h:1570
Summarizes tensors that a plugin might see for an input or output.
Definition: NvInferRuntime.h:362
Dims min
Lower bounds on tensor’s dimensions.
Definition: NvInferRuntime.h:367
Dims max
Upper bounds on tensor’s dimensions.
Definition: NvInferRuntime.h:370
Dims opt
Optimum value of tensor’s dimensions specified for auto-tuning.
Definition: NvInferRuntime.h:373
PluginTensorDesc desc
Information required to interpret a pointer to tensor data, except that desc.dims has -1 in place of ...
Definition: NvInferRuntime.h:364
Plugin field collection struct.
Definition: NvInferPluginBase.h:103
Fields that a plugin might see for an input or output.
Definition: NvInferRuntimePlugin.h:73
Declaration of EnumMaxImpl struct to store maximum number of elements in an enumeration type.
Definition: NvInferRuntimeBase.h:128

  Copyright © 2024 NVIDIA Corporation
  Privacy Policy | Manage My Privacy | Do Not Sell or Share My Data | Terms of Service | Accessibility | Corporate Policies | Product Security | Contact