TensorRT 10.12.0
NvInferRuntime.h
Go to the documentation of this file.
/*
 * SPDX-FileCopyrightText: Copyright (c) 1993-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 * SPDX-License-Identifier: Apache-2.0
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
17
18#ifndef NV_INFER_RUNTIME_H
19#define NV_INFER_RUNTIME_H
20
26
27#include "NvInferImpl.h"
28#define NV_INFER_INTERNAL_INCLUDE 1
29#include "NvInferPluginBase.h"
30#undef NV_INFER_INTERNAL_INCLUDE
32
33namespace nvinfer1
34{
35
// Forward declarations of runtime interface classes defined later in this header.
class IExecutionContext;
class ICudaEngine;
class IPluginFactory;
class IEngineInspector;
40
49
51{
52protected:
53 INoCopy() = default;
54 virtual ~INoCopy() = default;
55 INoCopy(INoCopy const& other) = delete;
56 INoCopy& operator=(INoCopy const& other) = delete;
57 INoCopy(INoCopy&& other) = delete;
58 INoCopy& operator=(INoCopy&& other) = delete;
59};
60
//! \enum EngineCapability
//! \brief Capability mode of an engine (standard vs. safety-restricted flows).
//!
//! NOTE(review): this file is a Doxygen scrape — the numeric prefixes below are leftover
//! source line numbers, and the comment lines between enumerators were dropped.
//! impl::EnumMaxImpl<EngineCapability>::kVALUE == 3 (later in this file) implies a third
//! enumerator (between the "90" and "97" markers) was lost in extraction — TODO restore
//! from the original NvInferRuntime.h.
75enum class EngineCapability : int32_t
76{
    //! Standard TensorRT flow (default capability).
81    kSTANDARD = 0,
82
    //! Safety-restricted capability — presumably targets the safety runtime; confirm against original header.
89    kSAFETY = 1,
90
    //! NOTE(review): a third enumerator is missing here in the scrape.
97};
98
99namespace impl
100{
102template <>
104{
105 static constexpr int32_t kVALUE = 3;
106};
107} // namespace impl
108
124{
125public:
127 void const* values;
128 int64_t count;
129};
130
141class IHostMemory : public INoCopy
142{
143public:
144 virtual ~IHostMemory() noexcept = default;
145
147 void* data() const noexcept
148 {
149 return mImpl->data();
150 }
151
153 std::size_t size() const noexcept
154 {
155 return mImpl->size();
156 }
157
159 DataType type() const noexcept
160 {
161 return mImpl->type();
162 }
163
164protected:
165 apiv::VHostMemory* mImpl;
166};
167
//!
//! \enum DimensionOperation
//!
//! \brief An operation on two IDimensionExpr, which represent integer expressions
//! used in dimension computations (see IExprBuilder::operation()).
//!
enum class DimensionOperation : int32_t
{
    kSUM = 0,       //!< Sum of the two operands.
    kPROD = 1,      //!< Product of the two operands.
    kMAX = 2,       //!< Maximum of the two operands.
    kMIN = 3,       //!< Minimum of the two operands.
    kSUB = 4,       //!< Subtract the second operand from the first.
    kEQUAL = 5,     //!< 1 if operands are equal, 0 otherwise.
    kLESS = 6,      //!< 1 if the first operand is less than the second, 0 otherwise.
    kFLOOR_DIV = 7, //!< Floor division of the first operand by the second.
    kCEIL_DIV = 8   //!< Division rounding up.
};
190
192template <>
193constexpr inline int32_t EnumMax<DimensionOperation>() noexcept
194{
195 return 9;
196}
197
//!
//! \enum TensorLocation
//!
//! \brief The location of tensor data: device (GPU) or host (CPU) memory.
//!
enum class TensorLocation : int32_t
{
    kDEVICE = 0, //!< Data is stored on the device.
    kHOST = 1,   //!< Data is stored on the host.
};
208
209namespace impl
210{
212template <>
214{
215 static constexpr int32_t kVALUE = 2;
216};
217} // namespace impl
218
232{
233public:
237 bool isConstant() const noexcept
238 {
239 return mImpl->isConstant();
240 }
241
248 int64_t getConstantValue() const noexcept
249 {
250 return mImpl->getConstantValue();
251 }
252
253protected:
254 apiv::VDimensionExpr* mImpl;
255 virtual ~IDimensionExpr() noexcept = default;
256
257public:
263 bool isSizeTensor() const noexcept
264 {
265 return mImpl->isSizeTensor();
266 }
267};
268
//!
//! \class IExprBuilder (scraped copy)
//!
//! \brief Factory for IDimensionExpr objects: constants, binary operations on
//! expressions, and size-tensor declarations. All calls delegate to mImpl.
//!
//! NOTE(review): numeric prefixes are leftover scraped line numbers. The declaration
//! line of operation() (before the "304" marker) was dropped by the extraction; the
//! surviving body shows it returns mImpl->operation(op, first, second), so it is
//! presumably `IDimensionExpr const* operation(...)` — restore from the original header.
//!
286class IExprBuilder : public INoCopy
287{
288public:
    //! Return an expression representing the given constant value.
292    IDimensionExpr const* constant(int64_t value) noexcept
293    {
294        return mImpl->constant(value);
295    }
296
    //! NOTE(review): signature line lost in scrape — body applies DimensionOperation `op`
    //! to the two operand expressions via mImpl->operation().
304        DimensionOperation op, IDimensionExpr const& first, IDimensionExpr const& second) noexcept
305    {
306        return mImpl->operation(op, first, second);
307    }
308
309protected:
310    apiv::VExprBuilder* mImpl; //!< pimpl: versioned implementation supplied by the library.
311    virtual ~IExprBuilder() noexcept = default;
312
313public:
    //! Declare output `outputIndex` as a size tensor, with `opt` and `upper` as its
    //! optimum and upper-bound expressions.
338    IDimensionExpr const* declareSizeTensor(int32_t outputIndex, IDimensionExpr const& opt, IDimensionExpr const& upper)
339    {
340        return mImpl->declareSizeTensor(outputIndex, opt, upper);
341    }
342};
343
350{
351public:
352 int32_t nbDims;
354};
355
362{
365
368
371
374};
375
407{
408public:
409 IPluginV2DynamicExt* clone() const noexcept override = 0;
410
435 virtual DimsExprs getOutputDimensions(
436 int32_t outputIndex, DimsExprs const* inputs, int32_t nbInputs, IExprBuilder& exprBuilder) noexcept = 0;
437
441 static constexpr int32_t kFORMAT_COMBINATION_LIMIT = 100;
442
475 virtual bool supportsFormatCombination(
476 int32_t pos, PluginTensorDesc const* inOut, int32_t nbInputs, int32_t nbOutputs) noexcept = 0;
477
515 virtual void configurePlugin(DynamicPluginTensorDesc const* in, int32_t nbInputs,
516 DynamicPluginTensorDesc const* out, int32_t nbOutputs) noexcept = 0;
517
527 virtual size_t getWorkspaceSize(PluginTensorDesc const* inputs, int32_t nbInputs, PluginTensorDesc const* outputs,
528 int32_t nbOutputs) const noexcept = 0;
529
542 virtual int32_t enqueue(PluginTensorDesc const* inputDesc, PluginTensorDesc const* outputDesc,
543 void const* const* inputs, void* const* outputs, void* workspace, cudaStream_t stream) noexcept = 0;
544
545protected:
553 int32_t getTensorRTVersion() const noexcept override
554 {
555 return (static_cast<int32_t>(PluginVersion::kV2_DYNAMICEXT) << 24 | (NV_TENSORRT_VERSION & 0xFFFFFF));
556 }
557
558 virtual ~IPluginV2DynamicExt() noexcept {}
559
560private:
561 // Following are obsolete base class methods, and must not be implemented or used.
562
566 void configurePlugin(Dims const*, int32_t, Dims const*, int32_t, DataType const*, DataType const*, bool const*,
567 bool const*, PluginFormat, int32_t) noexcept override final
568 {
569 }
570
574 bool supportsFormat(DataType, PluginFormat) const noexcept override final
575 {
576 return false;
577 }
578
582 Dims getOutputDimensions(int32_t, Dims const*, int32_t) noexcept override final
583 {
584 return Dims{-1, {}};
585 }
586
594 TRT_DEPRECATED bool isOutputBroadcastAcrossBatch(int32_t, bool const*, int32_t) const noexcept override final
595 {
596 return false;
597 }
598
606 TRT_DEPRECATED bool canBroadcastInputAcrossBatch(int32_t) const noexcept override final
607 {
608 return true;
609 }
610
614 size_t getWorkspaceSize(int32_t) const noexcept override final
615 {
616 return 0;
617 }
618
622 int32_t enqueue(int32_t, void const* const*, void* const*, void*, cudaStream_t) noexcept override final
623 {
624 return 1;
625 }
626};
627
628namespace v_1_0
629{
631{
632public:
637 ~IStreamReader() override = default;
638 IStreamReader() = default;
639
643 InterfaceInfo getInterfaceInfo() const noexcept override
644 {
645 return InterfaceInfo{"IStreamReader", 1, 0};
646 }
647
656 virtual int64_t read(void* destination, int64_t nbBytes) = 0;
657
658protected:
659 IStreamReader(IStreamReader const&) = default;
663};
664} // namespace v_1_0
665
675
//!
//! \enum SeekPosition
//!
//! \brief Reference point for IStreamReaderV2::seek() offsets
//! (mirrors the fseek() SEEK_SET/SEEK_CUR/SEEK_END convention).
//!
enum class SeekPosition : int32_t
{
    //! Offset is relative to the beginning of the stream.
    kSET = 0,

    //! Offset is relative to the current position in the stream.
    kCUR = 1,

    //! Offset is relative to the end of the stream.
    kEND = 2,
};
691
692namespace v_1_0
693{
695{
696public:
701 ~IStreamReaderV2() override = default;
702 IStreamReaderV2() = default;
703
707 InterfaceInfo getInterfaceInfo() const noexcept override
708 {
709 return InterfaceInfo{"IStreamReaderV2", 1, 0};
710 }
711
722 virtual int64_t read(void* destination, int64_t nbBytes, cudaStream_t stream) noexcept = 0;
723
732 virtual bool seek(int64_t offset, SeekPosition where) noexcept = 0;
733
734protected:
739};
740} // namespace v_1_0
741
752
767{
768public:
773 virtual IGpuAllocator* getGpuAllocator() const noexcept = 0;
774
779 virtual IErrorRecorder* getErrorRecorder() const noexcept = 0;
780 virtual ~IPluginResourceContext() noexcept = default;
781
782protected:
786 IPluginResourceContext& operator=(IPluginResourceContext const&) & = default;
788};
789
790namespace v_1_0
791{
793{
794public:
798 InterfaceInfo getInterfaceInfo() const noexcept override
799 {
800 return InterfaceInfo{"PLUGIN_V3ONE_CORE", 1, 0};
801 }
802
811 virtual AsciiChar const* getPluginName() const noexcept = 0;
812
821 virtual AsciiChar const* getPluginVersion() const noexcept = 0;
822
832 virtual AsciiChar const* getPluginNamespace() const noexcept = 0;
833};
834
836{
837public:
843 static constexpr int32_t kDEFAULT_FORMAT_COMBINATION_LIMIT = 100;
844
848 InterfaceInfo getInterfaceInfo() const noexcept override
849 {
850 return InterfaceInfo{"PLUGIN_V3ONE_BUILD", 1, 0};
851 }
852
872 virtual int32_t configurePlugin(DynamicPluginTensorDesc const* in, int32_t nbInputs,
873 DynamicPluginTensorDesc const* out, int32_t nbOutputs) noexcept = 0;
874
891 virtual int32_t getOutputDataTypes(
892 DataType* outputTypes, int32_t nbOutputs, const DataType* inputTypes, int32_t nbInputs) const noexcept = 0;
893
915 virtual int32_t getOutputShapes(DimsExprs const* inputs, int32_t nbInputs, DimsExprs const* shapeInputs,
916 int32_t nbShapeInputs, DimsExprs* outputs, int32_t nbOutputs, IExprBuilder& exprBuilder) noexcept = 0;
917
953 int32_t pos, DynamicPluginTensorDesc const* inOut, int32_t nbInputs, int32_t nbOutputs) noexcept = 0;
954
960 virtual int32_t getNbOutputs() const noexcept = 0;
961
971 virtual size_t getWorkspaceSize(DynamicPluginTensorDesc const* inputs, int32_t nbInputs,
972 DynamicPluginTensorDesc const* outputs, int32_t nbOutputs) const noexcept
973 {
974 return 0;
975 }
976
1008 virtual int32_t getValidTactics(int32_t* tactics, int32_t nbTactics) noexcept
1009 {
1010 return 0;
1011 }
1012
1016 virtual int32_t getNbTactics() noexcept
1017 {
1018 return 0;
1019 }
1020
1032 virtual char const* getTimingCacheID() noexcept
1033 {
1034 return nullptr;
1035 }
1036
1040 virtual int32_t getFormatCombinationLimit() noexcept
1041 {
1042 return kDEFAULT_FORMAT_COMBINATION_LIMIT;
1043 }
1044
1051 virtual char const* getMetadataString() noexcept
1052 {
1053 return nullptr;
1054 }
1055};
1056
1058{
1059public:
1063 InterfaceInfo getInterfaceInfo() const noexcept override
1064 {
1065 return InterfaceInfo{"PLUGIN_V3ONE_RUNTIME", 1, 0};
1066 }
1067
1075 virtual int32_t setTactic(int32_t tactic) noexcept
1076 {
1077 return 0;
1078 }
1079
1098 virtual int32_t onShapeChange(
1099 PluginTensorDesc const* in, int32_t nbInputs, PluginTensorDesc const* out, int32_t nbOutputs) noexcept = 0;
1100
1114 virtual int32_t enqueue(PluginTensorDesc const* inputDesc, PluginTensorDesc const* outputDesc,
1115 void const* const* inputs, void* const* outputs, void* workspace, cudaStream_t stream) noexcept = 0;
1116
1136 virtual IPluginV3* attachToContext(IPluginResourceContext* context) noexcept = 0;
1137
1143
1147 virtual PluginFieldCollection const* getFieldsToSerialize() noexcept = 0;
1148};
1149} // namespace v_1_0
1150
1151namespace v_2_0
1152{
1153
1155{
1156public:
1157 InterfaceInfo getInterfaceInfo() const noexcept override
1158 {
1159 return InterfaceInfo{"PLUGIN_V3ONE_BUILD", 2, 0};
1160 }
1161
1191 virtual int32_t getAliasedInput(int32_t outputIndex) noexcept
1192 {
1193 return -1;
1194 }
1195};
1196
1197} // namespace v_2_0
1198
1209
1221
1233
1242
1243namespace v_1_0
1244{
1246{
1247public:
1255 virtual void reportLayerTime(char const* layerName, float ms) noexcept = 0;
1256
1257 virtual ~IProfiler() noexcept {}
1258};
1259} // namespace v_1_0
1260
1273
//!
//! \enum WeightsRole
//!
//! \brief How a layer uses particular Weights; used when refitting
//! (see IRefitter::setWeights()/getMissing()/getAll()).
//!
enum class WeightsRole : int32_t
{
    kKERNEL = 0,   //!< Kernel weights (e.g. convolution / deconvolution / matrix multiply).
    kBIAS = 1,     //!< Bias weights.
    kSHIFT = 2,    //!< Shift part of a scale layer.
    kSCALE = 3,    //!< Scale part of a scale layer.
    kCONSTANT = 4, //!< Weights of a constant layer.
    kANY = 5,      //!< Any other weights role.
};
1290
1292template <>
1293constexpr inline int32_t EnumMax<WeightsRole>() noexcept
1294{
1295 return 6;
1296}
1297
//!
//! \enum DeviceType
//!
//! \brief The device that this layer/network will execute on.
//!
enum class DeviceType : int32_t
{
    kGPU = 0, //!< GPU device.
    kDLA = 1, //!< DLA (Deep Learning Accelerator) core.
};
1308
1310template <>
1311constexpr inline int32_t EnumMax<DeviceType>() noexcept
1312{
1313 return 2;
1314}
1315
//! \enum TempfileControlFlag
//! \brief Flags controlling TensorRT's use of temporary files (combined into a
//! TempfileControlFlags bitmask).
//! NOTE(review): both enumerator lines (scrape markers "1330"/"1335") were lost in the
//! Doxygen extraction; EnumMax<TempfileControlFlag>() == 2 below confirms two enumerators
//! existed — restore them from the original NvInferRuntime.h.
1326enum class TempfileControlFlag : int32_t
1327{
1330
1335};
1336
1338template <>
1339constexpr inline int32_t EnumMax<TempfileControlFlag>() noexcept
1340{
1341 return 2;
1342}
1343
//! Bitmask of one or more TempfileControlFlag values combined with bitwise OR. \see TempfileControlFlag
using TempfileControlFlags = uint32_t;
1351
//!
//! \enum TensorFormat
//!
//! \brief Format of input/output tensors (memory layout / vectorization).
//! Enumerator semantics summarized from the TensorRT documentation; the scraped
//! per-format details were lost in extraction.
//!
enum class TensorFormat : int32_t
{
    //! Row-major linear (non-vectorized) format.
    kLINEAR = 0,

    //! Two-wide channel-vectorized row-major format.
    kCHW2 = 1,

    //! Channel-last format with C padded to a multiple of 8.
    kHWC8 = 2,

    //! Four-wide channel-vectorized row-major format.
    kCHW4 = 3,

    //! Sixteen-wide channel-vectorized row-major format.
    kCHW16 = 4,

    //! Thirty-two-wide channel-vectorized row-major format.
    kCHW32 = 5,

    //! Channel-last format for 3-spatial-dimension tensors with C padded to a multiple of 8.
    kDHWC8 = 6,

    //! Thirty-two-wide channel-vectorized format for 3-spatial-dimension tensors.
    kCDHW32 = 7,

    //! Non-vectorized channel-last (HWC) format.
    kHWC = 8,

    //! DLA planar format.
    kDLA_LINEAR = 9,

    //! DLA image format (HWC4).
    kDLA_HWC4 = 10,

    //! Channel-last format with C padded to a multiple of 16.
    kHWC16 = 11,

    //! Non-vectorized channel-last format with 3 spatial dimensions (DHWC).
    kDHWC = 12
};
1490
1491namespace impl
1492{
1494template <>
1496{
1498 static constexpr int32_t kVALUE = 13;
1499};
1500} // namespace impl
1501
//!
//! \enum AllocatorFlag
//!
//! \brief Flags describing allocation properties requested from an IGpuAllocator.
//!
enum class AllocatorFlag : int32_t
{
    //! The allocation may later be resized (see IGpuAllocator::reallocate()).
    kRESIZABLE = 0,
};
1512
1513namespace impl
1514{
1516template <>
1518{
1520 static constexpr int32_t kVALUE = 1;
1521};
1522} // namespace impl
1523
//! Bitmask of one or more AllocatorFlag values combined with bitwise OR. \see AllocatorFlag
using AllocatorFlags = uint32_t;
1525
1528
//! \class ILogger (scraped copy — the `class ILogger` declaration line, scrape marker
//! "1542", was dropped by the Doxygen extraction; the identity is evident from the
//! copy-constructor/assignment names below).
//! \brief Application-implemented logging interface: TensorRT reports messages through
//! the pure-virtual log() callback with an associated Severity.
1542{
1543public:
    //! Severity level associated with each log message, ordered from most to least severe.
1549    enum class Severity : int32_t
1550    {
        //! An internal error has occurred.
1552        kINTERNAL_ERROR = 0,
        //! An application error has occurred.
1554        kERROR = 1,
        //! A warning.
1556        kWARNING = 2,
        //! Informational message.
1558        kINFO = 3,
        //! Verbose (debug-level) message.
1560        kVERBOSE = 4,
1561    };
1562
    //! Callback implemented by the application to receive log messages.
    //! \param severity severity of the message
    //! \param msg      NUL-terminated message text
1581    virtual void log(Severity severity, AsciiChar const* msg) noexcept = 0;
1582
1583    ILogger() = default;
1584    virtual ~ILogger() = default;
1585
1586protected:
    // Copy/move permitted for derived classes but hidden from public API docs.
1587    // @cond SuppressDoxyWarnings
1588    ILogger(ILogger const&) = default;
1589    ILogger(ILogger&&) = default;
1590    ILogger& operator=(ILogger const&) & = default;
1591    ILogger& operator=(ILogger&&) & = default;
1592    // @endcond
1593};
1594
1595namespace impl
1596{
1598template <>
1599struct EnumMaxImpl<ILogger::Severity>
1600{
1602 static constexpr int32_t kVALUE = 5;
1603};
1604} // namespace impl
1605
1606namespace v_1_0
1607{
1608
1610{
1611public:
1637 uint64_t const size, uint64_t const alignment, AllocatorFlags const flags) noexcept = 0;
1638
1639 ~IGpuAllocator() override = default;
1640 IGpuAllocator() = default;
1641
1679 virtual void* reallocate(void* const /*baseAddr*/, uint64_t /*alignment*/, uint64_t /*newSize*/) noexcept
1680 {
1681 return nullptr;
1682 }
1683
1702 TRT_DEPRECATED virtual bool deallocate(void* const memory) noexcept = 0;
1703
1732 virtual void* allocateAsync(
1733 uint64_t const size, uint64_t const alignment, AllocatorFlags const flags, cudaStream_t /*stream*/) noexcept
1734 {
1735 return allocate(size, alignment, flags);
1736 }
1765 virtual bool deallocateAsync(void* const memory, cudaStream_t /*stream*/) noexcept
1766 {
1767 return deallocate(memory);
1768 }
1769
1773 InterfaceInfo getInterfaceInfo() const noexcept override
1774 {
1775 return {"IGpuAllocator", 1, 0};
1776 }
1777
1778protected:
1779 // @cond SuppressDoxyWarnings
1780 IGpuAllocator(IGpuAllocator const&) = default;
1781 IGpuAllocator(IGpuAllocator&&) = default;
1782 IGpuAllocator& operator=(IGpuAllocator const&) & = default;
1783 IGpuAllocator& operator=(IGpuAllocator&&) & = default;
1784 // @endcond
1785};
1786
1787} // namespace v_1_0
1788
1810
//!
//! \class IRuntime (scraped copy)
//!
//! \brief Deserializes engines (plans) and configures runtime-wide options:
//! DLA core selection, GPU allocator, error recorder, thread limits, and
//! temporary-file handling. All calls delegate to the versioned mImpl object.
//!
//! NOTE(review): numeric prefixes are leftover scraped line numbers. Several member
//! declaration lines (before the "1903", "1945", "1968", "2069", "2081" and "2091"
//! markers) were dropped by the Doxygen extraction; the surviving bodies show which
//! mImpl method each delegates to. Restore signatures from the original header.
//!
1818class IRuntime : public INoCopy
1819{
1820public:
1821    virtual ~IRuntime() noexcept = default;
1822
    //! Select the DLA core on which deserialized engines will execute.
1834    void setDLACore(int32_t dlaCore) noexcept
1835    {
1836        mImpl->setDLACore(dlaCore);
1837    }
1838
    //! \return The currently selected DLA core.
1844    int32_t getDLACore() const noexcept
1845    {
1846        return mImpl->getDLACore();
1847    }
1848
    //! \return The number of DLA cores available on the device.
1852    int32_t getNbDLACores() const noexcept
1853    {
1854        return mImpl->getNbDLACores();
1855    }
1856
    //! Set the GPU allocator used for device memory allocation by this runtime.
1868    void setGpuAllocator(IGpuAllocator* allocator) noexcept
1869    {
1870        mImpl->setGpuAllocator(allocator);
1871    }
1872
1884    //
    //! Register an error recorder to capture errors raised through this runtime.
1887    void setErrorRecorder(IErrorRecorder* recorder) noexcept
1888    {
1889        mImpl->setErrorRecorder(recorder);
1890    }
1891
    //! NOTE(review): declaration line lost in scrape — body returns mImpl->getErrorRecorder(),
    //! so this is presumably `IErrorRecorder* getErrorRecorder() const noexcept`.
1903    {
1904        return mImpl->getErrorRecorder();
1905    }
1906
    //! Deserialize an engine from an in-memory blob of `size` bytes.
1920    ICudaEngine* deserializeCudaEngine(void const* blob, std::size_t size) noexcept
1921    {
1922        return mImpl->deserializeCudaEngine(blob, size);
1923    }
1924
    //! NOTE(review): declaration line lost in scrape — body delegates to
    //! mImpl->deserializeCudaEngine(streamReader) (IStreamReader overload).
1945    {
1946        return mImpl->deserializeCudaEngine(streamReader);
1947    }
1948
    //! NOTE(review): declaration line lost in scrape — body delegates to
    //! mImpl->deserializeCudaEngineV2(streamReader) (IStreamReaderV2 overload).
1968    {
1969        return mImpl->deserializeCudaEngineV2(streamReader);
1970    }
1971
    //! \return The logger associated with this runtime.
1977    ILogger* getLogger() const noexcept
1978    {
1979        return mImpl->getLogger();
1980    }
1981
    //! Limit the number of threads the runtime may use; \return true on success.
1992    bool setMaxThreads(int32_t maxThreads) noexcept
1993    {
1994        return mImpl->setMaxThreads(maxThreads);
1995    }
1996
2006    int32_t getMaxThreads() const noexcept
2007    {
2008        return mImpl->getMaxThreads();
2009    }
2010
    //! Set the directory used for temporary files.
2041    void setTemporaryDirectory(char const* path) noexcept
2042    {
2043        return mImpl->setTemporaryDirectory(path);
2044    }
2045
2052    char const* getTemporaryDirectory() const noexcept
2053    {
2054        return mImpl->getTemporaryDirectory();
2055    }
2056
    //! NOTE(review): declaration line lost in scrape — body delegates to
    //! mImpl->setTempfileControlFlags(flags); presumably takes a TempfileControlFlags bitmask.
2069    {
2070        return mImpl->setTempfileControlFlags(flags);
2071    }
2072
    //! NOTE(review): declaration line lost in scrape — getter for the tempfile control flags.
2081    {
2082        return mImpl->getTempfileControlFlags();
2083    }
2084
    //! NOTE(review): declaration line lost in scrape — accessor for the plugin registry.
2091    {
2092        return mImpl->getPluginRegistry();
2093    }
2094
    //! Load a runtime library from `path`; \return the loaded runtime, per mImpl->loadRuntime.
2108    IRuntime* loadRuntime(char const* path) noexcept
2109    {
2110        return mImpl->loadRuntime(path);
2111    }
2112
    //! Allow or disallow deserializing engines that carry host executable code.
2120    void setEngineHostCodeAllowed(bool allowed) noexcept
2121    {
2122        return mImpl->setEngineHostCodeAllowed(allowed);
2123    }
2124
2130    bool getEngineHostCodeAllowed() const noexcept
2131    {
2132        return mImpl->getEngineHostCodeAllowed();
2133    }
2134
2135protected:
2136    apiv::VRuntime* mImpl; //!< pimpl: versioned implementation supplied by the library.
2137};
2138
//!
//! \class IRefitter (scraped copy)
//!
//! \brief Updates (refits) weights in a built engine without rebuilding it. Weights
//! may be addressed by layer name + role, or by name via the named-weights API.
//! All calls delegate to the versioned mImpl object.
//!
//! NOTE(review): numeric prefixes are leftover scraped line numbers; the declaration
//! line before the "2328" marker was dropped by the extraction (body shows it is the
//! error-recorder getter). Restore from the original header.
//!
2146class IRefitter : public INoCopy
2147{
2148public:
2149    virtual ~IRefitter() noexcept = default;
2150
    //! Supply replacement weights for the given layer and role; \return true on success.
2166    bool setWeights(char const* layerName, WeightsRole role, Weights weights) noexcept
2167    {
2168        return mImpl->setWeights(layerName, role, weights);
2169    }
2170
    //! Apply all supplied weights to the engine (synchronous); \return true on success.
2183    bool refitCudaEngine() noexcept
2184    {
2185        return mImpl->refitCudaEngine();
2186    }
2187
    //! Query up to `size` weights that still must be supplied before refitting.
2204    int32_t getMissing(int32_t size, char const** layerNames, WeightsRole* roles) noexcept
2205    {
2206        return mImpl->getMissing(size, layerNames, roles);
2207    }
2208
    //! Query up to `size` weights that may be supplied via setWeights().
2221    int32_t getAll(int32_t size, char const** layerNames, WeightsRole* roles) noexcept
2222    {
2223        return mImpl->getAll(size, layerNames, roles);
2224    }
2225
    //! \deprecated Set the dynamic range [min, max] for a tensor.
2243    TRT_DEPRECATED bool setDynamicRange(char const* tensorName, float min, float max) noexcept
2244    {
2245        return mImpl->setDynamicRange(tensorName, min, max);
2246    }
2247
    //! \deprecated Lower bound of the tensor's dynamic range.
2259    TRT_DEPRECATED float getDynamicRangeMin(char const* tensorName) const noexcept
2260    {
2261        return mImpl->getDynamicRangeMin(tensorName);
2262    }
2263
    //! \deprecated Upper bound of the tensor's dynamic range.
2275    TRT_DEPRECATED float getDynamicRangeMax(char const* tensorName) const noexcept
2276    {
2277        return mImpl->getDynamicRangeMax(tensorName);
2278    }
2279
    //! \deprecated Query up to `size` tensor names that have dynamic ranges set.
2293    TRT_DEPRECATED int32_t getTensorsWithDynamicRange(int32_t size, char const** tensorNames) const noexcept
2294    {
2295        return mImpl->getTensorsWithDynamicRange(size, tensorNames);
2296    }
2297
2309    //
    //! Register an error recorder to capture errors raised by this refitter.
2312    void setErrorRecorder(IErrorRecorder* recorder) noexcept
2313    {
2314        mImpl->setErrorRecorder(recorder);
2315    }
2316
    //! NOTE(review): declaration line lost in scrape — body returns mImpl->getErrorRecorder(),
    //! so this is presumably `IErrorRecorder* getErrorRecorder() const noexcept`.
2328    {
2329        return mImpl->getErrorRecorder();
2330    }
2331
    //! Supply replacement weights addressed by name; \return true on success.
2352    bool setNamedWeights(char const* name, Weights weights) noexcept
2353    {
2354        return mImpl->setNamedWeights(name, weights);
2355    }
2356
    //! Query up to `size` named weights that still must be supplied before refitting.
2372    int32_t getMissingWeights(int32_t size, char const** weightsNames) noexcept
2373    {
2374        return mImpl->getMissingWeights(size, weightsNames);
2375    }
2376
    //! Query up to `size` named weights that may be supplied.
2388    int32_t getAllWeights(int32_t size, char const** weightsNames) noexcept
2389    {
2390        return mImpl->getAllWeights(size, weightsNames);
2391    }
2392
    //! \return The logger associated with this refitter.
2398    ILogger* getLogger() const noexcept
2399    {
2400        return mImpl->getLogger();
2401    }
2402
    //! Limit the number of threads the refitter may use; \return true on success.
2414    bool setMaxThreads(int32_t maxThreads) noexcept
2415    {
2416        return mImpl->setMaxThreads(maxThreads);
2417    }
2418
2428    int32_t getMaxThreads() const noexcept
2429    {
2430        return mImpl->getMaxThreads();
2431    }
2432
    //! Supply named weights together with their memory location (host/device).
    //! Note: delegates to the distinct mImpl->setNamedWeightsWithLocation entry point.
2455    bool setNamedWeights(char const* name, Weights weights, TensorLocation location) noexcept
2456    {
2457        return mImpl->setNamedWeightsWithLocation(name, weights, location);
2458    }
2459
    //! \return The weights previously supplied under `weightsName`.
2471    Weights getNamedWeights(char const* weightsName) const noexcept
2472    {
2473        return mImpl->getNamedWeights(weightsName);
2474    }
2475
    //! \return The memory location (host/device) of the named weights.
2487    TensorLocation getWeightsLocation(char const* weightsName) const noexcept
2488    {
2489        return mImpl->getWeightsLocation(weightsName);
2490    }
2491
    //! Remove previously supplied named weights; \return true on success.
2503    bool unsetNamedWeights(char const* weightsName) noexcept
2504    {
2505        return mImpl->unsetNamedWeights(weightsName);
2506    }
2507
    //! Enable or disable validation of supplied weights.
2519    void setWeightsValidation(bool weightsValidation) noexcept
2520    {
2521        return mImpl->setWeightsValidation(weightsValidation);
2522    }
2523
2527    bool getWeightsValidation() const noexcept
2528    {
2529        return mImpl->getWeightsValidation();
2530    }
2531
    //! Asynchronous variant of refitCudaEngine() enqueued on `stream`.
2549    bool refitCudaEngineAsync(cudaStream_t stream) noexcept
2550    {
2551        return mImpl->refitCudaEngineAsync(stream);
2552    }
2553
    //! \return A prototype (shape/type description) for the named weights.
2567    Weights getWeightsPrototype(char const* weightsName) const noexcept
2568    {
2569        return mImpl->getWeightsPrototype(weightsName);
2570    }
2571
2572protected:
2573    apiv::VRefitter* mImpl; //!< pimpl: versioned implementation supplied by the library.
2574};
2575
//!
//! \enum OptProfileSelector
//!
//! \brief Selects the minimum, optimum, or maximum value of an optimization-profile
//! entry (see IOptimizationProfile and ICudaEngine::getProfileShape()).
//!
enum class OptProfileSelector : int32_t
{
    kMIN = 0, //!< Minimum permitted value.
    kOPT = 1, //!< Value the engine is optimized for.
    kMAX = 2  //!< Maximum permitted value.
};
2592
2598template <>
2599constexpr inline int32_t EnumMax<OptProfileSelector>() noexcept
2600{
2601 return 3;
2602}
2603
2627{
2628public:
2656 bool setDimensions(char const* inputName, OptProfileSelector select, Dims const& dims) noexcept
2657 {
2658 return mImpl->setDimensions(inputName, select, dims);
2659 }
2660
2668 Dims getDimensions(char const* inputName, OptProfileSelector select) const noexcept
2669 {
2670 return mImpl->getDimensions(inputName, select);
2671 }
2672
2721 char const* inputName, OptProfileSelector select, int32_t const* values, int32_t nbValues) noexcept
2722 {
2723 return mImpl->setShapeValues(inputName, select, values, nbValues);
2724 }
2725
2734 int32_t getNbShapeValues(char const* inputName) const noexcept
2735 {
2736 return mImpl->getNbShapeValues(inputName);
2737 }
2738
2748 TRT_DEPRECATED int32_t const* getShapeValues(char const* inputName, OptProfileSelector select) const noexcept
2749 {
2750 return mImpl->getShapeValues(inputName, select);
2751 }
2752
2766 bool setExtraMemoryTarget(float target) noexcept
2767 {
2768 return mImpl->setExtraMemoryTarget(target);
2769 }
2770
2778 float getExtraMemoryTarget() const noexcept
2779 {
2780 return mImpl->getExtraMemoryTarget();
2781 }
2782
2795 bool isValid() const noexcept
2796 {
2797 return mImpl->isValid();
2798 }
2799
2846 char const* inputName, OptProfileSelector select, int64_t const* values, int32_t nbValues) noexcept
2847 {
2848 return mImpl->setShapeValuesV2(inputName, select, values, nbValues);
2849 }
2850
2858 int64_t const* getShapeValuesV2(char const* inputName, OptProfileSelector select) const noexcept
2859 {
2860 return mImpl->getShapeValuesV2(inputName, select);
2861 }
2862
2863protected:
2864 apiv::VOptimizationProfile* mImpl;
2865 virtual ~IOptimizationProfile() noexcept = default;
2866};
2867
//! \enum TacticSource
//! \brief Tactic sources that can be enabled/disabled at build time (combined into a
//! TacticSources bitmask).
//! NOTE(review): all five enumerator lines (scrape markers "2881"-"2899") were lost in
//! the Doxygen extraction; EnumMax<TacticSource>() == 5 below confirms five enumerators
//! existed — restore them from the original NvInferRuntime.h.
2875enum class TacticSource : int32_t
2876{
2881
2885
2890
2895
2899};
2900
2901template <>
2902constexpr inline int32_t EnumMax<TacticSource>() noexcept
2903{
2904 return 5;
2905}
2906
//! Bitmask of one or more TacticSource values combined with bitwise OR. \see TacticSource
using TacticSources = uint32_t;
2914
//!
//! \enum ProfilingVerbosity
//!
//! \brief Verbosity of layer information kept in an engine
//! (reported via IEngineInspector / NVTX annotations).
//!
enum class ProfilingVerbosity : int32_t
{
    kLAYER_NAMES_ONLY = 0, //!< Keep only layer names.
    kNONE = 1,             //!< Keep no layer information.
    kDETAILED = 2,         //!< Keep detailed layer information.
};
2930
2932template <>
2933constexpr inline int32_t EnumMax<ProfilingVerbosity>() noexcept
2934{
2935 return 3;
2936}
2937
//! Bitmask of one or more SerializationFlag values combined with bitwise OR. \see SerializationFlag
using SerializationFlags = uint32_t;
2945
//! \enum SerializationFlag
//! \brief Flags controlling engine serialization (combined into a SerializationFlags
//! bitmask; see ISerializationConfig).
//! NOTE(review): EnumMax<SerializationFlag>() below returns 2, but only one enumerator
//! survived the Doxygen scrape (the "2956" line is missing) — restore the second
//! enumerator from the original NvInferRuntime.h.
2953enum class SerializationFlag : int32_t
2954{
    //! Serialize the plan without its weights (per the enumerator name).
2955    kEXCLUDE_WEIGHTS = 0,
2957};
2958
2960template <>
2961constexpr inline int32_t EnumMax<SerializationFlag>() noexcept
2962{
2963 return 2;
2964}
2965
2974{
2975public:
2976 virtual ~ISerializationConfig() noexcept = default;
2977
2989 bool setFlags(SerializationFlags serializationFlags) noexcept
2990 {
2991 return mImpl->setFlags(serializationFlags);
2992 }
2993
3002 {
3003 return mImpl->getFlags();
3004 }
3005
3013 bool clearFlag(SerializationFlag serializationFlag) noexcept
3014 {
3015 return mImpl->clearFlag(serializationFlag);
3016 }
3017
3025 bool setFlag(SerializationFlag serializationFlag) noexcept
3026 {
3027 return mImpl->setFlag(serializationFlag);
3028 }
3029
3037 bool getFlag(SerializationFlag serializationFlag) const noexcept
3038 {
3039 return mImpl->getFlag(serializationFlag);
3040 }
3041
3042protected:
3043 apiv::VSerializationConfig* mImpl;
3044};
3045
//! \enum ExecutionContextAllocationStrategy (scraped copy)
//! \brief Strategies for allocating an execution context's internal activation memory.
//! NOTE(review): the `enum class ExecutionContextAllocationStrategy : int32_t`
//! declaration line (scrape marker "3058") was dropped by the Doxygen extraction;
//! EnumMax<ExecutionContextAllocationStrategy>() == 3 below matches the three
//! enumerators. Restore the declaration line from the original NvInferRuntime.h.
3058{
    //! Static pre-allocation (per the enumerator name).
3059    kSTATIC = 0,
    //! Reallocate on optimization-profile change (per the enumerator name).
3060    kON_PROFILE_CHANGE = 1,
    //! User-managed: the caller supplies the memory (per the enumerator name).
3061    kUSER_MANAGED = 2,
3062};
3063
3069template <>
3070constexpr inline int32_t EnumMax<ExecutionContextAllocationStrategy>() noexcept
3071{
3072 return 3;
3073}
3074
3075
3083{
3084public:
3085 virtual ~IRuntimeConfig() noexcept = default;
3086
3092 void setExecutionContextAllocationStrategy(ExecutionContextAllocationStrategy strategy) noexcept
3093 {
3094 return mImpl->setExecutionContextAllocationStrategy(strategy);
3095 }
3096
3103 {
3104 return mImpl->getExecutionContextAllocationStrategy();
3105 }
3106
3107
3108protected:
3109 apiv::VRuntimeConfig* mImpl;
3110}; // class IRuntimeConfig
3111
//!
//! \class ICudaEngine (scraped copy)
//!
//! \brief A built, deserialized engine: exposes I/O tensor metadata, execution-context
//! creation, serialization, device-memory sizing and weight-streaming controls.
//! All calls delegate to the versioned mImpl object.
//!
//! NOTE(review): numeric prefixes are leftover scraped line numbers. Many member
//! declaration lines (before the "3196", "3259", "3271", "3284", "3608", "3624", "3658",
//! "3672", "3688", "3700", "3710", "3746", "3771", "3787", "3852", "3875", "4004" and
//! "4042" markers) were dropped by the Doxygen extraction; the surviving bodies show
//! which mImpl method each delegates to. Restore signatures from the original header.
//!
3119class ICudaEngine : public INoCopy
3120{
3121public:
3122    virtual ~ICudaEngine() noexcept = default;
3123
    //! \return The shape of the named I/O tensor.
3134    Dims getTensorShape(char const* tensorName) const noexcept
3135    {
3136        return mImpl->getTensorShape(tensorName);
3137    }
3138
    //! \return The data type of the named I/O tensor.
3149    DataType getTensorDataType(char const* tensorName) const noexcept
3150    {
3151        return mImpl->getTensorDataType(tensorName);
3152    }
3153
    //! \return The number of layers in the engine.
3163    int32_t getNbLayers() const noexcept
3164    {
3165        return mImpl->getNbLayers();
3166    }
3167
    //! Serialize the engine into library-owned host memory.
3177    IHostMemory* serialize() const noexcept
3178    {
3179        return mImpl->serialize();
3180    }
3181
    //! NOTE(review): declaration line lost in scrape — body delegates to
    //! mImpl->createExecutionContext(strategy); presumably takes an
    //! ExecutionContextAllocationStrategy parameter.
3196    {
3197        return mImpl->createExecutionContext(strategy);
3198    }
3199
    //! \return Whether the named tensor resides in device or host memory.
3212    TensorLocation getTensorLocation(char const* tensorName) const noexcept
3213    {
3214        return mImpl->getTensorLocation(tensorName);
3215    }
3216
    //! \return True if the named tensor participates in shape inference I/O.
3232    bool isShapeInferenceIO(char const* tensorName) const noexcept
3233    {
3234        return mImpl->isShapeInferenceIO(tensorName);
3235    }
3236
    //! \return Whether the named tensor is an input or an output.
3246    TensorIOMode getTensorIOMode(char const* tensorName) const noexcept
3247    {
3248        return mImpl->getTensorIOMode(tensorName);
3249    }
3250
    //! NOTE(review): declaration line lost in scrape — creates an execution context
    //! without allocating device memory (per mImpl call name).
3259    {
3260        return mImpl->createExecutionContextWithoutDeviceMemory();
3261    }
3262
    //! NOTE(review): declaration line lost in scrape — creates an execution context
    //! from an IRuntimeConfig (per mImpl call name and `runtimeConfig` argument).
3271    {
3272        return mImpl->createExecutionContextWithRuntimeConfig(runtimeConfig);
3273    }
3274
    //! NOTE(review): declaration line lost in scrape — factory for an IRuntimeConfig.
3284    {
3285        return mImpl->createRuntimeConfig();
3286    }
3287
    //! \deprecated Superseded by getDeviceMemorySizeV2().
3295    TRT_DEPRECATED size_t getDeviceMemorySize() const noexcept
3296    {
3297        return mImpl->getDeviceMemorySize();
3298    }
3299
    //! \deprecated Superseded by getDeviceMemorySizeForProfileV2().
3307    TRT_DEPRECATED size_t getDeviceMemorySizeForProfile(int32_t profileIndex) const noexcept
3308    {
3309        return mImpl->getDeviceMemorySizeForProfile(profileIndex);
3310    }
3311
    //! \return The device memory required by an execution context, in bytes.
3323    int64_t getDeviceMemorySizeV2() const noexcept
3324    {
3325        return mImpl->getDeviceMemorySizeV2();
3326    }
3327
    //! \return The device memory required for a specific optimization profile, in bytes.
3339    int64_t getDeviceMemorySizeForProfileV2(int32_t profileIndex) const noexcept
3340    {
3341        return mImpl->getDeviceMemorySizeForProfileV2(profileIndex);
3342    }
3343
    //! \return True if the engine can be refitted (see IRefitter).
3349    bool isRefittable() const noexcept
3350    {
3351        return mImpl->isRefittable();
3352    }
3353
    //! \return Bytes per component of the named tensor.
3370    int32_t getTensorBytesPerComponent(char const* tensorName) const noexcept
3371    {
3372        return mImpl->getTensorBytesPerComponent(tensorName);
3373    }
3374
    //! Per-profile overload; note it delegates to the V2 implementation entry point.
3388    int32_t getTensorBytesPerComponent(char const* tensorName, int32_t profileIndex) const noexcept
3389    {
3390        return mImpl->getTensorBytesPerComponentV2(tensorName, profileIndex);
3391    }
3392
    //! \return Components per element of the named tensor.
3409    int32_t getTensorComponentsPerElement(char const* tensorName) const noexcept
3410    {
3411        return mImpl->getTensorComponentsPerElement(tensorName);
3412    }
3413
    //! Per-profile overload; delegates to the V2 implementation entry point.
3427    int32_t getTensorComponentsPerElement(char const* tensorName, int32_t profileIndex) const noexcept
3428    {
3429        return mImpl->getTensorComponentsPerElementV2(tensorName, profileIndex);
3430    }
3431
    //! \return The TensorFormat of the named tensor.
3442    TensorFormat getTensorFormat(char const* tensorName) const noexcept
3443    {
3444        return mImpl->getTensorFormat(tensorName);
3445    }
3446
    //! Per-profile overload; delegates to the V2 implementation entry point.
3456    TensorFormat getTensorFormat(char const* tensorName, int32_t profileIndex) const noexcept
3457    {
3458        return mImpl->getTensorFormatV2(tensorName, profileIndex);
3459    }
3460
    //! \return A human-readable description of the named tensor's format.
3480    char const* getTensorFormatDesc(char const* tensorName) const noexcept
3481    {
3482        return mImpl->getTensorFormatDesc(tensorName);
3483    }
3484
    //! Per-profile overload; delegates to the V2 implementation entry point.
3503    char const* getTensorFormatDesc(char const* tensorName, int32_t profileIndex) const noexcept
3504    {
3505        return mImpl->getTensorFormatDescV2(tensorName, profileIndex);
3506    }
3507
    //! \return The dimension index along which the named tensor is vectorized.
3520    int32_t getTensorVectorizedDim(char const* tensorName) const noexcept
3521    {
3522        return mImpl->getTensorVectorizedDim(tensorName);
3523    }
3524
    //! Per-profile overload; delegates to the V2 implementation entry point.
3536    int32_t getTensorVectorizedDim(char const* tensorName, int32_t profileIndex) const noexcept
3537    {
3538        return mImpl->getTensorVectorizedDimV2(tensorName, profileIndex);
3539    }
3540
    //! \return The name of this engine (set at network creation time).
3551    char const* getName() const noexcept
3552    {
3553        return mImpl->getName();
3554    }
3555
    //! \return The number of optimization profiles built into the engine.
3562    int32_t getNbOptimizationProfiles() const noexcept
3563    {
3564        return mImpl->getNbOptimizationProfiles();
3565    }
3566
    //! \return The min/opt/max shape of a tensor under the given profile.
3582    Dims getProfileShape(char const* tensorName, int32_t profileIndex, OptProfileSelector select) const noexcept
3583    {
3584        return mImpl->getProfileShape(tensorName, profileIndex, select);
3585    }
3586
    //! NOTE(review): signature line lost in scrape — body delegates to
    //! mImpl->getProfileTensorValues(tensorName, profileIndex, select).
3608        char const* tensorName, int32_t profileIndex, OptProfileSelector select) const noexcept
3609    {
3610        return mImpl->getProfileTensorValues(tensorName, profileIndex, select);
3611    }
3612
    //! NOTE(review): declaration line lost in scrape — getter for EngineCapability.
3624    {
3625        return mImpl->getEngineCapability();
3626    }
3627
    //! Register an error recorder to capture errors raised through this engine.
3642    void setErrorRecorder(IErrorRecorder* recorder) noexcept
3643    {
3644        return mImpl->setErrorRecorder(recorder);
3645    }
3646
    //! NOTE(review): declaration line lost in scrape — error-recorder getter.
3658    {
3659        return mImpl->getErrorRecorder();
3660    }
3661
    //! NOTE(review): declaration line lost in scrape — implicit-batch query
    //! (per mImpl->hasImplicitBatchDimension call name).
3672    {
3673        return mImpl->hasImplicitBatchDimension();
3674    }
3675
    //! NOTE(review): declaration line lost in scrape — tactic-sources getter.
3688    {
3689        return mImpl->getTacticSources();
3690    }
3691
    //! NOTE(review): declaration line lost in scrape — profiling-verbosity getter.
3700    {
3701        return mImpl->getProfilingVerbosity();
3702    }
3703
    //! NOTE(review): declaration line lost in scrape — factory for an IEngineInspector.
3710    {
3711        return mImpl->createEngineInspector();
3712    }
3713
    //! \return The number of I/O tensors (use with getIOTensorName()).
3722    int32_t getNbIOTensors() const noexcept
3723    {
3724        return mImpl->getNbIOTensors();
3725    }
3726
    //! \return The name of the I/O tensor at `index`.
3734    char const* getIOTensorName(int32_t index) const noexcept
3735    {
3736        return mImpl->getIOTensorName(index);
3737    }
3738
    //! NOTE(review): declaration line lost in scrape — hardware-compatibility-level getter.
3746    {
3747        return mImpl->getHardwareCompatibilityLevel();
3748    }
3749
    //! \return The number of auxiliary streams the engine may use.
3760    int32_t getNbAuxStreams() const noexcept
3761    {
3762        return mImpl->getNbAuxStreams();
3763    }
3764
    //! NOTE(review): declaration line lost in scrape — factory for an ISerializationConfig.
3771    {
3772        return mImpl->createSerializationConfig();
3773    }
3774
    //! NOTE(review): declaration line lost in scrape — serialize with an
    //! ISerializationConfig (per mImpl call name and `config` argument).
3787    {
3788        return mImpl->serializeWithConfig(config);
3789    }
3790
    //! \deprecated Superseded by setWeightStreamingBudgetV2().
3831    TRT_DEPRECATED bool setWeightStreamingBudget(int64_t gpuMemoryBudget) noexcept
3832    {
3833        return mImpl->setWeightStreamingBudget(gpuMemoryBudget);
3834    }
3835
    //! NOTE(review): declaration line lost in scrape — weight-streaming-budget getter
    //! (presumably deprecated in favor of the V2 getter below).
3852    {
3853        return mImpl->getWeightStreamingBudget();
3854    }
3855
    //! NOTE(review): declaration line lost in scrape — minimum-weight-streaming-budget getter.
3875    {
3876        return mImpl->getMinimumWeightStreamingBudget();
3877    }
3878
    //! \return The total size of the weights that can be streamed, in bytes.
3890    int64_t getStreamableWeightsSize() const noexcept
3891    {
3892        return mImpl->getStreamableWeightsSize();
3893    }
3894
    //! Set the GPU memory budget for weight streaming; \return true on success.
3932    bool setWeightStreamingBudgetV2(int64_t gpuMemoryBudget) noexcept
3933    {
3934        return mImpl->setWeightStreamingBudgetV2(gpuMemoryBudget);
3935    }
3936
3950    int64_t getWeightStreamingBudgetV2() const noexcept
3951    {
3952        return mImpl->getWeightStreamingBudgetV2();
3953    }
3954
    //! \return An automatically determined weight-streaming budget, in bytes.
3975    int64_t getWeightStreamingAutomaticBudget() const noexcept
3976    {
3977        return mImpl->getWeightStreamingAutomaticBudget();
3978    }
3979
    //! NOTE(review): declaration line lost in scrape — weight-streaming scratch-memory-size getter.
4004    {
4005        return mImpl->getWeightStreamingScratchMemorySize();
4006    }
4007
    //! \return True if the named tensor is marked as a debug tensor.
4017    bool isDebugTensor(char const* name) const noexcept
4018    {
4019        return mImpl->isDebugTensor(name);
4020    }
4021
    //! NOTE(review): signature line lost in scrape — body delegates to
    //! mImpl->getProfileTensorValuesV2(tensorName, profileIndex, select).
4042        char const* tensorName, int32_t profileIndex, OptProfileSelector select) const noexcept
4043    {
4044        return mImpl->getProfileTensorValuesV2(tensorName, profileIndex, select);
4045    }
4046
4047protected:
4048    apiv::VCudaEngine* mImpl; //!< pimpl: versioned implementation supplied by the library.
4049};
4050
4051namespace v_1_0
4052{
4054{
4055public:
4059 InterfaceInfo getInterfaceInfo() const noexcept override
4060 {
4061 return {"IOutputAllocator", 1, 0};
4062 }
4063
4084 char const* tensorName, void* currentMemory, uint64_t size, uint64_t alignment) noexcept
4085 {
4086 return nullptr;
4087 }
4088
4112 char const* tensorName, void* currentMemory, uint64_t size, uint64_t alignment, cudaStream_t /*stream*/)
4113 {
4114 return reallocateOutput(tensorName, currentMemory, size, alignment);
4115 }
4116
4125 virtual void notifyShape(char const* tensorName, Dims const& dims) noexcept = 0;
4126};
4127} // namespace v_1_0
4128
4137
4138namespace v_1_0
4139{
4141{
4142public:
4146 InterfaceInfo getInterfaceInfo() const noexcept override
4147 {
4148 return {"IDebugListener", 1, 0};
4149 }
4150
4164 virtual bool processDebugTensor(void const* addr, TensorLocation location, DataType type, Dims const& shape,
4165 char const* name, cudaStream_t stream)
4166 = 0;
4167
4168 ~IDebugListener() override = default;
4169};
4170} // namespace v_1_0
4171
4178
4190{
4191public:
4192 virtual ~IExecutionContext() noexcept = default;
4193
    //! \brief Set the debug-sync flag. Forwards to the implementation object.
4202 void setDebugSync(bool sync) noexcept
4203 {
4204 mImpl->setDebugSync(sync);
4205 }
4206
    //! \brief Get the debug-sync flag.
4212 bool getDebugSync() const noexcept
4213 {
4214 return mImpl->getDebugSync();
4215 }
4216
    //! \brief Set the profiler used to report layer timing.
4222 void setProfiler(IProfiler* profiler) noexcept
4223 {
4224 mImpl->setProfiler(profiler);
4225 }
4226
    //! \brief Get the currently set profiler.
4232 IProfiler* getProfiler() const noexcept
4233 {
4234 return mImpl->getProfiler();
4235 }
4236
    //! \brief Get the engine associated with this execution context.
4242 ICudaEngine const& getEngine() const noexcept
4243 {
4244 return mImpl->getEngine();
4245 }
4246
    //! \brief Set the name of this execution context.
4256 void setName(char const* name) noexcept
4257 {
4258 mImpl->setName(name);
4259 }
4260
    //! \brief Return the name of this execution context.
4266 char const* getName() const noexcept
4267 {
4268 return mImpl->getName();
4269 }
4270
    //! \brief Set the device memory for use by this execution context (size-less legacy form).
4293 void setDeviceMemory(void* memory) noexcept
4294 {
4295 mImpl->setDeviceMemory(memory);
4296 }
4297
    //! \brief Set the device memory and its size for use by this execution context.
4315 void setDeviceMemoryV2(void* memory, int64_t size) noexcept
4316 {
4317 return mImpl->setDeviceMemoryV2(memory, size);
4318 }
4319
    //! \brief Return the strides of the buffer for the given tensor name.
4336 Dims getTensorStrides(char const* tensorName) const noexcept
4337 {
4338 return mImpl->getTensorStrides(tensorName);
4339 }
4340
4341public:
    //! \brief Get the index of the currently selected optimization profile.
4351 int32_t getOptimizationProfile() const noexcept
4352 {
4353 return mImpl->getOptimizationProfile();
4354 }
4355
    //! \brief Set the shape of the given input tensor.
4369 bool setInputShape(char const* tensorName, Dims const& dims) noexcept
4370 {
4371 return mImpl->setInputShape(tensorName, dims);
4372 }
4373
    //! \brief Return the shape of the given input or output tensor.
4406 Dims getTensorShape(char const* tensorName) const noexcept
4407 {
4408 return mImpl->getTensorShape(tensorName);
4409 }
4410
    //! \brief Whether all dynamic dimensions of input tensors have been specified.
4422 bool allInputDimensionsSpecified() const noexcept
4423 {
4424 return mImpl->allInputDimensionsSpecified();
4425 }
4426
4440 {
4441 return mImpl->allInputShapesSpecified();
4442 }
4443
    //! \brief Set the ErrorRecorder for this interface.
4458 void setErrorRecorder(IErrorRecorder* recorder) noexcept
4459 {
4460 mImpl->setErrorRecorder(recorder);
4461 }
4462
4474 {
4475 return mImpl->getErrorRecorder();
4476 }
4477
    //! \brief Synchronously execute the network with the given array of tensor bindings.
4490 bool executeV2(void* const* bindings) noexcept
4491 {
4492 return mImpl->executeV2(bindings);
4493 }
4494
    //! \brief Select an optimization profile for this context with async (stream-ordered) semantics.
4534 bool setOptimizationProfileAsync(int32_t profileIndex, cudaStream_t stream) noexcept
4535 {
4536 return mImpl->setOptimizationProfileAsync(profileIndex, stream);
4537 }
4538
    //! \brief Set whether enqueue emits layer timing to the profiler.
4550 void setEnqueueEmitsProfile(bool enqueueEmitsProfile) noexcept
4551 {
4552 mImpl->setEnqueueEmitsProfile(enqueueEmitsProfile);
4553 }
4554
    //! \brief Get the enqueueEmitsProfile state.
4562 bool getEnqueueEmitsProfile() const noexcept
4563 {
4564 return mImpl->getEnqueueEmitsProfile();
4565 }
4566
    //! \brief Calculate layer timing for the current optimization profile and update the profiler.
4592 bool reportToProfiler() const noexcept
4593 {
4594 return mImpl->reportToProfiler();
4595 }
4596
    //! \brief Set the memory address for the given input or output tensor.
4636 bool setTensorAddress(char const* tensorName, void* data) noexcept
4637 {
4638 return mImpl->setTensorAddress(tensorName, data);
4639 }
4640
    //! \brief Get the memory address bound to the given input or output tensor.
4653 void const* getTensorAddress(char const* tensorName) const noexcept
4654 {
4655 return mImpl->getTensorAddress(tensorName);
4656 }
4657
    //! \brief Set the memory address for the given output tensor.
4676 bool setOutputTensorAddress(char const* tensorName, void* data) noexcept
4677 {
4678 return mImpl->setOutputTensorAddress(tensorName, data);
4679 }
4680
    //! \brief Set the memory address for the given input tensor (const data).
4698 bool setInputTensorAddress(char const* tensorName, void const* data) noexcept
4699 {
4700 return mImpl->setInputTensorAddress(tensorName, data);
4701 }
4702
    //! \brief Get the memory address for the given output tensor.
4717 void* getOutputTensorAddress(char const* tensorName) const noexcept
4718 {
4719 return mImpl->getOutputTensorAddress(tensorName);
4720 }
4721
    //! \brief Run shape calculations; tensorNames receives up to nbMaxNames entries.
4750 int32_t inferShapes(int32_t nbMaxNames, char const** tensorNames) noexcept
4751 {
4752 return mImpl->inferShapes(nbMaxNames, tensorNames);
4753 }
4754
4768 {
4769 return mImpl->updateDeviceMemorySizeForShapes();
4770 }
4771
    //! \brief Mark the input as consumed via the given CUDA event.
4783 bool setInputConsumedEvent(cudaEvent_t event) noexcept
4784 {
4785 return mImpl->setInputConsumedEvent(event);
4786 }
4787
    //! \brief Get the event associated with consuming the input.
4793 cudaEvent_t getInputConsumedEvent() const noexcept
4794 {
4795 return mImpl->getInputConsumedEvent();
4796 }
4797
    //! \brief Set the output allocator for the named output tensor; pass nullptr to unset.
4812 bool setOutputAllocator(char const* tensorName, IOutputAllocator* outputAllocator) noexcept
4813 {
4814 return mImpl->setOutputAllocator(tensorName, outputAllocator);
4815 }
4816
    //! \brief Get the output allocator associated with the named output tensor, if any.
4825 IOutputAllocator* getOutputAllocator(char const* tensorName) const noexcept
4826 {
4827 return mImpl->getOutputAllocator(tensorName);
4828 }
4829
    //! \brief Get an upper bound on the named output tensor's size, in bytes.
4843 int64_t getMaxOutputSize(char const* tensorName) const noexcept
4844 {
4845 return mImpl->getMaxOutputSize(tensorName);
4846 }
4847
4864 {
4865 return mImpl->setTemporaryStorageAllocator(allocator);
4866 }
4867
4874 {
4875 return mImpl->getTemporaryStorageAllocator();
4876 }
4877
    //! \brief Enqueue inference on the given CUDA stream (asynchronous execution path).
4897 bool enqueueV3(cudaStream_t stream) noexcept
4898 {
4899 return mImpl->enqueueV3(stream);
4900 }
4901
    //! \brief Set the maximum size for persistent cache usage, in bytes.
4913 void setPersistentCacheLimit(size_t size) noexcept
4914 {
4915 mImpl->setPersistentCacheLimit(size);
4916 }
4917
    //! \brief Get the maximum size for persistent cache usage, in bytes.
4924 size_t getPersistentCacheLimit() const noexcept
4925 {
4926 return mImpl->getPersistentCacheLimit();
4927 }
4928
    //! \brief Set the verbosity of NVTX markers emitted by this execution context.
4948 bool setNvtxVerbosity(ProfilingVerbosity verbosity) noexcept
4949 {
4950 return mImpl->setNvtxVerbosity(verbosity);
4951 }
4952
4961 {
4962 return mImpl->getNvtxVerbosity();
4963 }
4964
    //! \brief Set the auxiliary CUDA streams TensorRT may launch kernels on in the next enqueueV3() call.
4991 void setAuxStreams(cudaStream_t* auxStreams, int32_t nbStreams) noexcept
4992 {
4993 mImpl->setAuxStreams(auxStreams, nbStreams);
4994 }
4995
    //! \brief Set the debug listener for this execution context.
5003 bool setDebugListener(IDebugListener* listener) noexcept
5004 {
5005 return mImpl->setDebugListener(listener);
5006 }
5007
5014 {
5015 return mImpl->getDebugListener();
5016 }
5017
    //! \brief Set the debug state of the named tensor.
5032 bool setTensorDebugState(char const* name, bool flag) noexcept
5033 {
5034 return mImpl->setTensorDebugState(name, flag);
5035 }
5036
    //! \brief Get the debug state of the named tensor.
5044 bool getDebugState(char const* name) const noexcept
5045 {
5046 return mImpl->getDebugState(name);
5047 }
5048
5055 {
5056 return mImpl->getRuntimeConfig();
5057 }
5058
    //! \brief Turn the debug state of all debug tensors on or off.
5067 bool setAllTensorsDebugState(bool flag) noexcept
5068 {
5069 return mImpl->setAllTensorsDebugState(flag);
5070 }
5071
    //! \brief Turn the debug state of unfused tensors on or off.
5083 bool setUnfusedTensorsDebugState(bool flag) noexcept
5084 {
5085 return mImpl->setUnfusedTensorsDebugState(flag);
5086 }
5087
    //! \brief Get the debug state of unfused tensors.
5093 bool getUnfusedTensorsDebugState() const noexcept
5094 {
5095 return mImpl->getUnfusedTensorsDebugState();
5096 }
5097
5098protected:
    //! PIMPL pointer: all public methods forward to this versioned implementation object.
5099 apiv::VExecutionContext* mImpl;
5100}; // class IExecutionContext
5101
//! \brief Format selector for layer-information strings produced by IEngineInspector.
5109enum class LayerInformationFormat : int32_t
5110{
5111 kONELINE = 0,
5112 kJSON = 1,
5113};
5114
//! Maximum number of elements in LayerInformationFormat (kONELINE, kJSON).
5117template <>
5118constexpr inline int32_t EnumMax<LayerInformationFormat>() noexcept
5119{
5120 return 2;
5121}
5122
5139{
5140public:
    //! Virtual destructor so subclasses can be destroyed through a base pointer.
5141 virtual ~IEngineInspector() noexcept = default;
5142
    //! \brief Set the execution context to be inspected (see getExecutionContext()).
5155 bool setExecutionContext(IExecutionContext const* context) noexcept
5156 {
5157 return mImpl->setExecutionContext(context);
5158 }
5159
5168 {
5169 return mImpl->getExecutionContext();
5170 }
5171
    //! \brief Get a string describing a specific layer in the current engine or execution context,
    //! formatted according to the requested LayerInformationFormat.
5192 char const* getLayerInformation(int32_t layerIndex, LayerInformationFormat format) const noexcept
5193 {
5194 return mImpl->getLayerInformation(layerIndex, format);
5195 }
5196
    //! \brief Get a string describing all layers in the current engine or execution context.
5215 char const* getEngineInformation(LayerInformationFormat format) const noexcept
5216 {
5217 return mImpl->getEngineInformation(format);
5218 }
5219
    //! \brief Set the ErrorRecorder for this interface.
5234 void setErrorRecorder(IErrorRecorder* recorder) noexcept
5235 {
5236 mImpl->setErrorRecorder(recorder);
5237 }
5238
5250 {
5251 return mImpl->getErrorRecorder();
5252 }
5253
5254protected:
5255 apiv::VEngineInspector* mImpl;
5256}; // class IEngineInspector
5257
5258} // namespace nvinfer1
5259
//! Internal C entry point wrapped by nvinfer1::createInferRuntime(); takes a type-erased
//! logger and the header's NV_TENSORRT_VERSION for ABI checking.
5264extern "C" TENSORRTAPI void* createInferRuntime_INTERNAL(void* logger, int32_t version) noexcept;
5265
//! Internal C entry point wrapped by nvinfer1::createInferRefitter().
5270extern "C" TENSORRTAPI void* createInferRefitter_INTERNAL(void* engine, void* logger, int32_t version) noexcept;
5271
5276
//! Return the logger object.
5282extern "C" TENSORRTAPI nvinfer1::ILogger* getLogger() noexcept;
5283
5284namespace nvinfer1
5285{
5286namespace // unnamed namespace avoids linkage surprises when linking objects built with different versions of this
5287 // header.
5288{
//! \brief Create an IRuntime instance.
//! Thin wrapper over the versioned C entry point; passes NV_TENSORRT_VERSION so the
//! library can reject mismatched headers.
5294inline IRuntime* createInferRuntime(ILogger& logger) noexcept
5295{
5296 return static_cast<IRuntime*>(createInferRuntime_INTERNAL(&logger, NV_TENSORRT_VERSION));
5297}
5298
//! \brief Create an IRefitter for the given engine.
//! Same versioned-entry-point pattern as createInferRuntime().
5305inline IRefitter* createInferRefitter(ICudaEngine& engine, ILogger& logger) noexcept
5306{
5307 return static_cast<IRefitter*>(createInferRefitter_INTERNAL(&engine, &logger, NV_TENSORRT_VERSION));
5308}
5309
5310} // namespace
5311
5323template <typename T>
5325{
5326public:
5328 {
5329 getPluginRegistry()->registerCreator(instance, "");
5330 }
5331
5332private:
5334 T instance{};
5335};
5336
5337} // namespace nvinfer1
5338
//! Defines a file-scope PluginRegistrar<name> whose constructor registers the plugin
//! creator with the global plugin registry during static initialization.
5339#define REGISTER_TENSORRT_PLUGIN(name) \
5340 static nvinfer1::PluginRegistrar<name> pluginRegistrar##name {}
5341
5342namespace nvinfer1
5343{
5353{
5354public:
5362 virtual ILogger* findLogger() = 0;
5363
5364protected:
5365 virtual ~ILoggerFinder() = default;
5366};
5367
5370namespace v_1_0
5371{
5372
5374{
5375public:
    //! Virtual destructor so implementations can be destroyed through the interface pointer.
5377 ~IGpuAsyncAllocator() override = default;
5378
    //! \brief Pure-virtual stream-ordered allocation; implementations must honor size,
    //! alignment and flags. The stream parameter name is commented out in the declaration.
5408 void* allocateAsync(uint64_t const size, uint64_t const alignment, AllocatorFlags const flags,
5409 cudaStream_t /*stream*/) noexcept override = 0;
5410
    //! \brief Pure-virtual stream-ordered deallocation of memory returned by allocateAsync.
5436 bool deallocateAsync(void* const memory, cudaStream_t /*stream*/) noexcept override = 0;
5437
5462 uint64_t const size, uint64_t const alignment, AllocatorFlags const flags) noexcept override
5463 {
5464 return allocateAsync(size, alignment, flags, nullptr);
5465 }
5466
    //! \brief Deprecated synchronous deallocate; forwards to deallocateAsync with a null stream.
5485 TRT_DEPRECATED bool deallocate(void* const memory) noexcept override
5486 {
5487 return deallocateAsync(memory, nullptr);
5488 }
5489
    //! \brief Return version information for this interface: "IGpuAllocator", version 1.0.
5493 InterfaceInfo getInterfaceInfo() const noexcept override
5494 {
5495 return {"IGpuAllocator", 1, 0};
5496 }
5497};
5498
5500{
5501public:
5505 InterfaceInfo getInterfaceInfo() const noexcept override
5506 {
5507 return InterfaceInfo{"PLUGIN CREATOR_V3ONE", 1, 0};
5508 }
5509
5527 AsciiChar const* name, PluginFieldCollection const* fc, TensorRTPhase phase) noexcept = 0;
5528
    //! \brief Return the fields that should be passed when creating the plugin.
5535 virtual PluginFieldCollection const* getFieldNames() noexcept = 0;
5536
    //! \brief Return the plugin name.
5543 virtual AsciiChar const* getPluginName() const noexcept = 0;
5544
    //! \brief Return the plugin version string.
5551 virtual AsciiChar const* getPluginVersion() const noexcept = 0;
5552
    //! \brief Return the namespace this plugin creator belongs to.
5559 virtual AsciiChar const* getPluginNamespace() const noexcept = 0;
5560
    //! Virtual destructor so creators can be destroyed through the interface pointer.
5562 virtual ~IPluginCreatorV3One() = default;
5563
5564protected:
5567 IPluginCreatorV3One& operator=(IPluginCreatorV3One const&) & = default;
5568 IPluginCreatorV3One& operator=(IPluginCreatorV3One&&) & = default;
5569};
5570
5571} // namespace v_1_0
5572
5587
5597
5598} // namespace nvinfer1
5599
//! Return the library major version number.
5603extern "C" TENSORRTAPI int32_t getInferLibMajorVersion() noexcept;
//! Return the library minor version number.
5607extern "C" TENSORRTAPI int32_t getInferLibMinorVersion() noexcept;
//! Return the library patch version number.
5611extern "C" TENSORRTAPI int32_t getInferLibPatchVersion() noexcept;
//! Return the library build version number.
5615extern "C" TENSORRTAPI int32_t getInferLibBuildVersion() noexcept;
5616
5617#endif // NV_INFER_RUNTIME_H
nvinfer1::ILogger * getLogger() noexcept
Return the logger object.
int32_t getInferLibMajorVersion() noexcept
Return the library major version number.
nvinfer1::IPluginRegistry * getPluginRegistry() noexcept
Return the plugin registry.
int32_t getInferLibPatchVersion() noexcept
Return the library patch version number.
int32_t getInferLibMinorVersion() noexcept
Return the library minor version number.
int32_t getInferLibBuildVersion() noexcept
Return the library build version number.
#define TENSORRTAPI
Definition: NvInferRuntimeBase.h:69
#define NV_TENSORRT_VERSION
Definition: NvInferRuntimeBase.h:101
#define TRT_DEPRECATED
Definition: NvInferRuntimeBase.h:42
#define TRT_DEPRECATED_ENUM
Definition: NvInferRuntimeBase.h:43
Structure to define the dimensions of a tensor.
Definition: NvInferRuntimeBase.h:216
static constexpr int32_t MAX_DIMS
The maximum rank (number of dimensions) supported for a tensor.
Definition: NvInferRuntimeBase.h:219
Analog of class Dims with expressions instead of constants for the dimensions.
Definition: NvInferRuntime.h:350
IDimensionExpr const * d[Dims::MAX_DIMS]
The extent of each dimension.
Definition: NvInferRuntime.h:353
int32_t nbDims
The number of dimensions.
Definition: NvInferRuntime.h:352
An engine for executing inference on a built network, with functionally unsafe features.
Definition: NvInferRuntime.h:3120
int32_t getTensorBytesPerComponent(char const *tensorName) const noexcept
Return the number of bytes per component of an element, or -1 if the tensor is not vectorized or prov...
Definition: NvInferRuntime.h:3370
ISerializationConfig * createSerializationConfig() noexcept
Create a serialization configuration object.
Definition: NvInferRuntime.h:3770
TRT_DEPRECATED int64_t getWeightStreamingBudget() const noexcept
Returns the current weight streaming device memory budget in bytes.
Definition: NvInferRuntime.h:3851
char const * getIOTensorName(int32_t index) const noexcept
Return name of an IO tensor.
Definition: NvInferRuntime.h:3734
int64_t getWeightStreamingBudgetV2() const noexcept
Returns the current weight streaming device memory budget in bytes.
Definition: NvInferRuntime.h:3950
EngineCapability getEngineCapability() const noexcept
Determine what execution capability this engine has.
Definition: NvInferRuntime.h:3623
IErrorRecorder * getErrorRecorder() const noexcept
Get the ErrorRecorder assigned to this interface.
Definition: NvInferRuntime.h:3657
TensorFormat getTensorFormat(char const *tensorName, int32_t profileIndex) const noexcept
Return the tensor format of given profile, or TensorFormat::kLINEAR if the provided name does not map...
Definition: NvInferRuntime.h:3456
int64_t const * getProfileTensorValuesV2(char const *tensorName, int32_t profileIndex, OptProfileSelector select) const noexcept
Get the minimum / optimum / maximum values (not dimensions) for an input tensor given its name under ...
Definition: NvInferRuntime.h:4041
TRT_DEPRECATED bool hasImplicitBatchDimension() const noexcept
Query whether the engine was built with an implicit batch dimension.
Definition: NvInferRuntime.h:3671
apiv::VCudaEngine * mImpl
Definition: NvInferRuntime.h:4048
TRT_DEPRECATED size_t getDeviceMemorySizeForProfile(int32_t profileIndex) const noexcept
Return the maximum device memory required by the context for a profile.
Definition: NvInferRuntime.h:3307
IExecutionContext * createExecutionContext(ExecutionContextAllocationStrategy strategy=ExecutionContextAllocationStrategy::kSTATIC) noexcept
Create an execution context and specify the strategy for allocating internal activation memory.
Definition: NvInferRuntime.h:3194
char const * getTensorFormatDesc(char const *tensorName) const noexcept
Return the human readable description of the tensor format, or empty string if the provided name does...
Definition: NvInferRuntime.h:3480
Dims getProfileShape(char const *tensorName, int32_t profileIndex, OptProfileSelector select) const noexcept
Get the minimum / optimum / maximum dimensions for an input tensor given its name under an optimizati...
Definition: NvInferRuntime.h:3582
bool setWeightStreamingBudgetV2(int64_t gpuMemoryBudget) noexcept
Limit the maximum amount of GPU memory usable for network weights in bytes.
Definition: NvInferRuntime.h:3932
IExecutionContext * createExecutionContext(IRuntimeConfig *runtimeConfig) noexcept
Create an execution context with TensorRT JIT runtime config.
Definition: NvInferRuntime.h:3270
int32_t getNbAuxStreams() const noexcept
Return the number of auxiliary streams used by this engine.
Definition: NvInferRuntime.h:3760
int64_t getStreamableWeightsSize() const noexcept
Get the total size in bytes of all streamable weights.
Definition: NvInferRuntime.h:3890
DataType getTensorDataType(char const *tensorName) const noexcept
Determine the required data type for a buffer from its tensor name.
Definition: NvInferRuntime.h:3149
void setErrorRecorder(IErrorRecorder *recorder) noexcept
Set the ErrorRecorder for this interface.
Definition: NvInferRuntime.h:3642
TacticSources getTacticSources() const noexcept
return the tactic sources required by this engine.
Definition: NvInferRuntime.h:3687
IHostMemory * serializeWithConfig(ISerializationConfig &config) const noexcept
Serialize the network to a stream with the provided SerializationConfig.
Definition: NvInferRuntime.h:3786
virtual ~ICudaEngine() noexcept=default
int64_t getWeightStreamingAutomaticBudget() const noexcept
TensorRT automatically determines a device memory budget for the model to run. The budget is close to...
Definition: NvInferRuntime.h:3975
bool isDebugTensor(char const *name) const noexcept
Check if a tensor is marked as a debug tensor.
Definition: NvInferRuntime.h:4017
int32_t getTensorVectorizedDim(char const *tensorName, int32_t profileIndex) const noexcept
Return the dimension index that the buffer is vectorized of given profile, or -1 if the provided name...
Definition: NvInferRuntime.h:3536
char const * getName() const noexcept
Returns the name of the network associated with the engine.
Definition: NvInferRuntime.h:3551
ProfilingVerbosity getProfilingVerbosity() const noexcept
Return the ProfilingVerbosity the builder config was set to when the engine was built.
Definition: NvInferRuntime.h:3699
bool isShapeInferenceIO(char const *tensorName) const noexcept
True if tensor is required as input for shape calculations or is output from shape calculations.
Definition: NvInferRuntime.h:3232
TRT_DEPRECATED int32_t const * getProfileTensorValues(char const *tensorName, int32_t profileIndex, OptProfileSelector select) const noexcept
Get the minimum / optimum / maximum values (not dimensions) for an input tensor given its name under ...
Definition: NvInferRuntime.h:3607
int64_t getWeightStreamingScratchMemorySize() const noexcept
Returns the size of the scratch memory required by the current weight streaming budget.
Definition: NvInferRuntime.h:4003
TRT_DEPRECATED bool setWeightStreamingBudget(int64_t gpuMemoryBudget) noexcept
Limit the maximum amount of GPU memory usable for network weights in bytes.
Definition: NvInferRuntime.h:3831
int64_t getDeviceMemorySizeV2() const noexcept
Return the maximum device memory required by the context over all profiles.
Definition: NvInferRuntime.h:3323
int32_t getTensorVectorizedDim(char const *tensorName) const noexcept
Return the dimension index that the buffer is vectorized, or -1 if the provided name does not map to ...
Definition: NvInferRuntime.h:3520
TRT_DEPRECATED size_t getDeviceMemorySize() const noexcept
Return the maximum device memory required by the context over all profiles.
Definition: NvInferRuntime.h:3295
int32_t getTensorComponentsPerElement(char const *tensorName, int32_t profileIndex) const noexcept
Return the number of components included in one element of given profile, or -1 if tensor is not vect...
Definition: NvInferRuntime.h:3427
int64_t getDeviceMemorySizeForProfileV2(int32_t profileIndex) const noexcept
Return the maximum device memory required by the context for a profile.
Definition: NvInferRuntime.h:3339
IRuntimeConfig * createRuntimeConfig() noexcept
Create a runtime config for TensorRT JIT. The caller is responsible for ownership of the returned IRu...
Definition: NvInferRuntime.h:3283
TensorFormat getTensorFormat(char const *tensorName) const noexcept
Return the tensor format, or TensorFormat::kLINEAR if the provided name does not map to an input or o...
Definition: NvInferRuntime.h:3442
IHostMemory * serialize() const noexcept
Serialize the network to a stream.
Definition: NvInferRuntime.h:3177
TensorLocation getTensorLocation(char const *tensorName) const noexcept
Get whether an input or output tensor must be on GPU or CPU.
Definition: NvInferRuntime.h:3212
IEngineInspector * createEngineInspector() const noexcept
Create a new engine inspector which prints the layer information in an engine or an execution context...
Definition: NvInferRuntime.h:3709
int32_t getTensorBytesPerComponent(char const *tensorName, int32_t profileIndex) const noexcept
Return the number of bytes per component of an element given of given profile, or -1 if the tensor is...
Definition: NvInferRuntime.h:3388
HardwareCompatibilityLevel getHardwareCompatibilityLevel() const noexcept
Return the hardware compatibility level of this engine.
Definition: NvInferRuntime.h:3745
int32_t getNbOptimizationProfiles() const noexcept
Get the number of optimization profiles defined for this engine.
Definition: NvInferRuntime.h:3562
TRT_DEPRECATED IExecutionContext * createExecutionContextWithoutDeviceMemory() noexcept
create an execution context without any device memory allocated
Definition: NvInferRuntime.h:3258
char const * getTensorFormatDesc(char const *tensorName, int32_t profileIndex) const noexcept
Return the human readable description of the tensor format of given profile, or empty string if the p...
Definition: NvInferRuntime.h:3503
TRT_DEPRECATED int64_t getMinimumWeightStreamingBudget() const noexcept
The minimum number of bytes of GPU memory required by network weights for successful weight streaming...
Definition: NvInferRuntime.h:3874
TensorIOMode getTensorIOMode(char const *tensorName) const noexcept
Determine whether a tensor is an input or output tensor.
Definition: NvInferRuntime.h:3246
int32_t getNbLayers() const noexcept
Get the number of layers in the network.
Definition: NvInferRuntime.h:3163
int32_t getNbIOTensors() const noexcept
Return number of IO tensors.
Definition: NvInferRuntime.h:3722
int32_t getTensorComponentsPerElement(char const *tensorName) const noexcept
Return the number of components included in one element, or -1 if tensor is not vectorized or if the ...
Definition: NvInferRuntime.h:3409
bool isRefittable() const noexcept
Return true if an engine can be refit.
Definition: NvInferRuntime.h:3349
An IDimensionExpr represents an integer expression constructed from constants, input dimensions,...
Definition: NvInferRuntime.h:232
bool isConstant() const noexcept
Return true if expression is a build-time constant.
Definition: NvInferRuntime.h:237
bool isSizeTensor() const noexcept
Return true if this denotes the value of a size tensor.
Definition: NvInferRuntime.h:263
virtual ~IDimensionExpr() noexcept=default
apiv::VDimensionExpr * mImpl
Definition: NvInferRuntime.h:254
int64_t getConstantValue() const noexcept
Get the value of the constant.
Definition: NvInferRuntime.h:248
An engine inspector which prints out the layer information of an engine or an execution context.
Definition: NvInferRuntime.h:5139
char const * getLayerInformation(int32_t layerIndex, LayerInformationFormat format) const noexcept
Get a string describing the information about a specific layer in the current engine or the execution...
Definition: NvInferRuntime.h:5192
IErrorRecorder * getErrorRecorder() const noexcept
Get the ErrorRecorder assigned to this interface.
Definition: NvInferRuntime.h:5249
void setErrorRecorder(IErrorRecorder *recorder) noexcept
Set the ErrorRecorder for this interface.
Definition: NvInferRuntime.h:5234
IExecutionContext const * getExecutionContext() const noexcept
Get the context currently being inspected.
Definition: NvInferRuntime.h:5167
apiv::VEngineInspector * mImpl
Definition: NvInferRuntime.h:5255
virtual ~IEngineInspector() noexcept=default
char const * getEngineInformation(LayerInformationFormat format) const noexcept
Get a string describing the information about all the layers in the current engine or the execution c...
Definition: NvInferRuntime.h:5215
Context for executing inference using an engine, with functionally unsafe features.
Definition: NvInferRuntime.h:4190
IOutputAllocator * getOutputAllocator(char const *tensorName) const noexcept
Get output allocator associated with output tensor of given name, or nullptr if the provided name doe...
Definition: NvInferRuntime.h:4825
IErrorRecorder * getErrorRecorder() const noexcept
Get the ErrorRecorder assigned to this interface.
Definition: NvInferRuntime.h:4473
bool reportToProfiler() const noexcept
Calculate layer timing info for the current optimization profile in IExecutionContext and update the ...
Definition: NvInferRuntime.h:4592
virtual ~IExecutionContext() noexcept=default
void setDeviceMemory(void *memory) noexcept
Set the device memory for use by this execution context.
Definition: NvInferRuntime.h:4293
TRT_DEPRECATED bool allInputShapesSpecified() const noexcept
Whether all input shape bindings have been specified.
Definition: NvInferRuntime.h:4439
bool setTensorDebugState(char const *name, bool flag) noexcept
Set debug state of tensor given the tensor name.
Definition: NvInferRuntime.h:5032
char const * getName() const noexcept
Return the name of the execution context.
Definition: NvInferRuntime.h:4266
IGpuAllocator * getTemporaryStorageAllocator() const noexcept
Get allocator set by setTemporaryStorageAllocator.
Definition: NvInferRuntime.h:4873
void setEnqueueEmitsProfile(bool enqueueEmitsProfile) noexcept
Set whether enqueue emits layer timing to the profiler.
Definition: NvInferRuntime.h:4550
bool setUnfusedTensorsDebugState(bool flag) noexcept
Turn the debug state of unfused tensors on or off.
Definition: NvInferRuntime.h:5083
Dims getTensorShape(char const *tensorName) const noexcept
Return the shape of the given input or output.
Definition: NvInferRuntime.h:4406
bool getDebugState(char const *name) const noexcept
Get the debug state.
Definition: NvInferRuntime.h:5044
bool setInputShape(char const *tensorName, Dims const &dims) noexcept
Set shape of given input.
Definition: NvInferRuntime.h:4369
bool executeV2(void *const *bindings) noexcept
Synchronously execute a network.
Definition: NvInferRuntime.h:4490
bool getEnqueueEmitsProfile() const noexcept
Get the enqueueEmitsProfile state.
Definition: NvInferRuntime.h:4562
void const * getTensorAddress(char const *tensorName) const noexcept
Get memory address bound to given input or output tensor, or nullptr if the provided name does not ma...
Definition: NvInferRuntime.h:4653
bool setOutputAllocator(char const *tensorName, IOutputAllocator *outputAllocator) noexcept
Set output allocator to use for output tensor of given name. Pass nullptr to outputAllocator to unset...
Definition: NvInferRuntime.h:4812
bool setOptimizationProfileAsync(int32_t profileIndex, cudaStream_t stream) noexcept
Select an optimization profile for the current context with async semantics.
Definition: NvInferRuntime.h:4534
apiv::VExecutionContext * mImpl
Definition: NvInferRuntime.h:5099
bool setOutputTensorAddress(char const *tensorName, void *data) noexcept
Set the memory address for a given output tensor.
Definition: NvInferRuntime.h:4676
void setPersistentCacheLimit(size_t size) noexcept
Set the maximum size for persistent cache usage.
Definition: NvInferRuntime.h:4913
size_t getPersistentCacheLimit() const noexcept
Get the maximum size for persistent cache usage.
Definition: NvInferRuntime.h:4924
bool setAllTensorsDebugState(bool flag) noexcept
Turn the debug state of all debug tensors on or off.
Definition: NvInferRuntime.h:5067
ICudaEngine const & getEngine() const noexcept
Get the associated engine.
Definition: NvInferRuntime.h:4242
ProfilingVerbosity getNvtxVerbosity() const noexcept
Get the NVTX verbosity of the execution context.
Definition: NvInferRuntime.h:4960
size_t updateDeviceMemorySizeForShapes() noexcept
Recompute the internal activation buffer sizes based on the current input shapes, and return the tota...
Definition: NvInferRuntime.h:4767
void setAuxStreams(cudaStream_t *auxStreams, int32_t nbStreams) noexcept
Set the auxiliary streams that TensorRT should launch kernels on in the next enqueueV3() call.
Definition: NvInferRuntime.h:4991
int64_t getMaxOutputSize(char const *tensorName) const noexcept
Get upper bound on an output tensor's size, in bytes, based on the current optimization profile and i...
Definition: NvInferRuntime.h:4843
int32_t inferShapes(int32_t nbMaxNames, char const **tensorNames) noexcept
Run shape calculations.
Definition: NvInferRuntime.h:4750
bool setDebugListener(IDebugListener *listener) noexcept
Set DebugListener for this execution context.
Definition: NvInferRuntime.h:5003
bool setTensorAddress(char const *tensorName, void *data) noexcept
Set memory address for given input or output tensor.
Definition: NvInferRuntime.h:4636
bool setTemporaryStorageAllocator(IGpuAllocator *allocator) noexcept
Specify allocator to use for internal temporary storage.
Definition: NvInferRuntime.h:4863
void * getOutputTensorAddress(char const *tensorName) const noexcept
Get memory address for given output.
Definition: NvInferRuntime.h:4717
bool enqueueV3(cudaStream_t stream) noexcept
Enqueue inference on a stream.
Definition: NvInferRuntime.h:4897
IDebugListener * getDebugListener() noexcept
Get the DebugListener of this execution context.
Definition: NvInferRuntime.h:5013
int32_t getOptimizationProfile() const noexcept
Get the index of the currently selected optimization profile.
Definition: NvInferRuntime.h:4351
bool setInputTensorAddress(char const *tensorName, void const *data) noexcept
Set memory address for given input.
Definition: NvInferRuntime.h:4698
bool getDebugSync() const noexcept
Get the debug sync flag.
Definition: NvInferRuntime.h:4212
bool setInputConsumedEvent(cudaEvent_t event) noexcept
Mark input as consumed.
Definition: NvInferRuntime.h:4783
Dims getTensorStrides(char const *tensorName) const noexcept
Return the strides of the buffer for the given tensor name.
Definition: NvInferRuntime.h:4336
bool setNvtxVerbosity(ProfilingVerbosity verbosity) noexcept
Set the verbosity of the NVTX markers in the execution context.
Definition: NvInferRuntime.h:4948
IProfiler * getProfiler() const noexcept
Get the profiler.
Definition: NvInferRuntime.h:4232
void setErrorRecorder(IErrorRecorder *recorder) noexcept
Set the ErrorRecorder for this interface.
Definition: NvInferRuntime.h:4458
void setDeviceMemoryV2(void *memory, int64_t size) noexcept
Set the device memory and its corresponding size for use by this execution context.
Definition: NvInferRuntime.h:4315
bool allInputDimensionsSpecified() const noexcept
Whether all dynamic dimensions of input tensors have been specified.
Definition: NvInferRuntime.h:4422
bool getUnfusedTensorsDebugState() const noexcept
Get the debug state of unfused tensors.
Definition: NvInferRuntime.h:5093
void setProfiler(IProfiler *profiler) noexcept
Set the profiler.
Definition: NvInferRuntime.h:4222
void setName(char const *name) noexcept
Set the name of the execution context.
Definition: NvInferRuntime.h:4256
cudaEvent_t getInputConsumedEvent() const noexcept
The event associated with consuming the input.
Definition: NvInferRuntime.h:4793
IRuntimeConfig * getRuntimeConfig() const noexcept
Get the runtime config object used during execution context creation.
Definition: NvInferRuntime.h:5054
Object for constructing IDimensionExpr.
Definition: NvInferRuntime.h:287
IDimensionExpr const * operation(DimensionOperation op, IDimensionExpr const &first, IDimensionExpr const &second) noexcept
Get the operation.
Definition: NvInferRuntime.h:303
virtual ~IExprBuilder() noexcept=default
IDimensionExpr const * constant(int64_t value) noexcept
Return pointer to IDimensionExpr for given value.
Definition: NvInferRuntime.h:292
apiv::VExprBuilder * mImpl
Definition: NvInferRuntime.h:310
IDimensionExpr const * declareSizeTensor(int32_t outputIndex, IDimensionExpr const &opt, IDimensionExpr const &upper)
Declare a size tensor at the given output index, with the specified auto-tuning formula and upper bou...
Definition: NvInferRuntime.h:338
Class to handle library allocated memory that is accessible to the user.
Definition: NvInferRuntime.h:142
void * data() const noexcept
A pointer to the raw data that is owned by the library.
Definition: NvInferRuntime.h:147
DataType type() const noexcept
The type of the memory that was allocated.
Definition: NvInferRuntime.h:159
std::size_t size() const noexcept
The size in bytes of the data that was allocated.
Definition: NvInferRuntime.h:153
apiv::VHostMemory * mImpl
Definition: NvInferRuntime.h:165
virtual ~IHostMemory() noexcept=default
A virtual base class to find a logger. Allows a plugin to find an instance of a logger if it needs to...
Definition: NvInferRuntime.h:5353
virtual ILogger * findLogger()=0
Get the logger used by the engine or execution context which called the plugin method.
virtual ~ILoggerFinder()=default
Application-implemented logging interface for the builder, refitter and runtime.
Definition: NvInferRuntime.h:1542
virtual ~ILogger()=default
Severity
The severity corresponding to a log message.
Definition: NvInferRuntime.h:1550
virtual void log(Severity severity, AsciiChar const *msg) noexcept=0
A callback implemented by the application to handle logging messages;.
Forward declaration of IEngineInspector for use by other interfaces.
Definition: NvInferRuntime.h:51
INoCopy & operator=(INoCopy &&other)=delete
INoCopy(INoCopy const &other)=delete
INoCopy(INoCopy &&other)=delete
virtual ~INoCopy()=default
INoCopy & operator=(INoCopy const &other)=delete
Optimization profile for dynamic input dimensions and shape tensors.
Definition: NvInferRuntime.h:2627
TRT_DEPRECATED int32_t const * getShapeValues(char const *inputName, OptProfileSelector select) const noexcept
Get the minimum / optimum / maximum values for an input shape tensor.
Definition: NvInferRuntime.h:2748
apiv::VOptimizationProfile * mImpl
Definition: NvInferRuntime.h:2864
Dims getDimensions(char const *inputName, OptProfileSelector select) const noexcept
Get the minimum / optimum / maximum dimensions for a dynamic input tensor.
Definition: NvInferRuntime.h:2668
TRT_DEPRECATED bool setShapeValues(char const *inputName, OptProfileSelector select, int32_t const *values, int32_t nbValues) noexcept
Set the minimum / optimum / maximum values for an input shape tensor.
Definition: NvInferRuntime.h:2720
virtual ~IOptimizationProfile() noexcept=default
float getExtraMemoryTarget() const noexcept
Get the extra memory target that has been defined for this profile.
Definition: NvInferRuntime.h:2778
bool setExtraMemoryTarget(float target) noexcept
Set a target for extra GPU memory that may be used by this profile.
Definition: NvInferRuntime.h:2766
bool setDimensions(char const *inputName, OptProfileSelector select, Dims const &dims) noexcept
Set the minimum / optimum / maximum dimensions for a dynamic input tensor.
Definition: NvInferRuntime.h:2656
bool isValid() const noexcept
Check whether the optimization profile can be passed to an IBuilderConfig object.
Definition: NvInferRuntime.h:2795
int64_t const * getShapeValuesV2(char const *inputName, OptProfileSelector select) const noexcept
Get the minimum / optimum / maximum values for an input shape tensor.
Definition: NvInferRuntime.h:2858
bool setShapeValuesV2(char const *inputName, OptProfileSelector select, int64_t const *values, int32_t nbValues) noexcept
Set the minimum / optimum / maximum values for an input shape tensor.
Definition: NvInferRuntime.h:2845
int32_t getNbShapeValues(char const *inputName) const noexcept
Get the number of values for an input shape tensor.
Definition: NvInferRuntime.h:2734
Single registration point for all plugins in an application. It is used to find plugin implementation...
Definition: NvInferRuntimeCommon.h:56
virtual TRT_DEPRECATED bool registerCreator(IPluginCreator &creator, AsciiChar const *const pluginNamespace) noexcept=0
Register a plugin creator implementing IPluginCreator. Returns false if any plugin creator with the s...
Interface for plugins to access per context resources provided by TensorRT.
Definition: NvInferRuntime.h:767
virtual IErrorRecorder * getErrorRecorder() const noexcept=0
Get the error recorder associated with the resource context.
IPluginResourceContext & operator=(IPluginResourceContext const &) &=default
virtual IGpuAllocator * getGpuAllocator() const noexcept=0
Get the GPU allocator associated with the resource context.
Similar to IPluginV2Ext, but with support for dynamic shapes.
Definition: NvInferRuntime.h:407
IPluginV2DynamicExt * clone() const noexcept override=0
Clone the plugin object. This copies over internal plugin parameters as well and returns a new plugin...
virtual ~IPluginV2DynamicExt() noexcept
Definition: NvInferRuntime.h:558
Plugin class for user-implemented layers.
Definition: NvInferRuntimePlugin.h:474
Updates weights in an engine.
Definition: NvInferRuntime.h:2147
bool refitCudaEngineAsync(cudaStream_t stream) noexcept
Enqueue weights refitting of the associated engine on the given stream.
Definition: NvInferRuntime.h:2549
int32_t getMaxThreads() const noexcept
Get the maximum number of threads that can be used by the refitter.
Definition: NvInferRuntime.h:2428
TensorLocation getWeightsLocation(char const *weightsName) const noexcept
Get location for the weights associated with the given name.
Definition: NvInferRuntime.h:2487
bool setNamedWeights(char const *name, Weights weights) noexcept
Specify new weights of given name.
Definition: NvInferRuntime.h:2352
int32_t getAllWeights(int32_t size, char const **weightsNames) noexcept
Get names of all weights that could be refit.
Definition: NvInferRuntime.h:2388
ILogger * getLogger() const noexcept
Get the logger with which the refitter was created.
Definition: NvInferRuntime.h:2398
bool refitCudaEngine() noexcept
Refits associated engine.
Definition: NvInferRuntime.h:2183
int32_t getMissingWeights(int32_t size, char const **weightsNames) noexcept
Get names of missing weights.
Definition: NvInferRuntime.h:2372
TRT_DEPRECATED float getDynamicRangeMax(char const *tensorName) const noexcept
Get maximum of dynamic range.
Definition: NvInferRuntime.h:2275
int32_t getMissing(int32_t size, char const **layerNames, WeightsRole *roles) noexcept
Get description of missing weights.
Definition: NvInferRuntime.h:2204
Weights getNamedWeights(char const *weightsName) const noexcept
Get weights associated with the given name.
Definition: NvInferRuntime.h:2471
bool unsetNamedWeights(char const *weightsName) noexcept
Unset weights associated with the given name.
Definition: NvInferRuntime.h:2503
Weights getWeightsPrototype(char const *weightsName) const noexcept
Get the Weights prototype associated with the given name.
Definition: NvInferRuntime.h:2567
bool setMaxThreads(int32_t maxThreads) noexcept
Set the maximum number of threads.
Definition: NvInferRuntime.h:2414
TRT_DEPRECATED float getDynamicRangeMin(char const *tensorName) const noexcept
Get minimum of dynamic range.
Definition: NvInferRuntime.h:2259
TRT_DEPRECATED int32_t getTensorsWithDynamicRange(int32_t size, char const **tensorNames) const noexcept
Get names of all tensors that have refittable dynamic ranges.
Definition: NvInferRuntime.h:2293
bool setNamedWeights(char const *name, Weights weights, TensorLocation location) noexcept
Specify new weights on a specified device of given name.
Definition: NvInferRuntime.h:2455
void setWeightsValidation(bool weightsValidation) noexcept
Set whether to validate weights during refitting.
Definition: NvInferRuntime.h:2519
TRT_DEPRECATED bool setDynamicRange(char const *tensorName, float min, float max) noexcept
Definition: NvInferRuntime.h:2243
apiv::VRefitter * mImpl
Definition: NvInferRuntime.h:2573
int32_t getAll(int32_t size, char const **layerNames, WeightsRole *roles) noexcept
Get description of all weights that could be refit.
Definition: NvInferRuntime.h:2221
virtual ~IRefitter() noexcept=default
bool getWeightsValidation() const noexcept
Get whether to validate weights values during refitting.
Definition: NvInferRuntime.h:2527
void setErrorRecorder(IErrorRecorder *recorder) noexcept
Set the ErrorRecorder for this interface.
Definition: NvInferRuntime.h:2312
IErrorRecorder * getErrorRecorder() const noexcept
Get the ErrorRecorder assigned to this interface.
Definition: NvInferRuntime.h:2327
A class for runtime configuration. This class is used during execution context creation.
Definition: NvInferRuntime.h:3083
virtual ~IRuntimeConfig() noexcept=default
apiv::VRuntimeConfig * mImpl
Definition: NvInferRuntime.h:3109
ExecutionContextAllocationStrategy getExecutionContextAllocationStrategy() const noexcept
Get the execution context allocation strategy.
Definition: NvInferRuntime.h:3102
Allows a serialized functionally unsafe engine to be deserialized.
Definition: NvInferRuntime.h:1819
bool setMaxThreads(int32_t maxThreads) noexcept
Set the maximum number of threads.
Definition: NvInferRuntime.h:1992
IRuntime * loadRuntime(char const *path) noexcept
Load IRuntime from the file.
Definition: NvInferRuntime.h:2108
bool getEngineHostCodeAllowed() const noexcept
Get whether the runtime is allowed to deserialize engines with host executable code.
Definition: NvInferRuntime.h:2130
TempfileControlFlags getTempfileControlFlags() const noexcept
Get the tempfile control flags for this runtime.
Definition: NvInferRuntime.h:2080
void setEngineHostCodeAllowed(bool allowed) noexcept
Set whether the runtime is allowed to deserialize engines with host executable code.
Definition: NvInferRuntime.h:2120
virtual ~IRuntime() noexcept=default
void setTemporaryDirectory(char const *path) noexcept
Set the directory that will be used by this runtime for temporary files.
Definition: NvInferRuntime.h:2041
IPluginRegistry & getPluginRegistry() noexcept
Get the local plugin registry that can be used by the runtime.
Definition: NvInferRuntime.h:2090
apiv::VRuntime * mImpl
Definition: NvInferRuntime.h:2136
int32_t getNbDLACores() const noexcept
Returns number of DLA hardware cores accessible or 0 if DLA is unavailable.
Definition: NvInferRuntime.h:1852
ICudaEngine * deserializeCudaEngine(void const *blob, std::size_t size) noexcept
Deserialize an engine from host memory.
Definition: NvInferRuntime.h:1920
void setTempfileControlFlags(TempfileControlFlags flags) noexcept
Set the tempfile control flags for this runtime.
Definition: NvInferRuntime.h:2068
int32_t getDLACore() const noexcept
Get the DLA core that the engine executes on.
Definition: NvInferRuntime.h:1844
void setGpuAllocator(IGpuAllocator *allocator) noexcept
Set the GPU allocator.
Definition: NvInferRuntime.h:1868
IErrorRecorder * getErrorRecorder() const noexcept
Get the ErrorRecorder assigned to this interface.
Definition: NvInferRuntime.h:1902
ICudaEngine * deserializeCudaEngine(IStreamReaderV2 &streamReader)
Deserialize an engine from a stream. IStreamReaderV2 is expected to support reading to both host and ...
Definition: NvInferRuntime.h:1967
ILogger * getLogger() const noexcept
Get the logger with which the runtime was created.
Definition: NvInferRuntime.h:1977
int32_t getMaxThreads() const noexcept
Get the maximum number of threads that can be used by the runtime.
Definition: NvInferRuntime.h:2006
char const * getTemporaryDirectory() const noexcept
Get the directory that will be used by this runtime for temporary files.
Definition: NvInferRuntime.h:2052
TRT_DEPRECATED ICudaEngine * deserializeCudaEngine(IStreamReader &streamReader)
Deserialize an engine from a stream.
Definition: NvInferRuntime.h:1944
void setErrorRecorder(IErrorRecorder *recorder) noexcept
Set the ErrorRecorder for this interface.
Definition: NvInferRuntime.h:1887
Holds properties for configuring an engine to serialize the binary.
Definition: NvInferRuntime.h:2974
virtual ~ISerializationConfig() noexcept=default
bool clearFlag(SerializationFlag serializationFlag) noexcept
Clear a serialization flag.
Definition: NvInferRuntime.h:3013
bool setFlag(SerializationFlag serializationFlag) noexcept
Set a serialization flag.
Definition: NvInferRuntime.h:3025
SerializationFlags getFlags() const noexcept
Get the serialization flags for this config.
Definition: NvInferRuntime.h:3001
bool getFlag(SerializationFlag serializationFlag) const noexcept
Returns true if the serialization flag is set.
Definition: NvInferRuntime.h:3037
apiv::VSerializationConfig * mImpl
Definition: NvInferRuntime.h:3043
An Interface class for version control.
Definition: NvInferRuntimeBase.h:276
Version information associated with a TRT interface.
Definition: NvInferRuntimeBase.h:241
Register the plugin creator to the registry The static registry object will be instantiated when the ...
Definition: NvInferRuntime.h:5325
PluginRegistrar()
Definition: NvInferRuntime.h:5327
An array of weights used as a layer parameter.
Definition: NvInferRuntime.h:124
DataType type
The type of the weights.
Definition: NvInferRuntime.h:126
int64_t count
The number of weights in the array.
Definition: NvInferRuntime.h:128
void const * values
The weight values, in a contiguous array.
Definition: NvInferRuntime.h:127
Definition: NvInferRuntime.h:4141
virtual bool processDebugTensor(void const *addr, TensorLocation location, DataType type, Dims const &shape, char const *name, cudaStream_t stream)=0
Callback function that is called when a debug tensor’s value is updated and the debug state of the te...
InterfaceInfo getInterfaceInfo() const noexcept override
Return version information associated with this interface. Applications must not override this method...
Definition: NvInferRuntime.h:4146
~IDebugListener() override=default
Definition: NvInferRuntimeBase.h:413
Definition: NvInferRuntime.h:1610
virtual void * allocateAsync(uint64_t const size, uint64_t const alignment, AllocatorFlags const flags, cudaStream_t) noexcept
A thread-safe callback implemented by the application to handle stream-ordered acquisition of GPU mem...
Definition: NvInferRuntime.h:1732
InterfaceInfo getInterfaceInfo() const noexcept override
Return version information associated with this interface. Applications must not override this method...
Definition: NvInferRuntime.h:1773
virtual TRT_DEPRECATED bool deallocate(void *const memory) noexcept=0
A thread-safe callback implemented by the application to handle release of GPU memory.
~IGpuAllocator() override=default
virtual void * reallocate(void *const, uint64_t, uint64_t) noexcept
A thread-safe callback implemented by the application to resize an existing allocation.
Definition: NvInferRuntime.h:1679
virtual TRT_DEPRECATED void * allocate(uint64_t const size, uint64_t const alignment, AllocatorFlags const flags) noexcept=0
A thread-safe callback implemented by the application to handle acquisition of GPU memory.
virtual bool deallocateAsync(void *const memory, cudaStream_t) noexcept
A thread-safe callback implemented by the application to handle stream-ordered release of GPU memory.
Definition: NvInferRuntime.h:1765
Definition: NvInferRuntime.h:5374
bool deallocateAsync(void *const memory, cudaStream_t) noexcept override=0
A thread-safe callback implemented by the application to handle stream-ordered asynchronous release o...
void * allocateAsync(uint64_t const size, uint64_t const alignment, AllocatorFlags const flags, cudaStream_t) noexcept override=0
A thread-safe callback implemented by the application to handle stream-ordered asynchronous acquisiti...
TRT_DEPRECATED void * allocate(uint64_t const size, uint64_t const alignment, AllocatorFlags const flags) noexcept override
A thread-safe callback implemented by the application to handle acquisition of GPU memory.
Definition: NvInferRuntime.h:5461
TRT_DEPRECATED bool deallocate(void *const memory) noexcept override
A thread-safe callback implemented by the application to handle release of GPU memory.
Definition: NvInferRuntime.h:5485
InterfaceInfo getInterfaceInfo() const noexcept override
Return version information associated with this interface. Applications must not override this method...
Definition: NvInferRuntime.h:5493
~IGpuAsyncAllocator() override=default
Definition: NvInferRuntime.h:4054
InterfaceInfo getInterfaceInfo() const noexcept override
Return version information associated with this interface. Applications must not override this method...
Definition: NvInferRuntime.h:4059
virtual void * reallocateOutputAsync(char const *tensorName, void *currentMemory, uint64_t size, uint64_t alignment, cudaStream_t)
Return a pointer to memory for an output tensor, or nullptr if memory cannot be allocated....
Definition: NvInferRuntime.h:4111
virtual void notifyShape(char const *tensorName, Dims const &dims) noexcept=0
Called by TensorRT when the shape of the output tensor is known.
virtual TRT_DEPRECATED void * reallocateOutput(char const *tensorName, void *currentMemory, uint64_t size, uint64_t alignment) noexcept
Return a pointer to memory for an output tensor, or nullptr if memory cannot be allocated....
Definition: NvInferRuntime.h:4083
Definition: NvInferPluginBase.h:141
Definition: NvInferPluginBase.h:193
Definition: NvInferRuntime.h:5500
virtual PluginFieldCollection const * getFieldNames() noexcept=0
Return a list of fields that need to be passed to createPlugin() when creating a plugin for use in th...
InterfaceInfo getInterfaceInfo() const noexcept override
Return version information associated with this interface. Applications must not override this method...
Definition: NvInferRuntime.h:5505
virtual IPluginV3 * createPlugin(AsciiChar const *name, PluginFieldCollection const *fc, TensorRTPhase phase) noexcept=0
Return a plugin object. Return nullptr in case of error.
Definition: NvInferPluginBase.h:206
Definition: NvInferRuntime.h:836
virtual int32_t getFormatCombinationLimit() noexcept
Return the maximum number of format combinations that will be timed by TensorRT during the build phas...
Definition: NvInferRuntime.h:1040
virtual int32_t getNbOutputs() const noexcept=0
Get the number of outputs from the plugin.
virtual int32_t configurePlugin(DynamicPluginTensorDesc const *in, int32_t nbInputs, DynamicPluginTensorDesc const *out, int32_t nbOutputs) noexcept=0
Configure the plugin.
virtual int32_t getNbTactics() noexcept
Query for the number of custom tactics the plugin intends to use.
Definition: NvInferRuntime.h:1016
virtual char const * getMetadataString() noexcept
Query for a string representing the configuration of the plugin. May be called anytime after plugin c...
Definition: NvInferRuntime.h:1051
virtual char const * getTimingCacheID() noexcept
Called to query the suffix to use for the timing cache ID. May be called anytime after plugin creatio...
Definition: NvInferRuntime.h:1032
virtual bool supportsFormatCombination(int32_t pos, DynamicPluginTensorDesc const *inOut, int32_t nbInputs, int32_t nbOutputs) noexcept=0
Return true if plugin supports the format and datatype for the input/output indexed by pos.
virtual int32_t getOutputDataTypes(DataType *outputTypes, int32_t nbOutputs, const DataType *inputTypes, int32_t nbInputs) const noexcept=0
Provide the data types of the plugin outputs if the input tensors have the data types provided.
InterfaceInfo getInterfaceInfo() const noexcept override
Return version information associated with this interface. Applications must not override this method...
Definition: NvInferRuntime.h:848
virtual int32_t getOutputShapes(DimsExprs const *inputs, int32_t nbInputs, DimsExprs const *shapeInputs, int32_t nbShapeInputs, DimsExprs *outputs, int32_t nbOutputs, IExprBuilder &exprBuilder) noexcept=0
Provide expressions for computing dimensions of the output tensors from dimensions of the input tenso...
virtual int32_t getValidTactics(int32_t *tactics, int32_t nbTactics) noexcept
Query for any custom tactics that the plugin intends to use.
Definition: NvInferRuntime.h:1008
Definition: NvInferRuntime.h:793
InterfaceInfo getInterfaceInfo() const noexcept override
Return version information associated with this interface. Applications must not override this method...
Definition: NvInferRuntime.h:798
virtual AsciiChar const * getPluginName() const noexcept=0
Return the plugin name. Should match the plugin name returned by the corresponding plugin creator.
Definition: NvInferRuntime.h:1058
InterfaceInfo getInterfaceInfo() const noexcept override
Return version information associated with this interface. Applications must not override this method...
Definition: NvInferRuntime.h:1063
virtual int32_t onShapeChange(PluginTensorDesc const *in, int32_t nbInputs, PluginTensorDesc const *out, int32_t nbOutputs) noexcept=0
Called when a plugin is being prepared for execution for specific dimensions. This could happen multi...
virtual PluginFieldCollection const * getFieldsToSerialize() noexcept=0
Get the plugin fields which should be serialized.
virtual int32_t setTactic(int32_t tactic) noexcept
Set the tactic to be used in the subsequent call to enqueue(). If no custom tactics were advertised,...
Definition: NvInferRuntime.h:1075
virtual int32_t enqueue(PluginTensorDesc const *inputDesc, PluginTensorDesc const *outputDesc, void const *const *inputs, void *const *outputs, void *workspace, cudaStream_t stream) noexcept=0
Execute the layer.
virtual IPluginV3 * attachToContext(IPluginResourceContext *context) noexcept=0
Clone the plugin, attach the cloned plugin object to an execution context and grant the cloned plugin ...
Definition: NvInferRuntime.h:1246
virtual ~IProfiler() noexcept
Definition: NvInferRuntime.h:1257
virtual void reportLayerTime(char const *layerName, float ms) noexcept=0
Layer time reporting callback.
Definition: NvInferRuntime.h:631
~IStreamReader() override=default
IStreamReader & operator=(IStreamReader const &) &=default
IStreamReader & operator=(IStreamReader &&) &=default
virtual int64_t read(void *destination, int64_t nbBytes)=0
Read the next number of bytes in the stream.
IStreamReader(IStreamReader &&)=default
IStreamReader(IStreamReader const &)=default
InterfaceInfo getInterfaceInfo() const noexcept override
Return version information associated with this interface. Applications must not override this method...
Definition: NvInferRuntime.h:643
Definition: NvInferRuntime.h:695
IStreamReaderV2 & operator=(IStreamReaderV2 const &) &=default
InterfaceInfo getInterfaceInfo() const noexcept override
Return version information associated with this interface. Applications must not override this method...
Definition: NvInferRuntime.h:707
IStreamReaderV2(IStreamReaderV2 &&)=default
~IStreamReaderV2() override=default
virtual int64_t read(void *destination, int64_t nbBytes, cudaStream_t stream) noexcept=0
Read the next number of bytes in the stream asynchronously.
IStreamReaderV2(IStreamReaderV2 const &)=default
virtual bool seek(int64_t offset, SeekPosition where) noexcept=0
Sets the position of the stream to the given offset.
IStreamReaderV2 & operator=(IStreamReaderV2 &&) &=default
Definition: NvInferRuntime.h:1155
virtual int32_t getAliasedInput(int32_t outputIndex) noexcept
Communicates to TensorRT that the output at the specified output index is aliased to the input at the...
Definition: NvInferRuntime.h:1191
InterfaceInfo getInterfaceInfo() const noexcept override
Return version information associated with this interface. Applications must not override this method...
Definition: NvInferRuntime.h:1157
IRefitter * createInferRefitter(ICudaEngine &engine, ILogger &logger) noexcept
Create an instance of an IRefitter class.
Definition: NvInferRuntime.h:5305
IRuntime * createInferRuntime(ILogger &logger) noexcept
Create an instance of an IRuntime class.
Definition: NvInferRuntime.h:5294
The TensorRT API version 1 namespace.
uint32_t TacticSources
Represents a collection of one or more TacticSource values combine using bitwise-OR operations.
Definition: NvInferRuntime.h:2913
v_1_0::IOutputAllocator IOutputAllocator
Definition: NvInferRuntime.h:4136
EngineCapability
List of supported engine capability flows.
Definition: NvInferRuntime.h:76
DimensionOperation
An operation on two IDimensionExpr, which represent integer expressions used in dimension computation...
Definition: NvInferRuntime.h:179
@ kSUB
Subtract the second element from the first.
@ kSUM
Sum of the two operands.
@ kPROD
Product of the two operands.
@ kFLOOR_DIV
Floor division of the first element by the second.
@ kEQUAL
1 if operands are equal, 0 otherwise.
@ kMIN
Minimum of the two operands.
@ kLESS
1 if first operand is less than second operand, 0 otherwise.
@ kMAX
Maximum of the two operands.
@ kCEIL_DIV
Division rounding up.
v_1_0::IPluginV3OneCore IPluginV3OneCore
Definition: NvInferRuntime.h:1208
TensorIOMode
Definition of tensor IO Mode.
Definition: NvInferRuntimeBase.h:656
HardwareCompatibilityLevel
Describes requirements of compatibility with GPU architectures other than that of the GPU on which th...
Definition: NvInfer.h:9324
SerializationFlag
List of valid flags that the engine can enable when serializing the bytes.
Definition: NvInferRuntime.h:2954
@ kEXCLUDE_WEIGHTS
Exclude the weights that can be refitted.
v_1_0::IProfiler IProfiler
Definition: NvInferRuntime.h:1272
SeekPosition
Controls the seek mode of IStreamReaderV2.
Definition: NvInferRuntime.h:681
@ kSET
From the beginning of the file.
@ kCUR
From the current position of the file.
@ kEND
From the tail of the file.
v_1_0::IStreamReaderV2 IStreamReaderV2
Definition: NvInferRuntime.h:751
uint32_t TempfileControlFlags
Represents a collection of one or more TempfileControlFlag values combined using bitwise-OR operation...
Definition: NvInferRuntime.h:1350
v_1_0::IGpuAllocator IGpuAllocator
Definition: NvInferRuntime.h:1809
constexpr int32_t EnumMax< WeightsRole >() noexcept
Maximum number of elements in WeightsRole enum.
Definition: NvInferRuntime.h:1293
char_t AsciiChar
Definition: NvInferRuntimeBase.h:115
TensorRTPhase
Indicates a phase of operation of TensorRT.
Definition: NvInferPluginBase.h:116
@ kV2_DYNAMICEXT
IPluginV2DynamicExt.
constexpr int32_t EnumMax< LayerInformationFormat >() noexcept
Definition: NvInferRuntime.h:5118
DataType
The type of weights and tensors.
Definition: NvInferRuntimeBase.h:143
DeviceType
The device that this layer/network will execute on.
Definition: NvInferRuntime.h:1304
@ kSCALE
Scale layer.
@ kCONSTANT
Constant layer.
v_1_0::IDebugListener IDebugListener
Definition: NvInferRuntime.h:4177
TempfileControlFlag
Flags used to control TensorRT's behavior when creating executable temporary files.
Definition: NvInferRuntime.h:1327
@ kALLOW_IN_MEMORY_FILES
Allow creating and loading files in-memory (or unnamed files).
constexpr int32_t EnumMax< OptProfileSelector >() noexcept
Number of different values of OptProfileSelector enum.
Definition: NvInferRuntime.h:2599
WeightsRole
How a layer uses particular Weights.
Definition: NvInferRuntime.h:1282
@ kSHIFT
shift part of IScaleLayer
@ kANY
Any other weights role.
@ kBIAS
bias for IConvolutionLayer or IDeconvolutionLayer
@ kKERNEL
kernel for IConvolutionLayer or IDeconvolutionLayer
constexpr int32_t EnumMax< ProfilingVerbosity >() noexcept
Maximum number of profile verbosity levels in ProfilingVerbosity enum.
Definition: NvInferRuntime.h:2933
ProfilingVerbosity
List of verbosity levels of layer information exposed in NVTX annotations and in IEngineInspector.
Definition: NvInferRuntime.h:2925
@ kLAYER_NAMES_ONLY
Print only the layer names. This is the default setting.
@ kDETAILED
Print detailed layer information including layer names and layer parameters.
TacticSource
List of tactic sources for TensorRT.
Definition: NvInferRuntime.h:2876
TensorFormat PluginFormat
PluginFormat is reserved for backward compatibility.
Definition: NvInferRuntimePlugin.h:54
v_1_0::IPluginV3OneRuntime IPluginV3OneRuntime
Definition: NvInferRuntime.h:1232
@ kMIN
Minimum of the two elements.
constexpr int32_t EnumMax< TempfileControlFlag >() noexcept
Maximum number of elements in TempfileControlFlag enum.
Definition: NvInferRuntime.h:1339
uint32_t SerializationFlags
Represents one or more SerializationFlag values using binary OR operations, e.g., 1U << Serialization...
Definition: NvInferRuntime.h:2944
@ kLINEAR
Supports linear (1D), bilinear (2D), and trilinear (3D) interpolation.
v_1_0::IPluginV3OneBuild IPluginV3OneBuild
Definition: NvInferRuntime.h:1220
TensorFormat
Format of the input/output tensors.
Definition: NvInferRuntime.h:1384
ExecutionContextAllocationStrategy
Different memory allocation behaviors for IExecutionContext.
Definition: NvInferRuntime.h:3058
@ kSTATIC
Default static allocation with the maximum size across all profiles.
@ kUSER_MANAGED
The user supplies custom allocation to the execution context.
@ kON_PROFILE_CHANGE
Reallocate for a profile when it's selected.
constexpr int32_t EnumMax< TacticSource >() noexcept
Maximum number of tactic sources in TacticSource enum.
Definition: NvInferRuntime.h:2902
LayerInformationFormat
The format in which the IEngineInspector prints the layer information.
Definition: NvInferRuntime.h:5110
@ kJSON
Print layer information in JSON format.
@ kONELINE
Print layer information in one line per layer.
v_1_0::IStreamReader IStreamReader
Definition: NvInferRuntime.h:674
AllocatorFlag
Allowed type of memory allocation.
Definition: NvInferRuntime.h:1508
@ kRESIZABLE
TensorRT may call realloc() on this allocation.
@ kMAX
Maximum over elements.
constexpr int32_t EnumMax< DeviceType >() noexcept
Maximum number of elements in DeviceType enum.
Definition: NvInferRuntime.h:1311
constexpr int32_t EnumMax< DimensionOperation >() noexcept
Maximum number of elements in DimensionOperation enum.
Definition: NvInferRuntime.h:193
constexpr int32_t EnumMax< ExecutionContextAllocationStrategy >() noexcept
Maximum number of memory allocation strategies in ExecutionContextAllocationStrategy enum.
Definition: NvInferRuntime.h:3070
constexpr int32_t EnumMax< SerializationFlag >() noexcept
Maximum number of serialization flags in SerializationFlag enum.
Definition: NvInferRuntime.h:2961
TensorLocation
The location for tensor data storage, device or host.
Definition: NvInferRuntime.h:204
@ kHOST
Data stored on host.
@ kDEVICE
Data stored on device.
OptProfileSelector
When setting or querying optimization profile parameters (such as shape tensor inputs or dynamic dime...
Definition: NvInferRuntime.h:2587
@ kOPT
This is used to set or get the value that is used in the optimization (kernel selection).
uint32_t AllocatorFlags
Definition: NvInferRuntime.h:1524
Summarizes tensors that a plugin might see for an input or output.
Definition: NvInferRuntime.h:362
Dims min
Lower bounds on tensor’s dimensions.
Definition: NvInferRuntime.h:367
Dims max
Upper bounds on tensor’s dimensions.
Definition: NvInferRuntime.h:370
Dims opt
Optimum value of tensor’s dimensions specified for auto-tuning.
Definition: NvInferRuntime.h:373
PluginTensorDesc desc
Information required to interpret a pointer to tensor data, except that desc.dims has -1 in place of ...
Definition: NvInferRuntime.h:364
Plugin field collection struct.
Definition: NvInferPluginBase.h:103
Fields that a plugin might see for an input or output.
Definition: NvInferRuntimePlugin.h:73
Declaration of EnumMaxImpl struct to store maximum number of elements in an enumeration type.
Definition: NvInferRuntimeBase.h:128

  Copyright © 2024 NVIDIA Corporation
  Privacy Policy | Manage My Privacy | Do Not Sell or Share My Data | Terms of Service | Accessibility | Corporate Policies | Product Security | Contact