TensorRT 10.7.0
NvInferRuntime.h
Go to the documentation of this file.
1/*
2 * SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
3 * SPDX-License-Identifier: Apache-2.0
4 *
5 * Licensed under the Apache License, Version 2.0 (the "License");
6 * you may not use this file except in compliance with the License.
7 * You may obtain a copy of the License at
8 *
9 * http://www.apache.org/licenses/LICENSE-2.0
10 *
11 * Unless required by applicable law or agreed to in writing, software
12 * distributed under the License is distributed on an "AS IS" BASIS,
13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 * See the License for the specific language governing permissions and
15 * limitations under the License.
16 */
17
18#ifndef NV_INFER_RUNTIME_H
19#define NV_INFER_RUNTIME_H
20
26
27#include "NvInferImpl.h"
28#define NV_INFER_INTERNAL_INCLUDE 1
29#include "NvInferPluginBase.h"
30#undef NV_INFER_INTERNAL_INCLUDE
32
33namespace nvinfer1
34{
35
36class IExecutionContext;
37class ICudaEngine;
38class IPluginFactory;
39class IEngineInspector;
40
49
51{
52protected:
53 INoCopy() = default;
54 virtual ~INoCopy() = default;
55 INoCopy(INoCopy const& other) = delete;
56 INoCopy& operator=(INoCopy const& other) = delete;
57 INoCopy(INoCopy&& other) = delete;
58 INoCopy& operator=(INoCopy&& other) = delete;
59};
60
75enum class EngineCapability : int32_t
76{
81 kSTANDARD = 0,
82
89 kSAFETY = 1,
90
97};
98
99namespace impl
100{
102template <>
104{
105 static constexpr int32_t kVALUE = 3;
106};
107} // namespace impl
108
124{
125public:
127 void const* values;
128 int64_t count;
129};
130
//! Handle to a host-side buffer. Each accessor forwards to the implementation object held in mImpl.
//! Derives from INoCopy, whose copy/move constructors and assignments are deleted.
141class IHostMemory : public INoCopy
142{
143public:
144 virtual ~IHostMemory() noexcept = default;
145
    //! Returns the pointer reported by mImpl->data().
147 void* data() const noexcept
148 {
149 return mImpl->data();
150 }
151
    //! Returns the value reported by mImpl->size().
    //! NOTE(review): presumably a byte count - confirm against the implementation.
153 std::size_t size() const noexcept
154 {
155 return mImpl->size();
156 }
157
    //! Returns the DataType reported by mImpl->type().
159 DataType type() const noexcept
160 {
161 return mImpl->type();
162 }
163
164protected:
    //! Implementation object every accessor delegates to; not initialized or released in this class.
165 apiv::VHostMemory* mImpl;
166};
167
//! Operation selector passed as the `op` argument of IExprBuilder::operation.
//! Values are contiguous from 0 to 8; EnumMax<DimensionOperation>() must stay in sync with the count.
//! NOTE(review): the names suggest sum, product, max, min, subtraction, equality, less-than and
//! floor/ceil division - confirm exact semantics against the TensorRT API reference.
178enum class DimensionOperation : int32_t
179{
180 kSUM = 0,
181 kPROD = 1,
182 kMAX = 2,
183 kMIN = 3,
184 kSUB = 4,
185 kEQUAL = 5,
186 kLESS = 6,
187 kFLOOR_DIV = 7,
188 kCEIL_DIV = 8
189};
190
//! Specialization returning the number of DimensionOperation enumerators (kSUM = 0 .. kCEIL_DIV = 8, so 9).
//! Must be updated whenever an enumerator is added to DimensionOperation.
192template <>
193constexpr inline int32_t EnumMax<DimensionOperation>() noexcept
194{
195 return 9;
196}
197
//! Two-valued location tag (device = 0, host = 1). Within this file it is used by
//! IRefitter::setNamedWeights / getWeightsLocation, ICudaEngine::getTensorLocation and
//! IDebugListener::processDebugTensor.
203enum class TensorLocation : int32_t
204{
205 kDEVICE = 0,
206 kHOST = 1,
207};
208
209namespace impl
210{
212template <>
214{
215 static constexpr int32_t kVALUE = 2;
216};
217} // namespace impl
218
232{
233public:
237 bool isConstant() const noexcept
238 {
239 return mImpl->isConstant();
240 }
241
248 int64_t getConstantValue() const noexcept
249 {
250 return mImpl->getConstantValue();
251 }
252
253protected:
254 apiv::VDimensionExpr* mImpl;
255 virtual ~IDimensionExpr() noexcept = default;
256
257public:
263 bool isSizeTensor() const noexcept
264 {
265 return mImpl->isSizeTensor();
266 }
267};
268
286class IExprBuilder : public INoCopy
287{
288public:
    //! Forwards to mImpl->constant(value) and returns the resulting expression pointer.
    //! NOTE(review): ownership/lifetime of the returned IDimensionExpr is not visible here - confirm.
292 IDimensionExpr const* constant(int64_t value) noexcept
293 {
294 return mImpl->constant(value);
295 }
296
304 DimensionOperation op, IDimensionExpr const& first, IDimensionExpr const& second) noexcept
305 {
306 return mImpl->operation(op, first, second);
307 }
308
309protected:
310 apiv::VExprBuilder* mImpl;
311 virtual ~IExprBuilder() noexcept = default;
312
313public:
338 IDimensionExpr const* declareSizeTensor(int32_t outputIndex, IDimensionExpr const& opt, IDimensionExpr const& upper)
339 {
340 return mImpl->declareSizeTensor(outputIndex, opt, upper);
341 }
342};
343
350{
351public:
352 int32_t nbDims;
354};
355
362{
365
368
371
374};
375
407{
408public:
409 IPluginV2DynamicExt* clone() const noexcept override = 0;
410
435 virtual DimsExprs getOutputDimensions(
436 int32_t outputIndex, DimsExprs const* inputs, int32_t nbInputs, IExprBuilder& exprBuilder) noexcept = 0;
437
441 static constexpr int32_t kFORMAT_COMBINATION_LIMIT = 100;
442
475 virtual bool supportsFormatCombination(
476 int32_t pos, PluginTensorDesc const* inOut, int32_t nbInputs, int32_t nbOutputs) noexcept = 0;
477
515 virtual void configurePlugin(DynamicPluginTensorDesc const* in, int32_t nbInputs,
516 DynamicPluginTensorDesc const* out, int32_t nbOutputs) noexcept = 0;
517
527 virtual size_t getWorkspaceSize(PluginTensorDesc const* inputs, int32_t nbInputs, PluginTensorDesc const* outputs,
528 int32_t nbOutputs) const noexcept = 0;
529
542 virtual int32_t enqueue(PluginTensorDesc const* inputDesc, PluginTensorDesc const* outputDesc,
543 void const* const* inputs, void* const* outputs, void* workspace, cudaStream_t stream) noexcept = 0;
544
545protected:
    //! Returns a packed version word: PluginVersion::kV2_DYNAMICEXT shifted into bits 24 and above,
    //! OR-ed with the low 24 bits of NV_TENSORRT_VERSION.
    //! The shift binds tighter than the bitwise OR, so the left operand is (tag << 24).
553 int32_t getTensorRTVersion() const noexcept override
554 {
555 return (static_cast<int32_t>(PluginVersion::kV2_DYNAMICEXT) << 24 | (NV_TENSORRT_VERSION & 0xFFFFFF));
556 }
557
558 virtual ~IPluginV2DynamicExt() noexcept {}
559
560private:
561 // Following are obsolete base class methods, and must not be implemented or used.
562
566 void configurePlugin(Dims const*, int32_t, Dims const*, int32_t, DataType const*, DataType const*, bool const*,
567 bool const*, PluginFormat, int32_t) noexcept override final
568 {
569 }
570
574 bool supportsFormat(DataType, PluginFormat) const noexcept override final
575 {
576 return false;
577 }
578
582 Dims getOutputDimensions(int32_t, Dims const*, int32_t) noexcept override final
583 {
584 return Dims{-1, {}};
585 }
586
594 TRT_DEPRECATED bool isOutputBroadcastAcrossBatch(int32_t, bool const*, int32_t) const noexcept override final
595 {
596 return false;
597 }
598
606 TRT_DEPRECATED bool canBroadcastInputAcrossBatch(int32_t) const noexcept override final
607 {
608 return true;
609 }
610
614 size_t getWorkspaceSize(int32_t) const noexcept override final
615 {
616 return 0;
617 }
618
622 int32_t enqueue(int32_t, void const* const*, void* const*, void*, cudaStream_t) noexcept override final
623 {
624 return 1;
625 }
626};
627
628namespace v_1_0
629{
631{
632public:
637 ~IStreamReader() override = default;
638 IStreamReader() = default;
639
643 InterfaceInfo getInterfaceInfo() const noexcept override
644 {
645 return InterfaceInfo{"IStreamReader", 1, 0};
646 }
647
656 virtual int64_t read(void* destination, int64_t nbBytes) = 0;
657
658protected:
659 IStreamReader(IStreamReader const&) = default;
663};
664} // namespace v_1_0
665
675
//! Origin selector taken by IStreamReaderV2::seek(offset, where).
//! NOTE(review): the names mirror the C SEEK_SET / SEEK_CUR / SEEK_END convention (start, current
//! position, end) - confirm against the TensorRT API reference.
680enum class SeekPosition : int32_t
681{
683 kSET = 0,
684
686 kCUR = 1,
687
689 kEND = 2,
690};
691
692namespace v_1_0
693{
695{
696public:
701 ~IStreamReaderV2() override = default;
702 IStreamReaderV2() = default;
703
707 InterfaceInfo getInterfaceInfo() const noexcept override
708 {
709 return InterfaceInfo{"IStreamReaderV2", 1, 0};
710 }
711
722 virtual int64_t read(void* destination, int64_t nbBytes, cudaStream_t stream) noexcept = 0;
723
732 virtual bool seek(int64_t offset, SeekPosition where) noexcept = 0;
733
734protected:
739};
740} // namespace v_1_0
741
752
767{
768public:
773 virtual IGpuAllocator* getGpuAllocator() const noexcept = 0;
774
779 virtual IErrorRecorder* getErrorRecorder() const noexcept = 0;
780 virtual ~IPluginResourceContext() noexcept = default;
781
782protected:
786 IPluginResourceContext& operator=(IPluginResourceContext const&) & = default;
788};
789
790namespace v_1_0
791{
793{
794public:
798 InterfaceInfo getInterfaceInfo() const noexcept override
799 {
800 return InterfaceInfo{"PLUGIN_V3ONE_CORE", 1, 0};
801 }
802
811 virtual AsciiChar const* getPluginName() const noexcept = 0;
812
821 virtual AsciiChar const* getPluginVersion() const noexcept = 0;
822
832 virtual AsciiChar const* getPluginNamespace() const noexcept = 0;
833};
834
836{
837public:
843 static constexpr int32_t kDEFAULT_FORMAT_COMBINATION_LIMIT = 100;
844
848 InterfaceInfo getInterfaceInfo() const noexcept override
849 {
850 return InterfaceInfo{"PLUGIN_V3ONE_BUILD", 1, 0};
851 }
852
872 virtual int32_t configurePlugin(DynamicPluginTensorDesc const* in, int32_t nbInputs,
873 DynamicPluginTensorDesc const* out, int32_t nbOutputs) noexcept = 0;
874
891 virtual int32_t getOutputDataTypes(
892 DataType* outputTypes, int32_t nbOutputs, const DataType* inputTypes, int32_t nbInputs) const noexcept = 0;
893
914 virtual int32_t getOutputShapes(DimsExprs const* inputs, int32_t nbInputs, DimsExprs const* shapeInputs,
915 int32_t nbShapeInputs, DimsExprs* outputs, int32_t nbOutputs, IExprBuilder& exprBuilder) noexcept = 0;
916
952 int32_t pos, DynamicPluginTensorDesc const* inOut, int32_t nbInputs, int32_t nbOutputs) noexcept = 0;
953
959 virtual int32_t getNbOutputs() const noexcept = 0;
960
970 virtual size_t getWorkspaceSize(DynamicPluginTensorDesc const* inputs, int32_t nbInputs,
971 DynamicPluginTensorDesc const* outputs, int32_t nbOutputs) const noexcept
972 {
973 return 0;
974 }
975
1007 virtual int32_t getValidTactics(int32_t* tactics, int32_t nbTactics) noexcept
1008 {
1009 return 0;
1010 }
1011
1015 virtual int32_t getNbTactics() noexcept
1016 {
1017 return 0;
1018 }
1019
1031 virtual char const* getTimingCacheID() noexcept
1032 {
1033 return nullptr;
1034 }
1035
1039 virtual int32_t getFormatCombinationLimit() noexcept
1040 {
1041 return kDEFAULT_FORMAT_COMBINATION_LIMIT;
1042 }
1043
1050 virtual char const* getMetadataString() noexcept
1051 {
1052 return nullptr;
1053 }
1054};
1055
1057{
1058public:
1062 InterfaceInfo getInterfaceInfo() const noexcept override
1063 {
1064 return InterfaceInfo{"PLUGIN_V3ONE_RUNTIME", 1, 0};
1065 }
1066
1074 virtual int32_t setTactic(int32_t tactic) noexcept
1075 {
1076 return 0;
1077 }
1078
1097 virtual int32_t onShapeChange(
1098 PluginTensorDesc const* in, int32_t nbInputs, PluginTensorDesc const* out, int32_t nbOutputs) noexcept = 0;
1099
1113 virtual int32_t enqueue(PluginTensorDesc const* inputDesc, PluginTensorDesc const* outputDesc,
1114 void const* const* inputs, void* const* outputs, void* workspace, cudaStream_t stream) noexcept = 0;
1115
1135 virtual IPluginV3* attachToContext(IPluginResourceContext* context) noexcept = 0;
1136
1142
1146 virtual PluginFieldCollection const* getFieldsToSerialize() noexcept = 0;
1147};
1148} // namespace v_1_0
1149
1150namespace v_2_0
1151{
1152
1154{
1155public:
1156 InterfaceInfo getInterfaceInfo() const noexcept override
1157 {
1158 return InterfaceInfo{"PLUGIN_V3ONE_BUILD", 2, 0};
1159 }
1160
1190 virtual int32_t getAliasedInput(int32_t outputIndex) noexcept
1191 {
1192 return -1;
1193 }
1194};
1195
1196} // namespace v_2_0
1197
1208
1220
1232
1241
1242namespace v_1_0
1243{
1245{
1246public:
1254 virtual void reportLayerTime(char const* layerName, float ms) noexcept = 0;
1255
1256 virtual ~IProfiler() noexcept {}
1257};
1258} // namespace v_1_0
1259
1272
//! Role tag used by IRefitter::setWeights, getMissing and getAll to qualify a layer's weights.
//! Values are contiguous from 0 to 5; EnumMax<WeightsRole>() must stay in sync with the count.
1280enum class WeightsRole : int32_t
1281{
1282 kKERNEL = 0,
1283 kBIAS = 1,
1284 kSHIFT = 2,
1285 kSCALE = 3,
1286 kCONSTANT = 4,
1287 kANY = 5,
1288};
1289
//! Specialization returning the number of WeightsRole enumerators (kKERNEL = 0 .. kANY = 5, so 6).
1291template <>
1292constexpr inline int32_t EnumMax<WeightsRole>() noexcept
1293{
1294 return 6;
1295}
1296
//! Two-valued device tag: kGPU = 0, kDLA = 1. EnumMax<DeviceType>() must stay in sync with the count.
1302enum class DeviceType : int32_t
1303{
1304 kGPU = 0,
1305 kDLA = 1,
1306};
1307
//! Specialization returning the number of DeviceType enumerators (kGPU, kDLA, so 2).
1309template <>
1310constexpr inline int32_t EnumMax<DeviceType>() noexcept
1311{
1312 return 2;
1313}
1314
1325enum class TempfileControlFlag : int32_t
1326{
1329
1334};
1335
1337template <>
1338constexpr inline int32_t EnumMax<TempfileControlFlag>() noexcept
1339{
1340 return 2;
1341}
1342
1349using TempfileControlFlags = uint32_t;
1350
//! Tensor format identifiers returned by ICudaEngine::getTensorFormat.
//! Values are contiguous from 0 (kLINEAR) to 12 (kDHWC); the matching EnumMaxImpl specialization
//! that follows in this file reports 13.
//! NOTE(review): the per-format layout rules (vector width, channel packing, DLA-specific variants)
//! are not visible in this listing - consult the TensorRT data-format documentation before relying
//! on any interpretation of the names.
1381enum class TensorFormat : int32_t
1382{
1388 kLINEAR = 0,
1389
1394 kCHW2 = 1,
1395
1399 kHWC8 = 2,
1400
1414 kCHW4 = 3,
1415
1423 kCHW16 = 4,
1424
1432 kCHW32 = 5,
1433
1438 kDHWC8 = 6,
1439
1444 kCDHW32 = 7,
1445
1449 kHWC = 8,
1450
1459 kDLA_LINEAR = 9,
1460
1474 kDLA_HWC4 = 10,
1475
1480 kHWC16 = 11,
1481
1486 kDHWC = 12
1487};
1488
1489namespace impl
1490{
1492template <>
1494{
1496 static constexpr int32_t kVALUE = 13;
1497};
1498} // namespace impl
1499
1505enum class AllocatorFlag : int32_t
1506{
1508 kRESIZABLE = 0,
1509};
1510
1511namespace impl
1512{
1514template <>
1516{
1518 static constexpr int32_t kVALUE = 1;
1519};
1520} // namespace impl
1521
1522using AllocatorFlags = uint32_t;
1523
1526
1540{
1541public:
1547 enum class Severity : int32_t
1548 {
1550 kINTERNAL_ERROR = 0,
1552 kERROR = 1,
1554 kWARNING = 2,
1556 kINFO = 3,
1558 kVERBOSE = 4,
1559 };
1560
1579 virtual void log(Severity severity, AsciiChar const* msg) noexcept = 0;
1580
1581 ILogger() = default;
1582 virtual ~ILogger() = default;
1583
1584protected:
1585 // @cond SuppressDoxyWarnings
1586 ILogger(ILogger const&) = default;
1587 ILogger(ILogger&&) = default;
1588 ILogger& operator=(ILogger const&) & = default;
1589 ILogger& operator=(ILogger&&) & = default;
1590 // @endcond
1591};
1592
//! Enumerator-count trait for ILogger::Severity, placed in namespace impl.
1593namespace impl
1594{
//! Specialization for ILogger::Severity: kINTERNAL_ERROR = 0 .. kVERBOSE = 4, so kVALUE is 5.
1596template <>
1597struct EnumMaxImpl<ILogger::Severity>
1598{
1600 static constexpr int32_t kVALUE = 5;
1601};
1602} // namespace impl
1603
1604namespace v_1_0
1605{
1606
1608{
1609public:
1635 uint64_t const size, uint64_t const alignment, AllocatorFlags const flags) noexcept = 0;
1636
1637 ~IGpuAllocator() override = default;
1638 IGpuAllocator() = default;
1639
1677 virtual void* reallocate(void* const /*baseAddr*/, uint64_t /*alignment*/, uint64_t /*newSize*/) noexcept
1678 {
1679 return nullptr;
1680 }
1681
1700 TRT_DEPRECATED virtual bool deallocate(void* const memory) noexcept = 0;
1701
    //! Default implementation: ignores the stream argument (its name is commented out) and returns
    //! the result of allocate(size, alignment, flags). Subclasses may override this virtual method.
1730 virtual void* allocateAsync(
1731 uint64_t const size, uint64_t const alignment, AllocatorFlags const flags, cudaStream_t /*stream*/) noexcept
1732 {
1733 return allocate(size, alignment, flags);
1734 }
    //! Default implementation: ignores the stream argument (its name is commented out) and returns
    //! the result of deallocate(memory). Note that deallocate is itself marked TRT_DEPRECATED in this class.
1763 virtual bool deallocateAsync(void* const memory, cudaStream_t /*stream*/) noexcept
1764 {
1765 return deallocate(memory);
1766 }
1767
1771 InterfaceInfo getInterfaceInfo() const noexcept override
1772 {
1773 return {"IGpuAllocator", 1, 0};
1774 }
1775
1776protected:
1777 // @cond SuppressDoxyWarnings
1778 IGpuAllocator(IGpuAllocator const&) = default;
1779 IGpuAllocator(IGpuAllocator&&) = default;
1780 IGpuAllocator& operator=(IGpuAllocator const&) & = default;
1781 IGpuAllocator& operator=(IGpuAllocator&&) & = default;
1782 // @endcond
1783};
1784
1785} // namespace v_1_0
1786
1808
//! Runtime facade: every public method is a thin, noexcept forwarder to the implementation object mImpl.
//! Copy and move are disabled through the INoCopy base.
//! NOTE(review): several declaration lines are missing from this listing; the affected bodies are
//! flagged below and left untouched.
1816class IRuntime : public INoCopy
1817{
1818public:
1819 virtual ~IRuntime() noexcept = default;
1820
    //! Forwards dlaCore to mImpl->setDLACore.
1832 void setDLACore(int32_t dlaCore) noexcept
1833 {
1834 mImpl->setDLACore(dlaCore);
1835 }
1836
    //! Returns mImpl->getDLACore().
1842 int32_t getDLACore() const noexcept
1843 {
1844 return mImpl->getDLACore();
1845 }
1846
    //! Returns mImpl->getNbDLACores().
1850 int32_t getNbDLACores() const noexcept
1851 {
1852 return mImpl->getNbDLACores();
1853 }
1854
    //! Forwards the allocator pointer to mImpl->setGpuAllocator.
    //! NOTE(review): ownership of the allocator is not visible here - confirm.
1865 void setGpuAllocator(IGpuAllocator* allocator) noexcept
1866 {
1867 mImpl->setGpuAllocator(allocator);
1868 }
1869
1881 //
    //! Forwards the recorder pointer to mImpl->setErrorRecorder.
1884 void setErrorRecorder(IErrorRecorder* recorder) noexcept
1885 {
1886 mImpl->setErrorRecorder(recorder);
1887 }
1888
    //! NOTE(review): declaration line missing from the listing; the body returns mImpl->getErrorRecorder().
1900 {
1901 return mImpl->getErrorRecorder();
1902 }
1903
    //! Forwards (blob, size) to mImpl->deserializeCudaEngine and returns the resulting engine pointer.
1914 ICudaEngine* deserializeCudaEngine(void const* blob, std::size_t size) noexcept
1915 {
1916 return mImpl->deserializeCudaEngine(blob, size);
1917 }
1918
    //! NOTE(review): declaration line missing from the listing; the body returns
    //! mImpl->deserializeCudaEngine(streamReader).
1936 {
1937 return mImpl->deserializeCudaEngine(streamReader);
1938 }
1939
    //! NOTE(review): declaration line missing from the listing; the body returns
    //! mImpl->deserializeCudaEngineV2(streamReader).
1957 {
1958 return mImpl->deserializeCudaEngineV2(streamReader);
1959 }
1960
    //! Returns mImpl->getLogger().
1966 ILogger* getLogger() const noexcept
1967 {
1968 return mImpl->getLogger();
1969 }
1970
    //! Forwards maxThreads to mImpl->setMaxThreads and returns its bool result.
1981 bool setMaxThreads(int32_t maxThreads) noexcept
1982 {
1983 return mImpl->setMaxThreads(maxThreads);
1984 }
1985
    //! Returns mImpl->getMaxThreads().
1995 int32_t getMaxThreads() const noexcept
1996 {
1997 return mImpl->getMaxThreads();
1998 }
1999
    //! Forwards path to mImpl->setTemporaryDirectory. The `return` of a void expression is legal C++
    //! and yields nothing.
2030 void setTemporaryDirectory(char const* path) noexcept
2031 {
2032 return mImpl->setTemporaryDirectory(path);
2033 }
2034
    //! Returns mImpl->getTemporaryDirectory().
2041 char const* getTemporaryDirectory() const noexcept
2042 {
2043 return mImpl->getTemporaryDirectory();
2044 }
2045
    //! NOTE(review): declaration line missing from the listing; the body returns
    //! mImpl->setTempfileControlFlags(flags).
2058 {
2059 return mImpl->setTempfileControlFlags(flags);
2060 }
2061
    //! NOTE(review): declaration line missing from the listing; the body returns
    //! mImpl->getTempfileControlFlags().
2070 {
2071 return mImpl->getTempfileControlFlags();
2072 }
2073
    //! NOTE(review): declaration line missing from the listing; the body returns mImpl->getPluginRegistry().
2080 {
2081 return mImpl->getPluginRegistry();
2082 }
2083
    //! Forwards path to mImpl->loadRuntime and returns the resulting IRuntime pointer.
2097 IRuntime* loadRuntime(char const* path) noexcept
2098 {
2099 return mImpl->loadRuntime(path);
2100 }
2101
    //! Forwards allowed to mImpl->setEngineHostCodeAllowed (void result).
2109 void setEngineHostCodeAllowed(bool allowed) noexcept
2110 {
2111 return mImpl->setEngineHostCodeAllowed(allowed);
2112 }
2113
    //! Returns mImpl->getEngineHostCodeAllowed().
2119 bool getEngineHostCodeAllowed() const noexcept
2120 {
2121 return mImpl->getEngineHostCodeAllowed();
2122 }
2123
2124protected:
    //! Implementation object every method delegates to; not initialized or released in this class.
2125 apiv::VRuntime* mImpl;
2126};
2127
//! Refitter facade: every public method is a thin, noexcept forwarder to the implementation object mImpl.
//! Copy and move are disabled through the INoCopy base.
//! Methods prefixed with TRT_DEPRECATED are marked deprecated by that macro.
2135class IRefitter : public INoCopy
2136{
2137public:
2138 virtual ~IRefitter() noexcept = default;
2139
    //! Forwards (layerName, role, weights) to mImpl->setWeights and returns its bool result.
2155 bool setWeights(char const* layerName, WeightsRole role, Weights weights) noexcept
2156 {
2157 return mImpl->setWeights(layerName, role, weights);
2158 }
2159
    //! Returns mImpl->refitCudaEngine().
2172 bool refitCudaEngine() noexcept
2173 {
2174 return mImpl->refitCudaEngine();
2175 }
2176
    //! Forwards (size, layerNames, roles) to mImpl->getMissing and returns its int32_t result.
    //! NOTE(review): presumably size bounds the two output arrays - confirm.
2193 int32_t getMissing(int32_t size, char const** layerNames, WeightsRole* roles) noexcept
2194 {
2195 return mImpl->getMissing(size, layerNames, roles);
2196 }
2197
    //! Forwards (size, layerNames, roles) to mImpl->getAll and returns its int32_t result.
2210 int32_t getAll(int32_t size, char const** layerNames, WeightsRole* roles) noexcept
2211 {
2212 return mImpl->getAll(size, layerNames, roles);
2213 }
2214
    //! Deprecated. Forwards (tensorName, min, max) to mImpl->setDynamicRange.
2232 TRT_DEPRECATED bool setDynamicRange(char const* tensorName, float min, float max) noexcept
2233 {
2234 return mImpl->setDynamicRange(tensorName, min, max);
2235 }
2236
    //! Deprecated. Returns mImpl->getDynamicRangeMin(tensorName).
2248 TRT_DEPRECATED float getDynamicRangeMin(char const* tensorName) const noexcept
2249 {
2250 return mImpl->getDynamicRangeMin(tensorName);
2251 }
2252
    //! Deprecated. Returns mImpl->getDynamicRangeMax(tensorName).
2264 TRT_DEPRECATED float getDynamicRangeMax(char const* tensorName) const noexcept
2265 {
2266 return mImpl->getDynamicRangeMax(tensorName);
2267 }
2268
    //! Deprecated. Forwards (size, tensorNames) to mImpl->getTensorsWithDynamicRange.
2282 TRT_DEPRECATED int32_t getTensorsWithDynamicRange(int32_t size, char const** tensorNames) const noexcept
2283 {
2284 return mImpl->getTensorsWithDynamicRange(size, tensorNames);
2285 }
2286
2298 //
    //! Forwards the recorder pointer to mImpl->setErrorRecorder.
2301 void setErrorRecorder(IErrorRecorder* recorder) noexcept
2302 {
2303 mImpl->setErrorRecorder(recorder);
2304 }
2305
    //! NOTE(review): declaration line missing from the listing; the body returns mImpl->getErrorRecorder().
2317 {
2318 return mImpl->getErrorRecorder();
2319 }
2320
    //! Forwards (name, weights) to mImpl->setNamedWeights and returns its bool result.
2341 bool setNamedWeights(char const* name, Weights weights) noexcept
2342 {
2343 return mImpl->setNamedWeights(name, weights);
2344 }
2345
    //! Forwards (size, weightsNames) to mImpl->getMissingWeights and returns its int32_t result.
2361 int32_t getMissingWeights(int32_t size, char const** weightsNames) noexcept
2362 {
2363 return mImpl->getMissingWeights(size, weightsNames);
2364 }
2365
    //! Forwards (size, weightsNames) to mImpl->getAllWeights and returns its int32_t result.
2377 int32_t getAllWeights(int32_t size, char const** weightsNames) noexcept
2378 {
2379 return mImpl->getAllWeights(size, weightsNames);
2380 }
2381
    //! Returns mImpl->getLogger().
2387 ILogger* getLogger() const noexcept
2388 {
2389 return mImpl->getLogger();
2390 }
2391
    //! Forwards maxThreads to mImpl->setMaxThreads and returns its bool result.
2403 bool setMaxThreads(int32_t maxThreads) noexcept
2404 {
2405 return mImpl->setMaxThreads(maxThreads);
2406 }
2407
    //! Returns mImpl->getMaxThreads().
2417 int32_t getMaxThreads() const noexcept
2418 {
2419 return mImpl->getMaxThreads();
2420 }
2421
    //! Overload taking a TensorLocation; dispatches to the distinct implementation entry point
    //! mImpl->setNamedWeightsWithLocation(name, weights, location).
2444 bool setNamedWeights(char const* name, Weights weights, TensorLocation location) noexcept
2445 {
2446 return mImpl->setNamedWeightsWithLocation(name, weights, location);
2447 }
2448
    //! Returns mImpl->getNamedWeights(weightsName).
2460 Weights getNamedWeights(char const* weightsName) const noexcept
2461 {
2462 return mImpl->getNamedWeights(weightsName);
2463 }
2464
    //! Returns mImpl->getWeightsLocation(weightsName).
2476 TensorLocation getWeightsLocation(char const* weightsName) const noexcept
2477 {
2478 return mImpl->getWeightsLocation(weightsName);
2479 }
2480
    //! Forwards weightsName to mImpl->unsetNamedWeights and returns its bool result.
2492 bool unsetNamedWeights(char const* weightsName) noexcept
2493 {
2494 return mImpl->unsetNamedWeights(weightsName);
2495 }
2496
    //! Forwards the flag to mImpl->setWeightsValidation (void result).
2508 void setWeightsValidation(bool weightsValidation) noexcept
2509 {
2510 return mImpl->setWeightsValidation(weightsValidation);
2511 }
2512
    //! Returns mImpl->getWeightsValidation().
2516 bool getWeightsValidation() const noexcept
2517 {
2518 return mImpl->getWeightsValidation();
2519 }
2520
    //! Forwards stream to mImpl->refitCudaEngineAsync and returns its bool result.
    //! NOTE(review): completion/synchronization semantics relative to the stream are not visible here.
2538 bool refitCudaEngineAsync(cudaStream_t stream) noexcept
2539 {
2540 return mImpl->refitCudaEngineAsync(stream);
2541 }
2542
    //! Returns mImpl->getWeightsPrototype(weightsName).
2556 Weights getWeightsPrototype(char const* weightsName) const noexcept
2557 {
2558 return mImpl->getWeightsPrototype(weightsName);
2559 }
2560
2561protected:
    //! Implementation object every method delegates to; not initialized or released in this class.
2562 apiv::VRefitter* mImpl;
2563};
2564
//! Selector passed to the IOptimizationProfile setters/getters and to ICudaEngine::getProfileShape /
//! getProfileTensorValues. Three values: kMIN = 0, kOPT = 1, kMAX = 2.
2575enum class OptProfileSelector : int32_t
2576{
2577 kMIN = 0,
2578 kOPT = 1,
2579 kMAX = 2
2580};
2581
//! Specialization returning the number of OptProfileSelector enumerators (kMIN, kOPT, kMAX, so 3).
2587template <>
2588constexpr inline int32_t EnumMax<OptProfileSelector>() noexcept
2589{
2590 return 3;
2591}
2592
2616{
2617public:
2645 bool setDimensions(char const* inputName, OptProfileSelector select, Dims const& dims) noexcept
2646 {
2647 return mImpl->setDimensions(inputName, select, dims);
2648 }
2649
2657 Dims getDimensions(char const* inputName, OptProfileSelector select) const noexcept
2658 {
2659 return mImpl->getDimensions(inputName, select);
2660 }
2661
2705 char const* inputName, OptProfileSelector select, int32_t const* values, int32_t nbValues) noexcept
2706 {
2707 return mImpl->setShapeValues(inputName, select, values, nbValues);
2708 }
2709
2718 int32_t getNbShapeValues(char const* inputName) const noexcept
2719 {
2720 return mImpl->getNbShapeValues(inputName);
2721 }
2722
2730 int32_t const* getShapeValues(char const* inputName, OptProfileSelector select) const noexcept
2731 {
2732 return mImpl->getShapeValues(inputName, select);
2733 }
2734
2748 bool setExtraMemoryTarget(float target) noexcept
2749 {
2750 return mImpl->setExtraMemoryTarget(target);
2751 }
2752
2760 float getExtraMemoryTarget() const noexcept
2761 {
2762 return mImpl->getExtraMemoryTarget();
2763 }
2764
2777 bool isValid() const noexcept
2778 {
2779 return mImpl->isValid();
2780 }
2781
2782protected:
2783 apiv::VOptimizationProfile* mImpl;
2784 virtual ~IOptimizationProfile() noexcept = default;
2785};
2786
2794enum class TacticSource : int32_t
2795{
2800
2804
2809
2814
2818};
2819
2820template <>
2821constexpr inline int32_t EnumMax<TacticSource>() noexcept
2822{
2823 return 5;
2824}
2825
2832using TacticSources = uint32_t;
2833
//! Verbosity level reported by the engine's getProfilingVerbosity forwarder.
//! Note the numeric order: kLAYER_NAMES_ONLY = 0, kNONE = 1, kDETAILED = 2 (kNONE is not the zero value).
2843enum class ProfilingVerbosity : int32_t
2844{
2845 kLAYER_NAMES_ONLY = 0,
2846 kNONE = 1,
2847 kDETAILED = 2,
2848};
2849
//! Specialization returning the number of ProfilingVerbosity enumerators (3).
2851template <>
2852constexpr inline int32_t EnumMax<ProfilingVerbosity>() noexcept
2853{
2854 return 3;
2855}
2856
2863using SerializationFlags = uint32_t;
2864
2872enum class SerializationFlag : int32_t
2873{
2874 kEXCLUDE_WEIGHTS = 0,
2876};
2877
2879template <>
2880constexpr inline int32_t EnumMax<SerializationFlag>() noexcept
2881{
2882 return 2;
2883}
2884
2893{
2894public:
2895 virtual ~ISerializationConfig() noexcept = default;
2896
2908 bool setFlags(SerializationFlags serializationFlags) noexcept
2909 {
2910 return mImpl->setFlags(serializationFlags);
2911 }
2912
2921 {
2922 return mImpl->getFlags();
2923 }
2924
2932 bool clearFlag(SerializationFlag serializationFlag) noexcept
2933 {
2934 return mImpl->clearFlag(serializationFlag);
2935 }
2936
2944 bool setFlag(SerializationFlag serializationFlag) noexcept
2945 {
2946 return mImpl->setFlag(serializationFlag);
2947 }
2948
2956 bool getFlag(SerializationFlag serializationFlag) const noexcept
2957 {
2958 return mImpl->getFlag(serializationFlag);
2959 }
2960
2961protected:
2962 apiv::VSerializationConfig* mImpl;
2963};
2964
2977{
2978 kSTATIC = 0,
2979 kON_PROFILE_CHANGE = 1,
2980 kUSER_MANAGED = 2,
2981};
2982
2988template <>
2989constexpr inline int32_t EnumMax<ExecutionContextAllocationStrategy>() noexcept
2990{
2991 return 3;
2992}
2993
//! Engine facade: every public method is a thin forwarder to the implementation object mImpl.
//! Copy and move are disabled through the INoCopy base.
//! Overloads taking an extra profileIndex dispatch to separately named "...V2" implementation entry points.
//! Methods prefixed with TRT_DEPRECATED are marked deprecated by that macro.
//! NOTE(review): several declaration lines are missing from this listing; the affected bodies are
//! flagged below and left untouched.
3001class ICudaEngine : public INoCopy
3002{
3003public:
3004 virtual ~ICudaEngine() noexcept = default;
3005
    //! Returns mImpl->getTensorShape(tensorName).
3016 Dims getTensorShape(char const* tensorName) const noexcept
3017 {
3018 return mImpl->getTensorShape(tensorName);
3019 }
3020
    //! Returns mImpl->getTensorDataType(tensorName).
3031 DataType getTensorDataType(char const* tensorName) const noexcept
3032 {
3033 return mImpl->getTensorDataType(tensorName);
3034 }
3035
    //! Returns mImpl->getNbLayers().
3045 int32_t getNbLayers() const noexcept
3046 {
3047 return mImpl->getNbLayers();
3048 }
3049
    //! Returns the IHostMemory pointer produced by mImpl->serialize().
    //! NOTE(review): ownership of the returned object is not visible here - confirm.
3059 IHostMemory* serialize() const noexcept
3060 {
3061 return mImpl->serialize();
3062 }
3063
    //! NOTE(review): declaration line missing from the listing; the body returns
    //! mImpl->createExecutionContext(strategy).
3078 {
3079 return mImpl->createExecutionContext(strategy);
3080 }
3081
    //! Returns mImpl->getTensorLocation(tensorName).
3094 TensorLocation getTensorLocation(char const* tensorName) const noexcept
3095 {
3096 return mImpl->getTensorLocation(tensorName);
3097 }
3098
    //! Returns mImpl->isShapeInferenceIO(tensorName).
3114 bool isShapeInferenceIO(char const* tensorName) const noexcept
3115 {
3116 return mImpl->isShapeInferenceIO(tensorName);
3117 }
3118
    //! Returns mImpl->getTensorIOMode(tensorName).
3128 TensorIOMode getTensorIOMode(char const* tensorName) const noexcept
3129 {
3130 return mImpl->getTensorIOMode(tensorName);
3131 }
3132
    //! NOTE(review): declaration line missing from the listing; the body returns
    //! mImpl->createExecutionContextWithoutDeviceMemory().
3141 {
3142 return mImpl->createExecutionContextWithoutDeviceMemory();
3143 }
3144
    //! Deprecated. Returns mImpl->getDeviceMemorySize() as size_t; see getDeviceMemorySizeV2 for the int64_t variant.
3152 TRT_DEPRECATED size_t getDeviceMemorySize() const noexcept
3153 {
3154 return mImpl->getDeviceMemorySize();
3155 }
3156
    //! Deprecated. Returns mImpl->getDeviceMemorySizeForProfile(profileIndex) as size_t.
3164 TRT_DEPRECATED size_t getDeviceMemorySizeForProfile(int32_t profileIndex) const noexcept
3165 {
3166 return mImpl->getDeviceMemorySizeForProfile(profileIndex);
3167 }
3168
    //! Returns mImpl->getDeviceMemorySizeV2() as int64_t.
3180 int64_t getDeviceMemorySizeV2() const noexcept
3181 {
3182 return mImpl->getDeviceMemorySizeV2();
3183 }
3184
    //! Returns mImpl->getDeviceMemorySizeForProfileV2(profileIndex) as int64_t.
3196 int64_t getDeviceMemorySizeForProfileV2(int32_t profileIndex) const noexcept
3197 {
3198 return mImpl->getDeviceMemorySizeForProfileV2(profileIndex);
3199 }
3200
    //! Returns mImpl->isRefittable().
3206 bool isRefittable() const noexcept
3207 {
3208 return mImpl->isRefittable();
3209 }
3210
    //! Returns mImpl->getTensorBytesPerComponent(tensorName).
3227 int32_t getTensorBytesPerComponent(char const* tensorName) const noexcept
3228 {
3229 return mImpl->getTensorBytesPerComponent(tensorName);
3230 }
3231
    //! Profile-indexed overload; dispatches to mImpl->getTensorBytesPerComponentV2(tensorName, profileIndex).
3245 int32_t getTensorBytesPerComponent(char const* tensorName, int32_t profileIndex) const noexcept
3246 {
3247 return mImpl->getTensorBytesPerComponentV2(tensorName, profileIndex);
3248 }
3249
    //! Returns mImpl->getTensorComponentsPerElement(tensorName).
3266 int32_t getTensorComponentsPerElement(char const* tensorName) const noexcept
3267 {
3268 return mImpl->getTensorComponentsPerElement(tensorName);
3269 }
3270
    //! Profile-indexed overload; dispatches to mImpl->getTensorComponentsPerElementV2(tensorName, profileIndex).
3284 int32_t getTensorComponentsPerElement(char const* tensorName, int32_t profileIndex) const noexcept
3285 {
3286 return mImpl->getTensorComponentsPerElementV2(tensorName, profileIndex);
3287 }
3288
    //! Returns mImpl->getTensorFormat(tensorName).
3299 TensorFormat getTensorFormat(char const* tensorName) const noexcept
3300 {
3301 return mImpl->getTensorFormat(tensorName);
3302 }
3303
    //! Profile-indexed overload; dispatches to mImpl->getTensorFormatV2(tensorName, profileIndex).
3313 TensorFormat getTensorFormat(char const* tensorName, int32_t profileIndex) const noexcept
3314 {
3315 return mImpl->getTensorFormatV2(tensorName, profileIndex);
3316 }
3317
    //! Returns the C string produced by mImpl->getTensorFormatDesc(tensorName).
    //! NOTE(review): lifetime of the returned string is not visible here - confirm.
3337 char const* getTensorFormatDesc(char const* tensorName) const noexcept
3338 {
3339 return mImpl->getTensorFormatDesc(tensorName);
3340 }
3341
    //! Profile-indexed overload; dispatches to mImpl->getTensorFormatDescV2(tensorName, profileIndex).
3360 char const* getTensorFormatDesc(char const* tensorName, int32_t profileIndex) const noexcept
3361 {
3362 return mImpl->getTensorFormatDescV2(tensorName, profileIndex);
3363 }
3364
    //! Returns mImpl->getTensorVectorizedDim(tensorName).
3377 int32_t getTensorVectorizedDim(char const* tensorName) const noexcept
3378 {
3379 return mImpl->getTensorVectorizedDim(tensorName);
3380 }
3381
    //! Profile-indexed overload; dispatches to mImpl->getTensorVectorizedDimV2(tensorName, profileIndex).
3393 int32_t getTensorVectorizedDim(char const* tensorName, int32_t profileIndex) const noexcept
3394 {
3395 return mImpl->getTensorVectorizedDimV2(tensorName, profileIndex);
3396 }
3397
    //! Returns mImpl->getName().
3408 char const* getName() const noexcept
3409 {
3410 return mImpl->getName();
3411 }
3412
    //! Returns mImpl->getNbOptimizationProfiles().
3419 int32_t getNbOptimizationProfiles() const noexcept
3420 {
3421 return mImpl->getNbOptimizationProfiles();
3422 }
3423
    //! Returns mImpl->getProfileShape(tensorName, profileIndex, select).
3439 Dims getProfileShape(char const* tensorName, int32_t profileIndex, OptProfileSelector select) const noexcept
3440 {
3441 return mImpl->getProfileShape(tensorName, profileIndex, select);
3442 }
3443
    //! Returns the int32_t pointer produced by mImpl->getProfileTensorValues(tensorName, profileIndex, select).
    //! The noexcept specifier sits on the continuation line of the declaration.
3461 int32_t const* getProfileTensorValues(char const* tensorName, int32_t profileIndex, OptProfileSelector select) const
3462 noexcept
3463 {
3464 return mImpl->getProfileTensorValues(tensorName, profileIndex, select);
3465 }
3466
    //! NOTE(review): declaration line missing from the listing; the body returns mImpl->getEngineCapability().
3478 {
3479 return mImpl->getEngineCapability();
3480 }
3481
    //! Forwards the recorder pointer to mImpl->setErrorRecorder (void result).
3496 void setErrorRecorder(IErrorRecorder* recorder) noexcept
3497 {
3498 return mImpl->setErrorRecorder(recorder);
3499 }
3500
    //! NOTE(review): declaration line missing from the listing; the body returns mImpl->getErrorRecorder().
3512 {
3513 return mImpl->getErrorRecorder();
3514 }
3515
    //! NOTE(review): declaration line missing from the listing; the body returns
    //! mImpl->hasImplicitBatchDimension().
3526 {
3527 return mImpl->hasImplicitBatchDimension();
3528 }
3529
    //! NOTE(review): declaration line missing from the listing; the body returns mImpl->getTacticSources().
3542 {
3543 return mImpl->getTacticSources();
3544 }
3545
    //! NOTE(review): declaration line missing from the listing; the body returns mImpl->getProfilingVerbosity().
3554 {
3555 return mImpl->getProfilingVerbosity();
3556 }
3557
    //! NOTE(review): declaration line missing from the listing; the body returns mImpl->createEngineInspector().
3564 {
3565 return mImpl->createEngineInspector();
3566 }
3567
    //! Returns mImpl->getNbIOTensors().
3576 int32_t getNbIOTensors() const noexcept
3577 {
3578 return mImpl->getNbIOTensors();
3579 }
3580
    //! Returns mImpl->getIOTensorName(index).
    //! NOTE(review): presumably index ranges over [0, getNbIOTensors()) - confirm.
3588 char const* getIOTensorName(int32_t index) const noexcept
3589 {
3590 return mImpl->getIOTensorName(index);
3591 }
3592
    //! NOTE(review): declaration line missing from the listing; the body returns
    //! mImpl->getHardwareCompatibilityLevel().
3602 {
3603 return mImpl->getHardwareCompatibilityLevel();
3604 }
3605
    //! Returns mImpl->getNbAuxStreams().
3616 int32_t getNbAuxStreams() const noexcept
3617 {
3618 return mImpl->getNbAuxStreams();
3619 }
3620
    //! NOTE(review): declaration line missing from the listing; the body returns
    //! mImpl->createSerializationConfig().
3627 {
3628 return mImpl->createSerializationConfig();
3629 }
3630
    //! NOTE(review): declaration line missing from the listing; the body returns
    //! mImpl->serializeWithConfig(config).
3643 {
3644 return mImpl->serializeWithConfig(config);
3645 }
3646
    //! Deprecated. Forwards gpuMemoryBudget to mImpl->setWeightStreamingBudget; see setWeightStreamingBudgetV2.
3687 TRT_DEPRECATED bool setWeightStreamingBudget(int64_t gpuMemoryBudget) noexcept
3688 {
3689 return mImpl->setWeightStreamingBudget(gpuMemoryBudget);
3690 }
3691
    //! NOTE(review): declaration line missing from the listing; the body returns
    //! mImpl->getWeightStreamingBudget().
3708 {
3709 return mImpl->getWeightStreamingBudget();
3710 }
3711
    //! NOTE(review): declaration line missing from the listing; the body returns
    //! mImpl->getMinimumWeightStreamingBudget().
3731 {
3732 return mImpl->getMinimumWeightStreamingBudget();
3733 }
3734
    //! Returns mImpl->getStreamableWeightsSize().
3746 int64_t getStreamableWeightsSize() const noexcept
3747 {
3748 return mImpl->getStreamableWeightsSize();
3749 }
3750
    //! Forwards gpuMemoryBudget to mImpl->setWeightStreamingBudgetV2 and returns its bool result.
3788 bool setWeightStreamingBudgetV2(int64_t gpuMemoryBudget) noexcept
3789 {
3790 return mImpl->setWeightStreamingBudgetV2(gpuMemoryBudget);
3791 }
3792
    //! Returns mImpl->getWeightStreamingBudgetV2().
3806 int64_t getWeightStreamingBudgetV2() const noexcept
3807 {
3808 return mImpl->getWeightStreamingBudgetV2();
3809 }
3810
    //! Returns mImpl->getWeightStreamingAutomaticBudget().
3831 int64_t getWeightStreamingAutomaticBudget() const noexcept
3832 {
3833 return mImpl->getWeightStreamingAutomaticBudget();
3834 }
3835
    //! NOTE(review): declaration line missing from the listing; the body returns
    //! mImpl->getWeightStreamingScratchMemorySize().
3860 {
3861 return mImpl->getWeightStreamingScratchMemorySize();
3862 }
3863
    //! Returns mImpl->isDebugTensor(name).
3873 bool isDebugTensor(char const* name) const noexcept
3874 {
3875 return mImpl->isDebugTensor(name);
3876 }
3877
3878protected:
    //! Implementation object every method delegates to; not initialized or released in this class.
3879 apiv::VCudaEngine* mImpl;
3880};
3881
3882namespace v_1_0
3883{
3885{
3886public:
3890 InterfaceInfo getInterfaceInfo() const noexcept override
3891 {
3892 return {"IOutputAllocator", 1, 0};
3893 }
3894
3915 char const* tensorName, void* currentMemory, uint64_t size, uint64_t alignment) noexcept
3916 {
3917 return nullptr;
3918 }
3919
3943 char const* tensorName, void* currentMemory, uint64_t size, uint64_t alignment, cudaStream_t /*stream*/)
3944 {
3945 return reallocateOutput(tensorName, currentMemory, size, alignment);
3946 }
3947
3956 virtual void notifyShape(char const* tensorName, Dims const& dims) noexcept = 0;
3957};
3958} // namespace v_1_0
3959
3968
3969namespace v_1_0
3970{
3972{
3973public:
3977 InterfaceInfo getInterfaceInfo() const noexcept override
3978 {
3979 return {"IDebugListener", 1, 0};
3980 }
3981
3995 virtual bool processDebugTensor(void const* addr, TensorLocation location, DataType type, Dims const& shape,
3996 char const* name, cudaStream_t stream)
3997 = 0;
3998
3999 ~IDebugListener() override = default;
4000};
4001} // namespace v_1_0
4002
4009
4021{
4022public:
4023 virtual ~IExecutionContext() noexcept = default;
4024
4033 void setDebugSync(bool sync) noexcept
4034 {
4035 mImpl->setDebugSync(sync);
4036 }
4037
4043 bool getDebugSync() const noexcept
4044 {
4045 return mImpl->getDebugSync();
4046 }
4047
4053 void setProfiler(IProfiler* profiler) noexcept
4054 {
4055 mImpl->setProfiler(profiler);
4056 }
4057
4063 IProfiler* getProfiler() const noexcept
4064 {
4065 return mImpl->getProfiler();
4066 }
4067
4073 ICudaEngine const& getEngine() const noexcept
4074 {
4075 return mImpl->getEngine();
4076 }
4077
4087 void setName(char const* name) noexcept
4088 {
4089 mImpl->setName(name);
4090 }
4091
4097 char const* getName() const noexcept
4098 {
4099 return mImpl->getName();
4100 }
4101
4124 void setDeviceMemory(void* memory) noexcept
4125 {
4126 mImpl->setDeviceMemory(memory);
4127 }
4128
4146 void setDeviceMemoryV2(void* memory, int64_t size) noexcept
4147 {
4148 return mImpl->setDeviceMemoryV2(memory, size);
4149 }
4150
4167 Dims getTensorStrides(char const* tensorName) const noexcept
4168 {
4169 return mImpl->getTensorStrides(tensorName);
4170 }
4171
4172public:
4182 int32_t getOptimizationProfile() const noexcept
4183 {
4184 return mImpl->getOptimizationProfile();
4185 }
4186
4200 bool setInputShape(char const* tensorName, Dims const& dims) noexcept
4201 {
4202 return mImpl->setInputShape(tensorName, dims);
4203 }
4204
4237 Dims getTensorShape(char const* tensorName) const noexcept
4238 {
4239 return mImpl->getTensorShape(tensorName);
4240 }
4241
4253 bool allInputDimensionsSpecified() const noexcept
4254 {
4255 return mImpl->allInputDimensionsSpecified();
4256 }
4257
4271 {
4272 return mImpl->allInputShapesSpecified();
4273 }
4274
4289 void setErrorRecorder(IErrorRecorder* recorder) noexcept
4290 {
4291 mImpl->setErrorRecorder(recorder);
4292 }
4293
4305 {
4306 return mImpl->getErrorRecorder();
4307 }
4308
4321 bool executeV2(void* const* bindings) noexcept
4322 {
4323 return mImpl->executeV2(bindings);
4324 }
4325
4365 bool setOptimizationProfileAsync(int32_t profileIndex, cudaStream_t stream) noexcept
4366 {
4367 return mImpl->setOptimizationProfileAsync(profileIndex, stream);
4368 }
4369
4381 void setEnqueueEmitsProfile(bool enqueueEmitsProfile) noexcept
4382 {
4383 mImpl->setEnqueueEmitsProfile(enqueueEmitsProfile);
4384 }
4385
4393 bool getEnqueueEmitsProfile() const noexcept
4394 {
4395 return mImpl->getEnqueueEmitsProfile();
4396 }
4397
4423 bool reportToProfiler() const noexcept
4424 {
4425 return mImpl->reportToProfiler();
4426 }
4427
4467 bool setTensorAddress(char const* tensorName, void* data) noexcept
4468 {
4469 return mImpl->setTensorAddress(tensorName, data);
4470 }
4471
4484 void const* getTensorAddress(char const* tensorName) const noexcept
4485 {
4486 return mImpl->getTensorAddress(tensorName);
4487 }
4488
4507 bool setOutputTensorAddress(char const* tensorName, void* data) noexcept
4508 {
4509 return mImpl->setOutputTensorAddress(tensorName, data);
4510 }
4511
4529 bool setInputTensorAddress(char const* tensorName, void const* data) noexcept
4530 {
4531 return mImpl->setInputTensorAddress(tensorName, data);
4532 }
4533
4548 void* getOutputTensorAddress(char const* tensorName) const noexcept
4549 {
4550 return mImpl->getOutputTensorAddress(tensorName);
4551 }
4552
4581 int32_t inferShapes(int32_t nbMaxNames, char const** tensorNames) noexcept
4582 {
4583 return mImpl->inferShapes(nbMaxNames, tensorNames);
4584 }
4585
4599 {
4600 return mImpl->updateDeviceMemorySizeForShapes();
4601 }
4602
4614 bool setInputConsumedEvent(cudaEvent_t event) noexcept
4615 {
4616 return mImpl->setInputConsumedEvent(event);
4617 }
4618
4624 cudaEvent_t getInputConsumedEvent() const noexcept
4625 {
4626 return mImpl->getInputConsumedEvent();
4627 }
4628
4643 bool setOutputAllocator(char const* tensorName, IOutputAllocator* outputAllocator) noexcept
4644 {
4645 return mImpl->setOutputAllocator(tensorName, outputAllocator);
4646 }
4647
4656 IOutputAllocator* getOutputAllocator(char const* tensorName) const noexcept
4657 {
4658 return mImpl->getOutputAllocator(tensorName);
4659 }
4660
4674 int64_t getMaxOutputSize(char const* tensorName) const noexcept
4675 {
4676 return mImpl->getMaxOutputSize(tensorName);
4677 }
4678
4695 {
4696 return mImpl->setTemporaryStorageAllocator(allocator);
4697 }
4698
4705 {
4706 return mImpl->getTemporaryStorageAllocator();
4707 }
4708
4728 bool enqueueV3(cudaStream_t stream) noexcept
4729 {
4730 return mImpl->enqueueV3(stream);
4731 }
4732
4744 void setPersistentCacheLimit(size_t size) noexcept
4745 {
4746 mImpl->setPersistentCacheLimit(size);
4747 }
4748
4755 size_t getPersistentCacheLimit() const noexcept
4756 {
4757 return mImpl->getPersistentCacheLimit();
4758 }
4759
4779 bool setNvtxVerbosity(ProfilingVerbosity verbosity) noexcept
4780 {
4781 return mImpl->setNvtxVerbosity(verbosity);
4782 }
4783
4792 {
4793 return mImpl->getNvtxVerbosity();
4794 }
4795
4822 void setAuxStreams(cudaStream_t* auxStreams, int32_t nbStreams) noexcept
4823 {
4824 mImpl->setAuxStreams(auxStreams, nbStreams);
4825 }
4826
4834 bool setDebugListener(IDebugListener* listener) noexcept
4835 {
4836 return mImpl->setDebugListener(listener);
4837 }
4838
4845 {
4846 return mImpl->getDebugListener();
4847 }
4848
4863 bool setTensorDebugState(char const* name, bool flag) noexcept
4864 {
4865 return mImpl->setTensorDebugState(name, flag);
4866 }
4867
4876 bool setAllTensorsDebugState(bool flag) noexcept
4877 {
4878 return mImpl->setAllTensorsDebugState(flag);
4879 }
4880
4886 bool getDebugState(char const* name) const noexcept
4887 {
4888 return mImpl->getDebugState(name);
4889 }
4890
4891protected:
4892 apiv::VExecutionContext* mImpl;
4893}; // class IExecutionContext
4894
//!
//! \enum LayerInformationFormat
//!
//! \brief Format selector accepted by IEngineInspector::getLayerInformation() and
//! IEngineInspector::getEngineInformation().
//!
enum class LayerInformationFormat : int32_t
{
    kONELINE = 0, //!< One-line format (as the enumerator name indicates).
    kJSON = 1,    //!< JSON format (as the enumerator name indicates).
};
4907
4910template <>
4911constexpr inline int32_t EnumMax<LayerInformationFormat>() noexcept
4912{
4913 return 2;
4914}
4915
4932{
4933public:
4934 virtual ~IEngineInspector() noexcept = default;
4935
4948 bool setExecutionContext(IExecutionContext const* context) noexcept
4949 {
4950 return mImpl->setExecutionContext(context);
4951 }
4952
4961 {
4962 return mImpl->getExecutionContext();
4963 }
4964
4985 char const* getLayerInformation(int32_t layerIndex, LayerInformationFormat format) const noexcept
4986 {
4987 return mImpl->getLayerInformation(layerIndex, format);
4988 }
4989
5008 char const* getEngineInformation(LayerInformationFormat format) const noexcept
5009 {
5010 return mImpl->getEngineInformation(format);
5011 }
5012
5027 void setErrorRecorder(IErrorRecorder* recorder) noexcept
5028 {
5029 mImpl->setErrorRecorder(recorder);
5030 }
5031
5043 {
5044 return mImpl->getErrorRecorder();
5045 }
5046
5047protected:
5048 apiv::VEngineInspector* mImpl;
5049}; // class IEngineInspector
5050
5051} // namespace nvinfer1
5052
5057extern "C" TENSORRTAPI void* createInferRuntime_INTERNAL(void* logger, int32_t version) noexcept;
5058
5063extern "C" TENSORRTAPI void* createInferRefitter_INTERNAL(void* engine, void* logger, int32_t version) noexcept;
5064
5069
5075extern "C" TENSORRTAPI nvinfer1::ILogger* getLogger() noexcept;
5076
5077namespace nvinfer1
5078{
5079namespace // unnamed namespace avoids linkage surprises when linking objects built with different versions of this
5080 // header.
5081{
5087inline IRuntime* createInferRuntime(ILogger& logger) noexcept
5088{
5089 return static_cast<IRuntime*>(createInferRuntime_INTERNAL(&logger, NV_TENSORRT_VERSION));
5090}
5091
5097inline IRefitter* createInferRefitter(ICudaEngine& engine, ILogger& logger) noexcept
5098{
5099 return static_cast<IRefitter*>(createInferRefitter_INTERNAL(&engine, &logger, NV_TENSORRT_VERSION));
5100}
5101
5102} // namespace
5103
5115template <typename T>
5117{
5118public:
5120 {
5121 getPluginRegistry()->registerCreator(instance, "");
5122 }
5123
5124private:
5126 T instance{};
5127};
5128
5129} // namespace nvinfer1
5130
//! Define a static nvinfer1::PluginRegistrar<name> object named pluginRegistrar<name>;
//! constructing that object registers the creator type \p name with the plugin registry.
#define REGISTER_TENSORRT_PLUGIN(name)                                                                                 \
    static nvinfer1::PluginRegistrar<name> pluginRegistrar##name {}
5133
5134namespace nvinfer1
5135{
5145{
5146public:
5154 virtual ILogger* findLogger() = 0;
5155
5156protected:
5157 virtual ~ILoggerFinder() = default;
5158};
5159
5162namespace v_1_0
5163{
5164
5166{
5167public:
5169 ~IGpuAsyncAllocator() override = default;
5170
5200 void* allocateAsync(uint64_t const size, uint64_t const alignment, AllocatorFlags const flags,
5201 cudaStream_t /*stream*/) noexcept override = 0;
5202
5228 bool deallocateAsync(void* const memory, cudaStream_t /*stream*/) noexcept override = 0;
5229
5253 uint64_t const size, uint64_t const alignment, AllocatorFlags const flags) noexcept override
5254 {
5255 return allocateAsync(size, alignment, flags, nullptr);
5256 }
5257
5276 TRT_DEPRECATED bool deallocate(void* const memory) noexcept override
5277 {
5278 return deallocateAsync(memory, nullptr);
5279 }
5280
5284 InterfaceInfo getInterfaceInfo() const noexcept override
5285 {
5286 return {"IGpuAllocator", 1, 0};
5287 }
5288};
5289
5291{
5292public:
5296 InterfaceInfo getInterfaceInfo() const noexcept override
5297 {
5298 return InterfaceInfo{"PLUGIN CREATOR_V3ONE", 1, 0};
5299 }
5300
5318 AsciiChar const* name, PluginFieldCollection const* fc, TensorRTPhase phase) noexcept = 0;
5319
5326 virtual PluginFieldCollection const* getFieldNames() noexcept = 0;
5327
5334 virtual AsciiChar const* getPluginName() const noexcept = 0;
5335
5342 virtual AsciiChar const* getPluginVersion() const noexcept = 0;
5343
5350 virtual AsciiChar const* getPluginNamespace() const noexcept = 0;
5351
5353 virtual ~IPluginCreatorV3One() = default;
5354
5355protected:
5358 IPluginCreatorV3One& operator=(IPluginCreatorV3One const&) & = default;
5359 IPluginCreatorV3One& operator=(IPluginCreatorV3One&&) & = default;
5360};
5361
5362} // namespace v_1_0
5363
5378
5388
5389} // namespace nvinfer1
5390
5394extern "C" TENSORRTAPI int32_t getInferLibMajorVersion() noexcept;
5398extern "C" TENSORRTAPI int32_t getInferLibMinorVersion() noexcept;
5402extern "C" TENSORRTAPI int32_t getInferLibPatchVersion() noexcept;
5406extern "C" TENSORRTAPI int32_t getInferLibBuildVersion() noexcept;
5407
5408#endif // NV_INFER_RUNTIME_H
nvinfer1::ILogger * getLogger() noexcept
Return the logger object.
int32_t getInferLibMajorVersion() noexcept
Return the library major version number.
nvinfer1::IPluginRegistry * getPluginRegistry() noexcept
Return the plugin registry.
int32_t getInferLibPatchVersion() noexcept
Return the library patch version number.
int32_t getInferLibMinorVersion() noexcept
Return the library minor version number.
int32_t getInferLibBuildVersion() noexcept
Return the library build version number.
#define TENSORRTAPI
Definition: NvInferRuntimeBase.h:59
#define NV_TENSORRT_VERSION
Definition: NvInferRuntimeBase.h:91
#define TRT_DEPRECATED
Definition: NvInferRuntimeBase.h:45
#define TRT_DEPRECATED_ENUM
Definition: NvInferRuntimeBase.h:46
Structure to define the dimensions of a tensor.
Definition: NvInferRuntimeBase.h:200
static constexpr int32_t MAX_DIMS
The maximum rank (number of dimensions) supported for a tensor.
Definition: NvInferRuntimeBase.h:203
Analog of class Dims with expressions instead of constants for the dimensions.
Definition: NvInferRuntime.h:350
IDimensionExpr const * d[Dims::MAX_DIMS]
The extent of each dimension.
Definition: NvInferRuntime.h:353
int32_t nbDims
The number of dimensions.
Definition: NvInferRuntime.h:352
An engine for executing inference on a built network, with functionally unsafe features.
Definition: NvInferRuntime.h:3002
int32_t getTensorBytesPerComponent(char const *tensorName) const noexcept
Return the number of bytes per component of an element, or -1 if the tensor is not vectorized or prov...
Definition: NvInferRuntime.h:3227
ISerializationConfig * createSerializationConfig() noexcept
Create a serialization configuration object.
Definition: NvInferRuntime.h:3626
TRT_DEPRECATED int64_t getWeightStreamingBudget() const noexcept
Returns the current weight streaming device memory budget in bytes.
Definition: NvInferRuntime.h:3707
int32_t const * getProfileTensorValues(char const *tensorName, int32_t profileIndex, OptProfileSelector select) const noexcept
Get the minimum / optimum / maximum values (not dimensions) for an input tensor given its name under ...
Definition: NvInferRuntime.h:3461
char const * getIOTensorName(int32_t index) const noexcept
Return name of an IO tensor.
Definition: NvInferRuntime.h:3588
int64_t getWeightStreamingBudgetV2() const noexcept
Returns the current weight streaming device memory budget in bytes.
Definition: NvInferRuntime.h:3806
EngineCapability getEngineCapability() const noexcept
Determine what execution capability this engine has.
Definition: NvInferRuntime.h:3477
IErrorRecorder * getErrorRecorder() const noexcept
Get the ErrorRecorder assigned to this interface.
Definition: NvInferRuntime.h:3511
TensorFormat getTensorFormat(char const *tensorName, int32_t profileIndex) const noexcept
Return the tensor format of given profile, or TensorFormat::kLINEAR if the provided name does not map...
Definition: NvInferRuntime.h:3313
TRT_DEPRECATED bool hasImplicitBatchDimension() const noexcept
Query whether the engine was built with an implicit batch dimension.
Definition: NvInferRuntime.h:3525
apiv::VCudaEngine * mImpl
Definition: NvInferRuntime.h:3879
TRT_DEPRECATED size_t getDeviceMemorySizeForProfile(int32_t profileIndex) const noexcept
Return the maximum device memory required by the context for a profile.
Definition: NvInferRuntime.h:3164
IExecutionContext * createExecutionContext(ExecutionContextAllocationStrategy strategy=ExecutionContextAllocationStrategy::kSTATIC) noexcept
Create an execution context and specify the strategy for allocating internal activation memory.
Definition: NvInferRuntime.h:3076
char const * getTensorFormatDesc(char const *tensorName) const noexcept
Return the human readable description of the tensor format, or empty string if the provided name does...
Definition: NvInferRuntime.h:3337
Dims getProfileShape(char const *tensorName, int32_t profileIndex, OptProfileSelector select) const noexcept
Get the minimum / optimum / maximum dimensions for an input tensor given its name under an optimizati...
Definition: NvInferRuntime.h:3439
bool setWeightStreamingBudgetV2(int64_t gpuMemoryBudget) noexcept
Limit the maximum amount of GPU memory usable for network weights in bytes.
Definition: NvInferRuntime.h:3788
int32_t getNbAuxStreams() const noexcept
Return the number of auxiliary streams used by this engine.
Definition: NvInferRuntime.h:3616
int64_t getStreamableWeightsSize() const noexcept
Get the total size in bytes of all streamable weights.
Definition: NvInferRuntime.h:3746
DataType getTensorDataType(char const *tensorName) const noexcept
Determine the required data type for a buffer from its tensor name.
Definition: NvInferRuntime.h:3031
void setErrorRecorder(IErrorRecorder *recorder) noexcept
Set the ErrorRecorder for this interface.
Definition: NvInferRuntime.h:3496
TacticSources getTacticSources() const noexcept
Return the tactic sources required by this engine.
Definition: NvInferRuntime.h:3541
IHostMemory * serializeWithConfig(ISerializationConfig &config) const noexcept
Serialize the network to a stream with the provided SerializationConfig.
Definition: NvInferRuntime.h:3642
virtual ~ICudaEngine() noexcept=default
int64_t getWeightStreamingAutomaticBudget() const noexcept
TensorRT automatically determines a device memory budget for the model to run. The budget is close to...
Definition: NvInferRuntime.h:3831
bool isDebugTensor(char const *name) const noexcept
Check if a tensor is marked as a debug tensor.
Definition: NvInferRuntime.h:3873
int32_t getTensorVectorizedDim(char const *tensorName, int32_t profileIndex) const noexcept
Return the dimension index along which the buffer is vectorized for the given profile, or -1 if the provided name...
Definition: NvInferRuntime.h:3393
char const * getName() const noexcept
Returns the name of the network associated with the engine.
Definition: NvInferRuntime.h:3408
ProfilingVerbosity getProfilingVerbosity() const noexcept
Return the ProfilingVerbosity the builder config was set to when the engine was built.
Definition: NvInferRuntime.h:3553
bool isShapeInferenceIO(char const *tensorName) const noexcept
True if tensor is required as input for shape calculations or is output from shape calculations.
Definition: NvInferRuntime.h:3114
int64_t getWeightStreamingScratchMemorySize() const noexcept
Returns the size of the scratch memory required by the current weight streaming budget.
Definition: NvInferRuntime.h:3859
TRT_DEPRECATED bool setWeightStreamingBudget(int64_t gpuMemoryBudget) noexcept
Limit the maximum amount of GPU memory usable for network weights in bytes.
Definition: NvInferRuntime.h:3687
int64_t getDeviceMemorySizeV2() const noexcept
Return the maximum device memory required by the context over all profiles.
Definition: NvInferRuntime.h:3180
int32_t getTensorVectorizedDim(char const *tensorName) const noexcept
Return the dimension index that the buffer is vectorized, or -1 if the provided name does not map to ...
Definition: NvInferRuntime.h:3377
TRT_DEPRECATED size_t getDeviceMemorySize() const noexcept
Return the maximum device memory required by the context over all profiles.
Definition: NvInferRuntime.h:3152
int32_t getTensorComponentsPerElement(char const *tensorName, int32_t profileIndex) const noexcept
Return the number of components included in one element of given profile, or -1 if tensor is not vect...
Definition: NvInferRuntime.h:3284
int64_t getDeviceMemorySizeForProfileV2(int32_t profileIndex) const noexcept
Return the maximum device memory required by the context for a profile.
Definition: NvInferRuntime.h:3196
TensorFormat getTensorFormat(char const *tensorName) const noexcept
Return the tensor format, or TensorFormat::kLINEAR if the provided name does not map to an input or o...
Definition: NvInferRuntime.h:3299
IHostMemory * serialize() const noexcept
Serialize the network to a stream.
Definition: NvInferRuntime.h:3059
TensorLocation getTensorLocation(char const *tensorName) const noexcept
Get whether an input or output tensor must be on GPU or CPU.
Definition: NvInferRuntime.h:3094
IEngineInspector * createEngineInspector() const noexcept
Create a new engine inspector which prints the layer information in an engine or an execution context...
Definition: NvInferRuntime.h:3563
int32_t getTensorBytesPerComponent(char const *tensorName, int32_t profileIndex) const noexcept
Return the number of bytes per component of an element for the given profile, or -1 if the tensor is...
Definition: NvInferRuntime.h:3245
HardwareCompatibilityLevel getHardwareCompatibilityLevel() const noexcept
Return the hardware compatibility level of this engine.
Definition: NvInferRuntime.h:3601
int32_t getNbOptimizationProfiles() const noexcept
Get the number of optimization profiles defined for this engine.
Definition: NvInferRuntime.h:3419
TRT_DEPRECATED IExecutionContext * createExecutionContextWithoutDeviceMemory() noexcept
Create an execution context without any device memory allocated.
Definition: NvInferRuntime.h:3140
char const * getTensorFormatDesc(char const *tensorName, int32_t profileIndex) const noexcept
Return the human readable description of the tensor format of given profile, or empty string if the p...
Definition: NvInferRuntime.h:3360
TRT_DEPRECATED int64_t getMinimumWeightStreamingBudget() const noexcept
The minimum number of bytes of GPU memory required by network weights for successful weight streaming...
Definition: NvInferRuntime.h:3730
TensorIOMode getTensorIOMode(char const *tensorName) const noexcept
Determine whether a tensor is an input or output tensor.
Definition: NvInferRuntime.h:3128
int32_t getNbLayers() const noexcept
Get the number of layers in the network.
Definition: NvInferRuntime.h:3045
int32_t getNbIOTensors() const noexcept
Return number of IO tensors.
Definition: NvInferRuntime.h:3576
int32_t getTensorComponentsPerElement(char const *tensorName) const noexcept
Return the number of components included in one element, or -1 if tensor is not vectorized or if the ...
Definition: NvInferRuntime.h:3266
bool isRefittable() const noexcept
Return true if an engine can be refit.
Definition: NvInferRuntime.h:3206
An IDimensionExpr represents an integer expression constructed from constants, input dimensions,...
Definition: NvInferRuntime.h:232
bool isConstant() const noexcept
Return true if expression is a build-time constant.
Definition: NvInferRuntime.h:237
bool isSizeTensor() const noexcept
Return true if this denotes the value of a size tensor.
Definition: NvInferRuntime.h:263
virtual ~IDimensionExpr() noexcept=default
apiv::VDimensionExpr * mImpl
Definition: NvInferRuntime.h:254
int64_t getConstantValue() const noexcept
Get the value of the constant.
Definition: NvInferRuntime.h:248
An engine inspector which prints out the layer information of an engine or an execution context.
Definition: NvInferRuntime.h:4932
char const * getLayerInformation(int32_t layerIndex, LayerInformationFormat format) const noexcept
Get a string describing the information about a specific layer in the current engine or the execution...
Definition: NvInferRuntime.h:4985
IErrorRecorder * getErrorRecorder() const noexcept
Get the ErrorRecorder assigned to this interface.
Definition: NvInferRuntime.h:5042
void setErrorRecorder(IErrorRecorder *recorder) noexcept
Set the ErrorRecorder for this interface.
Definition: NvInferRuntime.h:5027
IExecutionContext const * getExecutionContext() const noexcept
Get the context currently being inspected.
Definition: NvInferRuntime.h:4960
apiv::VEngineInspector * mImpl
Definition: NvInferRuntime.h:5048
virtual ~IEngineInspector() noexcept=default
char const * getEngineInformation(LayerInformationFormat format) const noexcept
Get a string describing the information about all the layers in the current engine or the execution c...
Definition: NvInferRuntime.h:5008
Context for executing inference using an engine, with functionally unsafe features.
Definition: NvInferRuntime.h:4021
IOutputAllocator * getOutputAllocator(char const *tensorName) const noexcept
Get output allocator associated with output tensor of given name, or nullptr if the provided name doe...
Definition: NvInferRuntime.h:4656
IErrorRecorder * getErrorRecorder() const noexcept
Get the ErrorRecorder assigned to this interface.
Definition: NvInferRuntime.h:4304
bool reportToProfiler() const noexcept
Calculate layer timing info for the current optimization profile in IExecutionContext and update the ...
Definition: NvInferRuntime.h:4423
virtual ~IExecutionContext() noexcept=default
void setDeviceMemory(void *memory) noexcept
Set the device memory for use by this execution context.
Definition: NvInferRuntime.h:4124
TRT_DEPRECATED bool allInputShapesSpecified() const noexcept
Whether all input shape bindings have been specified.
Definition: NvInferRuntime.h:4270
bool setTensorDebugState(char const *name, bool flag) noexcept
Set debug state of tensor given the tensor name.
Definition: NvInferRuntime.h:4863
char const * getName() const noexcept
Return the name of the execution context.
Definition: NvInferRuntime.h:4097
IGpuAllocator * getTemporaryStorageAllocator() const noexcept
Get allocator set by setTemporaryStorageAllocator.
Definition: NvInferRuntime.h:4704
void setEnqueueEmitsProfile(bool enqueueEmitsProfile) noexcept
Set whether enqueue emits layer timing to the profiler.
Definition: NvInferRuntime.h:4381
Dims getTensorShape(char const *tensorName) const noexcept
Return the shape of the given input or output.
Definition: NvInferRuntime.h:4237
bool getDebugState(char const *name) const noexcept
Definition: NvInferRuntime.h:4886
bool setInputShape(char const *tensorName, Dims const &dims) noexcept
Set shape of given input.
Definition: NvInferRuntime.h:4200
bool executeV2(void *const *bindings) noexcept
Synchronously execute a network.
Definition: NvInferRuntime.h:4321
bool getEnqueueEmitsProfile() const noexcept
Get the enqueueEmitsProfile state.
Definition: NvInferRuntime.h:4393
void const * getTensorAddress(char const *tensorName) const noexcept
Get memory address bound to given input or output tensor, or nullptr if the provided name does not ma...
Definition: NvInferRuntime.h:4484
bool setOutputAllocator(char const *tensorName, IOutputAllocator *outputAllocator) noexcept
Set output allocator to use for output tensor of given name. Pass nullptr to outputAllocator to unset...
Definition: NvInferRuntime.h:4643
bool setOptimizationProfileAsync(int32_t profileIndex, cudaStream_t stream) noexcept
Select an optimization profile for the current context with async semantics.
Definition: NvInferRuntime.h:4365
apiv::VExecutionContext * mImpl
Definition: NvInferRuntime.h:4892
bool setOutputTensorAddress(char const *tensorName, void *data) noexcept
Set the memory address for a given output tensor.
Definition: NvInferRuntime.h:4507
void setPersistentCacheLimit(size_t size) noexcept
Set the maximum size for persistent cache usage.
Definition: NvInferRuntime.h:4744
size_t getPersistentCacheLimit() const noexcept
Get the maximum size for persistent cache usage.
Definition: NvInferRuntime.h:4755
bool setAllTensorsDebugState(bool flag) noexcept
Definition: NvInferRuntime.h:4876
ICudaEngine const & getEngine() const noexcept
Get the associated engine.
Definition: NvInferRuntime.h:4073
ProfilingVerbosity getNvtxVerbosity() const noexcept
Get the NVTX verbosity of the execution context.
Definition: NvInferRuntime.h:4791
size_t updateDeviceMemorySizeForShapes() noexcept
Recompute the internal activation buffer sizes based on the current input shapes, and return the tota...
Definition: NvInferRuntime.h:4598
void setAuxStreams(cudaStream_t *auxStreams, int32_t nbStreams) noexcept
Set the auxiliary streams that TensorRT should launch kernels on in the next enqueueV3() call.
Definition: NvInferRuntime.h:4822
int64_t getMaxOutputSize(char const *tensorName) const noexcept
Get upper bound on an output tensor's size, in bytes, based on the current optimization profile and i...
Definition: NvInferRuntime.h:4674
int32_t inferShapes(int32_t nbMaxNames, char const **tensorNames) noexcept
Run shape calculations.
Definition: NvInferRuntime.h:4581
bool setDebugListener(IDebugListener *listener) noexcept
Set DebugListener for this execution context.
Definition: NvInferRuntime.h:4834
bool setTensorAddress(char const *tensorName, void *data) noexcept
Set memory address for given input or output tensor.
Definition: NvInferRuntime.h:4467
bool setTemporaryStorageAllocator(IGpuAllocator *allocator) noexcept
Specify allocator to use for internal temporary storage.
Definition: NvInferRuntime.h:4694
void * getOutputTensorAddress(char const *tensorName) const noexcept
Get memory address for given output.
Definition: NvInferRuntime.h:4548
bool enqueueV3(cudaStream_t stream) noexcept
Enqueue inference on a stream.
Definition: NvInferRuntime.h:4728
IDebugListener * getDebugListener() noexcept
Get the DebugListener of this execution context.
Definition: NvInferRuntime.h:4844
int32_t getOptimizationProfile() const noexcept
Get the index of the currently selected optimization profile.
Definition: NvInferRuntime.h:4182
bool setInputTensorAddress(char const *tensorName, void const *data) noexcept
Set memory address for given input.
Definition: NvInferRuntime.h:4529
bool getDebugSync() const noexcept
Get the debug sync flag.
Definition: NvInferRuntime.h:4043
bool setInputConsumedEvent(cudaEvent_t event) noexcept
Mark input as consumed.
Definition: NvInferRuntime.h:4614
Dims getTensorStrides(char const *tensorName) const noexcept
Return the strides of the buffer for the given tensor name.
Definition: NvInferRuntime.h:4167
bool setNvtxVerbosity(ProfilingVerbosity verbosity) noexcept
Set the verbosity of the NVTX markers in the execution context.
Definition: NvInferRuntime.h:4779
IProfiler * getProfiler() const noexcept
Get the profiler.
Definition: NvInferRuntime.h:4063
void setErrorRecorder(IErrorRecorder *recorder) noexcept
Set the ErrorRecorder for this interface.
Definition: NvInferRuntime.h:4289
void setDeviceMemoryV2(void *memory, int64_t size) noexcept
Set the device memory and its corresponding size for use by this execution context.
Definition: NvInferRuntime.h:4146
bool allInputDimensionsSpecified() const noexcept
Whether all dynamic dimensions of input tensors have been specified.
Definition: NvInferRuntime.h:4253
void setProfiler(IProfiler *profiler) noexcept
Set the profiler.
Definition: NvInferRuntime.h:4053
void setName(char const *name) noexcept
Set the name of the execution context.
Definition: NvInferRuntime.h:4087
cudaEvent_t getInputConsumedEvent() const noexcept
The event associated with consuming the input.
Definition: NvInferRuntime.h:4624
Object for constructing IDimensionExpr.
Definition: NvInferRuntime.h:287
IDimensionExpr const * operation(DimensionOperation op, IDimensionExpr const &first, IDimensionExpr const &second) noexcept
Get the operation.
Definition: NvInferRuntime.h:303
virtual ~IExprBuilder() noexcept=default
IDimensionExpr const * constant(int64_t value) noexcept
Return pointer to IDimensionExpr for given value.
Definition: NvInferRuntime.h:292
apiv::VExprBuilder * mImpl
Definition: NvInferRuntime.h:310
IDimensionExpr const * declareSizeTensor(int32_t outputIndex, IDimensionExpr const &opt, IDimensionExpr const &upper)
Declare a size tensor at the given output index, with the specified auto-tuning formula and upper bou...
Definition: NvInferRuntime.h:338
Class to handle library allocated memory that is accessible to the user.
Definition: NvInferRuntime.h:142
void * data() const noexcept
A pointer to the raw data that is owned by the library.
Definition: NvInferRuntime.h:147
DataType type() const noexcept
The type of the memory that was allocated.
Definition: NvInferRuntime.h:159
std::size_t size() const noexcept
The size in bytes of the data that was allocated.
Definition: NvInferRuntime.h:153
apiv::VHostMemory * mImpl
Definition: NvInferRuntime.h:165
virtual ~IHostMemory() noexcept=default
A virtual base class to find a logger. Allows a plugin to find an instance of a logger if it needs to...
Definition: NvInferRuntime.h:5145
virtual ILogger * findLogger()=0
Get the logger used by the engine or execution context which called the plugin method.
virtual ~ILoggerFinder()=default
Application-implemented logging interface for the builder, refitter and runtime.
Definition: NvInferRuntime.h:1540
virtual ~ILogger()=default
Severity
The severity corresponding to a log message.
Definition: NvInferRuntime.h:1548
virtual void log(Severity severity, AsciiChar const *msg) noexcept=0
A callback implemented by the application to handle logging messages.
Forward declaration of IEngineInspector for use by other interfaces.
Definition: NvInferRuntime.h:51
INoCopy & operator=(INoCopy &&other)=delete
INoCopy(INoCopy const &other)=delete
INoCopy(INoCopy &&other)=delete
virtual ~INoCopy()=default
INoCopy & operator=(INoCopy const &other)=delete
Optimization profile for dynamic input dimensions and shape tensors.
Definition: NvInferRuntime.h:2616
int32_t const * getShapeValues(char const *inputName, OptProfileSelector select) const noexcept
Get the minimum / optimum / maximum values for an input shape tensor.
Definition: NvInferRuntime.h:2730
apiv::VOptimizationProfile * mImpl
Definition: NvInferRuntime.h:2783
Dims getDimensions(char const *inputName, OptProfileSelector select) const noexcept
Get the minimum / optimum / maximum dimensions for a dynamic input tensor.
Definition: NvInferRuntime.h:2657
virtual ~IOptimizationProfile() noexcept=default
float getExtraMemoryTarget() const noexcept
Get the extra memory target that has been defined for this profile.
Definition: NvInferRuntime.h:2760
bool setExtraMemoryTarget(float target) noexcept
Set a target for extra GPU memory that may be used by this profile.
Definition: NvInferRuntime.h:2748
bool setDimensions(char const *inputName, OptProfileSelector select, Dims const &dims) noexcept
Set the minimum / optimum / maximum dimensions for a dynamic input tensor.
Definition: NvInferRuntime.h:2645
bool setShapeValues(char const *inputName, OptProfileSelector select, int32_t const *values, int32_t nbValues) noexcept
Set the minimum / optimum / maximum values for an input shape tensor.
Definition: NvInferRuntime.h:2704
bool isValid() const noexcept
Check whether the optimization profile can be passed to an IBuilderConfig object.
Definition: NvInferRuntime.h:2777
int32_t getNbShapeValues(char const *inputName) const noexcept
Get the number of values for an input shape tensor.
Definition: NvInferRuntime.h:2718
Single registration point for all plugins in an application. It is used to find plugin implementation...
Definition: NvInferRuntimeCommon.h:56
virtual TRT_DEPRECATED bool registerCreator(IPluginCreator &creator, AsciiChar const *const pluginNamespace) noexcept=0
Register a plugin creator implementing IPluginCreator. Returns false if any plugin creator with the s...
Interface for plugins to access per context resources provided by TensorRT.
Definition: NvInferRuntime.h:767
virtual IErrorRecorder * getErrorRecorder() const noexcept=0
Get the error recorder associated with the resource context.
IPluginResourceContext & operator=(IPluginResourceContext const &) &=default
virtual IGpuAllocator * getGpuAllocator() const noexcept=0
Get the GPU allocator associated with the resource context.
Similar to IPluginV2Ext, but with support for dynamic shapes.
Definition: NvInferRuntime.h:407
IPluginV2DynamicExt * clone() const noexcept override=0
Clone the plugin object. This copies over internal plugin parameters as well and returns a new plugin...
virtual ~IPluginV2DynamicExt() noexcept
Definition: NvInferRuntime.h:558
Plugin class for user-implemented layers.
Definition: NvInferRuntimePlugin.h:468
Updates weights in an engine.
Definition: NvInferRuntime.h:2136
bool refitCudaEngineAsync(cudaStream_t stream) noexcept
Enqueue weights refitting of the associated engine on the given stream.
Definition: NvInferRuntime.h:2538
int32_t getMaxThreads() const noexcept
Get the maximum number of threads that can be used by the refitter.
Definition: NvInferRuntime.h:2417
TensorLocation getWeightsLocation(char const *weightsName) const noexcept
Get location for the weights associated with the given name.
Definition: NvInferRuntime.h:2476
bool setNamedWeights(char const *name, Weights weights) noexcept
Specify new weights of given name.
Definition: NvInferRuntime.h:2341
int32_t getAllWeights(int32_t size, char const **weightsNames) noexcept
Get names of all weights that could be refit.
Definition: NvInferRuntime.h:2377
ILogger * getLogger() const noexcept
Get the logger with which the refitter was created.
Definition: NvInferRuntime.h:2387
bool refitCudaEngine() noexcept
Refits associated engine.
Definition: NvInferRuntime.h:2172
int32_t getMissingWeights(int32_t size, char const **weightsNames) noexcept
Get names of missing weights.
Definition: NvInferRuntime.h:2361
TRT_DEPRECATED float getDynamicRangeMax(char const *tensorName) const noexcept
Get maximum of dynamic range.
Definition: NvInferRuntime.h:2264
int32_t getMissing(int32_t size, char const **layerNames, WeightsRole *roles) noexcept
Get description of missing weights.
Definition: NvInferRuntime.h:2193
Weights getNamedWeights(char const *weightsName) const noexcept
Get weights associated with the given name.
Definition: NvInferRuntime.h:2460
bool unsetNamedWeights(char const *weightsName) noexcept
Unset weights associated with the given name.
Definition: NvInferRuntime.h:2492
Weights getWeightsPrototype(char const *weightsName) const noexcept
Get the Weights prototype associated with the given name.
Definition: NvInferRuntime.h:2556
bool setMaxThreads(int32_t maxThreads) noexcept
Set the maximum number of threads.
Definition: NvInferRuntime.h:2403
TRT_DEPRECATED float getDynamicRangeMin(char const *tensorName) const noexcept
Get minimum of dynamic range.
Definition: NvInferRuntime.h:2248
TRT_DEPRECATED int32_t getTensorsWithDynamicRange(int32_t size, char const **tensorNames) const noexcept
Get names of all tensors that have refittable dynamic ranges.
Definition: NvInferRuntime.h:2282
bool setNamedWeights(char const *name, Weights weights, TensorLocation location) noexcept
Specify new weights on a specified device of given name.
Definition: NvInferRuntime.h:2444
void setWeightsValidation(bool weightsValidation) noexcept
Set whether to validate weights during refitting.
Definition: NvInferRuntime.h:2508
TRT_DEPRECATED bool setDynamicRange(char const *tensorName, float min, float max) noexcept
Definition: NvInferRuntime.h:2232
apiv::VRefitter * mImpl
Definition: NvInferRuntime.h:2562
int32_t getAll(int32_t size, char const **layerNames, WeightsRole *roles) noexcept
Get description of all weights that could be refit.
Definition: NvInferRuntime.h:2210
virtual ~IRefitter() noexcept=default
bool getWeightsValidation() const noexcept
Get whether to validate weights values during refitting.
Definition: NvInferRuntime.h:2516
void setErrorRecorder(IErrorRecorder *recorder) noexcept
Set the ErrorRecorder for this interface.
Definition: NvInferRuntime.h:2301
IErrorRecorder * getErrorRecorder() const noexcept
Get the ErrorRecorder assigned to this interface.
Definition: NvInferRuntime.h:2316
Allows a serialized functionally unsafe engine to be deserialized.
Definition: NvInferRuntime.h:1817
bool setMaxThreads(int32_t maxThreads) noexcept
Set the maximum number of threads.
Definition: NvInferRuntime.h:1981
IRuntime * loadRuntime(char const *path) noexcept
Load IRuntime from the file.
Definition: NvInferRuntime.h:2097
bool getEngineHostCodeAllowed() const noexcept
Get whether the runtime is allowed to deserialize engines with host executable code.
Definition: NvInferRuntime.h:2119
TempfileControlFlags getTempfileControlFlags() const noexcept
Get the tempfile control flags for this runtime.
Definition: NvInferRuntime.h:2069
void setEngineHostCodeAllowed(bool allowed) noexcept
Set whether the runtime is allowed to deserialize engines with host executable code.
Definition: NvInferRuntime.h:2109
virtual ~IRuntime() noexcept=default
void setTemporaryDirectory(char const *path) noexcept
Set the directory that will be used by this runtime for temporary files.
Definition: NvInferRuntime.h:2030
IPluginRegistry & getPluginRegistry() noexcept
Get the local plugin registry that can be used by the runtime.
Definition: NvInferRuntime.h:2079
apiv::VRuntime * mImpl
Definition: NvInferRuntime.h:2125
int32_t getNbDLACores() const noexcept
Returns number of DLA hardware cores accessible or 0 if DLA is unavailable.
Definition: NvInferRuntime.h:1850
ICudaEngine * deserializeCudaEngine(void const *blob, std::size_t size) noexcept
Deserialize an engine from host memory.
Definition: NvInferRuntime.h:1914
void setTempfileControlFlags(TempfileControlFlags flags) noexcept
Set the tempfile control flags for this runtime.
Definition: NvInferRuntime.h:2057
int32_t getDLACore() const noexcept
Get the DLA core that the engine executes on.
Definition: NvInferRuntime.h:1842
void setGpuAllocator(IGpuAllocator *allocator) noexcept
Set the GPU allocator.
Definition: NvInferRuntime.h:1865
IErrorRecorder * getErrorRecorder() const noexcept
Get the ErrorRecorder assigned to this interface.
Definition: NvInferRuntime.h:1899
ICudaEngine * deserializeCudaEngine(IStreamReaderV2 &streamReader)
Deserialize an engine from a stream. IStreamReaderV2 is expected to support reading to both host and ...
Definition: NvInferRuntime.h:1956
ILogger * getLogger() const noexcept
Get the logger with which the runtime was created.
Definition: NvInferRuntime.h:1966
int32_t getMaxThreads() const noexcept
Get the maximum number of threads that can be used by the runtime.
Definition: NvInferRuntime.h:1995
char const * getTemporaryDirectory() const noexcept
Get the directory that will be used by this runtime for temporary files.
Definition: NvInferRuntime.h:2041
TRT_DEPRECATED ICudaEngine * deserializeCudaEngine(IStreamReader &streamReader)
Deserialize an engine from a stream.
Definition: NvInferRuntime.h:1935
void setErrorRecorder(IErrorRecorder *recorder) noexcept
Set the ErrorRecorder for this interface.
Definition: NvInferRuntime.h:1884
Holds properties for configuring an engine to serialize the binary.
Definition: NvInferRuntime.h:2893
virtual ~ISerializationConfig() noexcept=default
bool clearFlag(SerializationFlag serializationFlag) noexcept
Clear a serialization flag.
Definition: NvInferRuntime.h:2932
bool setFlag(SerializationFlag serializationFlag) noexcept
Set a serialization flag.
Definition: NvInferRuntime.h:2944
SerializationFlags getFlags() const noexcept
Get the serialization flags for this config.
Definition: NvInferRuntime.h:2920
bool getFlag(SerializationFlag serializationFlag) const noexcept
Returns true if the serialization flag is set.
Definition: NvInferRuntime.h:2956
apiv::VSerializationConfig * mImpl
Definition: NvInferRuntime.h:2962
An Interface class for version control.
Definition: NvInferRuntimeBase.h:260
Version information associated with a TRT interface.
Definition: NvInferRuntimeBase.h:225
Register the plugin creator to the registry. The static registry object will be instantiated when the ...
Definition: NvInferRuntime.h:5117
PluginRegistrar()
Definition: NvInferRuntime.h:5119
An array of weights used as a layer parameter.
Definition: NvInferRuntime.h:124
DataType type
The type of the weights.
Definition: NvInferRuntime.h:126
int64_t count
The number of weights in the array.
Definition: NvInferRuntime.h:128
void const * values
The weight values, in a contiguous array.
Definition: NvInferRuntime.h:127
Definition: NvInferRuntime.h:3972
virtual bool processDebugTensor(void const *addr, TensorLocation location, DataType type, Dims const &shape, char const *name, cudaStream_t stream)=0
Callback function that is called when a debug tensor’s value is updated and the debug state of the te...
InterfaceInfo getInterfaceInfo() const noexcept override
Return version information associated with this interface. Applications must not override this method...
Definition: NvInferRuntime.h:3977
~IDebugListener() override=default
Definition: NvInferRuntimeBase.h:397
Definition: NvInferRuntime.h:1608
virtual void * allocateAsync(uint64_t const size, uint64_t const alignment, AllocatorFlags const flags, cudaStream_t) noexcept
A thread-safe callback implemented by the application to handle stream-ordered acquisition of GPU mem...
Definition: NvInferRuntime.h:1730
InterfaceInfo getInterfaceInfo() const noexcept override
Return version information associated with this interface. Applications must not override this method...
Definition: NvInferRuntime.h:1771
virtual TRT_DEPRECATED bool deallocate(void *const memory) noexcept=0
A thread-safe callback implemented by the application to handle release of GPU memory.
~IGpuAllocator() override=default
virtual void * reallocate(void *const, uint64_t, uint64_t) noexcept
A thread-safe callback implemented by the application to resize an existing allocation.
Definition: NvInferRuntime.h:1677
virtual TRT_DEPRECATED void * allocate(uint64_t const size, uint64_t const alignment, AllocatorFlags const flags) noexcept=0
A thread-safe callback implemented by the application to handle acquisition of GPU memory.
virtual bool deallocateAsync(void *const memory, cudaStream_t) noexcept
A thread-safe callback implemented by the application to handle stream-ordered release of GPU memory.
Definition: NvInferRuntime.h:1763
Definition: NvInferRuntime.h:5166
bool deallocateAsync(void *const memory, cudaStream_t) noexcept override=0
A thread-safe callback implemented by the application to handle stream-ordered asynchronous release o...
void * allocateAsync(uint64_t const size, uint64_t const alignment, AllocatorFlags const flags, cudaStream_t) noexcept override=0
A thread-safe callback implemented by the application to handle stream-ordered asynchronous acquisiti...
TRT_DEPRECATED void * allocate(uint64_t const size, uint64_t const alignment, AllocatorFlags const flags) noexcept override
A thread-safe callback implemented by the application to handle acquisition of GPU memory.
Definition: NvInferRuntime.h:5252
TRT_DEPRECATED bool deallocate(void *const memory) noexcept override
A thread-safe callback implemented by the application to handle release of GPU memory.
Definition: NvInferRuntime.h:5276
InterfaceInfo getInterfaceInfo() const noexcept override
Return version information associated with this interface. Applications must not override this method...
Definition: NvInferRuntime.h:5284
~IGpuAsyncAllocator() override=default
Definition: NvInferRuntime.h:3885
InterfaceInfo getInterfaceInfo() const noexcept override
Return version information associated with this interface. Applications must not override this method...
Definition: NvInferRuntime.h:3890
virtual void * reallocateOutputAsync(char const *tensorName, void *currentMemory, uint64_t size, uint64_t alignment, cudaStream_t)
Return a pointer to memory for an output tensor, or nullptr if memory cannot be allocated....
Definition: NvInferRuntime.h:3942
virtual void notifyShape(char const *tensorName, Dims const &dims) noexcept=0
Called by TensorRT when the shape of the output tensor is known.
virtual TRT_DEPRECATED void * reallocateOutput(char const *tensorName, void *currentMemory, uint64_t size, uint64_t alignment) noexcept
Return a pointer to memory for an output tensor, or nullptr if memory cannot be allocated....
Definition: NvInferRuntime.h:3914
Definition: NvInferPluginBase.h:139
Definition: NvInferPluginBase.h:191
Definition: NvInferRuntime.h:5291
virtual PluginFieldCollection const * getFieldNames() noexcept=0
Return a list of fields that need to be passed to createPlugin() when creating a plugin for use in th...
InterfaceInfo getInterfaceInfo() const noexcept override
Return version information associated with this interface. Applications must not override this method...
Definition: NvInferRuntime.h:5296
virtual IPluginV3 * createPlugin(AsciiChar const *name, PluginFieldCollection const *fc, TensorRTPhase phase) noexcept=0
Return a plugin object. Return nullptr in case of error.
Definition: NvInferPluginBase.h:204
Definition: NvInferRuntime.h:836
virtual int32_t getFormatCombinationLimit() noexcept
Return the maximum number of format combinations that will be timed by TensorRT during the build phas...
Definition: NvInferRuntime.h:1039
virtual int32_t getNbOutputs() const noexcept=0
Get the number of outputs from the plugin.
virtual int32_t configurePlugin(DynamicPluginTensorDesc const *in, int32_t nbInputs, DynamicPluginTensorDesc const *out, int32_t nbOutputs) noexcept=0
Configure the plugin.
virtual int32_t getNbTactics() noexcept
Query for the number of custom tactics the plugin intends to use.
Definition: NvInferRuntime.h:1015
virtual char const * getMetadataString() noexcept
Query for a string representing the configuration of the plugin. May be called anytime after plugin c...
Definition: NvInferRuntime.h:1050
virtual char const * getTimingCacheID() noexcept
Called to query the suffix to use for the timing cache ID. May be called anytime after plugin creatio...
Definition: NvInferRuntime.h:1031
virtual bool supportsFormatCombination(int32_t pos, DynamicPluginTensorDesc const *inOut, int32_t nbInputs, int32_t nbOutputs) noexcept=0
Return true if plugin supports the format and datatype for the input/output indexed by pos.
virtual int32_t getOutputDataTypes(DataType *outputTypes, int32_t nbOutputs, const DataType *inputTypes, int32_t nbInputs) const noexcept=0
Provide the data types of the plugin outputs if the input tensors have the data types provided.
InterfaceInfo getInterfaceInfo() const noexcept override
Return version information associated with this interface. Applications must not override this method...
Definition: NvInferRuntime.h:848
virtual int32_t getOutputShapes(DimsExprs const *inputs, int32_t nbInputs, DimsExprs const *shapeInputs, int32_t nbShapeInputs, DimsExprs *outputs, int32_t nbOutputs, IExprBuilder &exprBuilder) noexcept=0
Provide expressions for computing dimensions of the output tensors from dimensions of the input tenso...
virtual int32_t getValidTactics(int32_t *tactics, int32_t nbTactics) noexcept
Query for any custom tactics that the plugin intends to use.
Definition: NvInferRuntime.h:1007
Definition: NvInferRuntime.h:793
InterfaceInfo getInterfaceInfo() const noexcept override
Return version information associated with this interface. Applications must not override this method...
Definition: NvInferRuntime.h:798
virtual AsciiChar const * getPluginName() const noexcept=0
Return the plugin name. Should match the plugin name returned by the corresponding plugin creator.
Definition: NvInferRuntime.h:1057
InterfaceInfo getInterfaceInfo() const noexcept override
Return version information associated with this interface. Applications must not override this method...
Definition: NvInferRuntime.h:1062
virtual int32_t onShapeChange(PluginTensorDesc const *in, int32_t nbInputs, PluginTensorDesc const *out, int32_t nbOutputs) noexcept=0
Called when a plugin is being prepared for execution for specific dimensions. This could happen multi...
virtual PluginFieldCollection const * getFieldsToSerialize() noexcept=0
Get the plugin fields which should be serialized.
virtual int32_t setTactic(int32_t tactic) noexcept
Set the tactic to be used in the subsequent call to enqueue(). If no custom tactics were advertised,...
Definition: NvInferRuntime.h:1074
virtual int32_t enqueue(PluginTensorDesc const *inputDesc, PluginTensorDesc const *outputDesc, void const *const *inputs, void *const *outputs, void *workspace, cudaStream_t stream) noexcept=0
Execute the layer.
virtual IPluginV3 * attachToContext(IPluginResourceContext *context) noexcept=0
Clone the plugin, attach the cloned plugin object to an execution context and grant the cloned plugin ...
Definition: NvInferRuntime.h:1245
virtual ~IProfiler() noexcept
Definition: NvInferRuntime.h:1256
virtual void reportLayerTime(char const *layerName, float ms) noexcept=0
Layer time reporting callback.
Definition: NvInferRuntime.h:631
~IStreamReader() override=default
IStreamReader & operator=(IStreamReader const &) &=default
IStreamReader & operator=(IStreamReader &&) &=default
virtual int64_t read(void *destination, int64_t nbBytes)=0
Read the next number of bytes in the stream.
IStreamReader(IStreamReader &&)=default
IStreamReader(IStreamReader const &)=default
InterfaceInfo getInterfaceInfo() const noexcept override
Return version information associated with this interface. Applications must not override this method...
Definition: NvInferRuntime.h:643
Definition: NvInferRuntime.h:695
IStreamReaderV2 & operator=(IStreamReaderV2 const &) &=default
InterfaceInfo getInterfaceInfo() const noexcept override
Return version information associated with this interface. Applications must not override this method...
Definition: NvInferRuntime.h:707
IStreamReaderV2(IStreamReaderV2 &&)=default
~IStreamReaderV2() override=default
virtual int64_t read(void *destination, int64_t nbBytes, cudaStream_t stream) noexcept=0
Read the next number of bytes in the stream asynchronously.
IStreamReaderV2(IStreamReaderV2 const &)=default
virtual bool seek(int64_t offset, SeekPosition where) noexcept=0
Sets the position of the stream to the given offset.
IStreamReaderV2 & operator=(IStreamReaderV2 &&) &=default
Definition: NvInferRuntime.h:1154
virtual int32_t getAliasedInput(int32_t outputIndex) noexcept
Communicates to TensorRT that the output at the specified output index is aliased to the input at the...
Definition: NvInferRuntime.h:1190
InterfaceInfo getInterfaceInfo() const noexcept override
Return version information associated with this interface. Applications must not override this method...
Definition: NvInferRuntime.h:1156
IRefitter * createInferRefitter(ICudaEngine &engine, ILogger &logger) noexcept
Create an instance of an IRefitter class.
Definition: NvInferRuntime.h:5097
IRuntime * createInferRuntime(ILogger &logger) noexcept
Create an instance of an IRuntime class.
Definition: NvInferRuntime.h:5087
The TensorRT API version 1 namespace.
uint32_t TacticSources
Represents a collection of one or more TacticSource values combined using bitwise-OR operations.
Definition: NvInferRuntime.h:2832
v_1_0::IOutputAllocator IOutputAllocator
Definition: NvInferRuntime.h:3967
EngineCapability
List of supported engine capability flows.
Definition: NvInferRuntime.h:76
DimensionOperation
An operation on two IDimensionExpr, which represent integer expressions used in dimension computation...
Definition: NvInferRuntime.h:179
@ kSUB
Subtract the second element from the first.
@ kSUM
Sum of the two operands.
@ kPROD
Product of the two operands.
@ kFLOOR_DIV
Floor division of the first element by the second.
@ kEQUAL
1 if operands are equal, 0 otherwise.
@ kMIN
Minimum of the two operands.
@ kLESS
1 if first operand is less than second operand, 0 otherwise.
@ kMAX
Maximum of the two operands.
@ kCEIL_DIV
Division rounding up.
v_1_0::IPluginV3OneCore IPluginV3OneCore
Definition: NvInferRuntime.h:1207
TensorIOMode
Definition of tensor IO Mode.
Definition: NvInferRuntimeBase.h:640
HardwareCompatibilityLevel
Describes requirements of compatibility with GPU architectures other than that of the GPU on which th...
Definition: NvInfer.h:8760
SerializationFlag
List of valid flags that the engine can enable when serializing the bytes.
Definition: NvInferRuntime.h:2873
@ kEXCLUDE_WEIGHTS
Exclude the weights that can be refitted.
v_1_0::IProfiler IProfiler
Definition: NvInferRuntime.h:1271
SeekPosition
Controls the seek mode of IStreamReaderV2.
Definition: NvInferRuntime.h:681
@ kSET
From the beginning of the file.
@ kCUR
From the current position of the file.
@ kEND
From the tail of the file.
v_1_0::IStreamReaderV2 IStreamReaderV2
Definition: NvInferRuntime.h:751
uint32_t TempfileControlFlags
Represents a collection of one or more TempfileControlFlag values combined using bitwise-OR operation...
Definition: NvInferRuntime.h:1349
v_1_0::IGpuAllocator IGpuAllocator
Definition: NvInferRuntime.h:1807
constexpr int32_t EnumMax< WeightsRole >() noexcept
Maximum number of elements in WeightsRole enum.
Definition: NvInferRuntime.h:1292
char_t AsciiChar
Definition: NvInferRuntimeBase.h:105
TensorRTPhase
Indicates a phase of operation of TensorRT.
Definition: NvInferPluginBase.h:114
@ kV2_DYNAMICEXT
IPluginV2DynamicExt.
constexpr int32_t EnumMax< LayerInformationFormat >() noexcept
Definition: NvInferRuntime.h:4911
DataType
The type of weights and tensors.
Definition: NvInferRuntimeBase.h:133
DeviceType
The device that this layer/network will execute on.
Definition: NvInferRuntime.h:1303
@ kSCALE
Scale layer.
@ kCONSTANT
Constant layer.
v_1_0::IDebugListener IDebugListener
Definition: NvInferRuntime.h:4008
TempfileControlFlag
Flags used to control TensorRT's behavior when creating executable temporary files.
Definition: NvInferRuntime.h:1326
@ kALLOW_IN_MEMORY_FILES
Allow creating and loading files in-memory (or unnamed files).
constexpr int32_t EnumMax< OptProfileSelector >() noexcept
Number of different values of OptProfileSelector enum.
Definition: NvInferRuntime.h:2588
WeightsRole
How a layer uses particular Weights.
Definition: NvInferRuntime.h:1281
@ kSHIFT
shift part of IScaleLayer
@ kANY
Any other weights role.
@ kBIAS
bias for IConvolutionLayer or IDeconvolutionLayer
@ kKERNEL
kernel for IConvolutionLayer or IDeconvolutionLayer
constexpr int32_t EnumMax< ProfilingVerbosity >() noexcept
Maximum number of profile verbosity levels in ProfilingVerbosity enum.
Definition: NvInferRuntime.h:2852
ProfilingVerbosity
List of verbosity levels of layer information exposed in NVTX annotations and in IEngineInspector.
Definition: NvInferRuntime.h:2844
@ kLAYER_NAMES_ONLY
Print only the layer names. This is the default setting.
@ kDETAILED
Print detailed layer information including layer names and layer parameters.
TacticSource
List of tactic sources for TensorRT.
Definition: NvInferRuntime.h:2795
TensorFormat PluginFormat
PluginFormat is reserved for backward compatibility.
Definition: NvInferRuntimePlugin.h:54
v_1_0::IPluginV3OneRuntime IPluginV3OneRuntime
Definition: NvInferRuntime.h:1231
@ kMIN
Minimum of the two elements.
constexpr int32_t EnumMax< TempfileControlFlag >() noexcept
Maximum number of elements in TempfileControlFlag enum.
Definition: NvInferRuntime.h:1338
uint32_t SerializationFlags
Represents one or more SerializationFlag values using binary OR operations, e.g., 1U << Serialization...
Definition: NvInferRuntime.h:2863
@ kLINEAR
Supports linear (1D), bilinear (2D), and trilinear (3D) interpolation.
v_1_0::IPluginV3OneBuild IPluginV3OneBuild
Definition: NvInferRuntime.h:1219
TensorFormat
Format of the input/output tensors.
Definition: NvInferRuntime.h:1382
ExecutionContextAllocationStrategy
Different memory allocation behaviors for IExecutionContext.
Definition: NvInferRuntime.h:2977
@ kSTATIC
Default static allocation with the maximum size across all profiles.
@ kUSER_MANAGED
The user supplies custom allocation to the execution context.
@ kON_PROFILE_CHANGE
Reallocate for a profile when it's selected.
constexpr int32_t EnumMax< TacticSource >() noexcept
Maximum number of tactic sources in TacticSource enum.
Definition: NvInferRuntime.h:2821
LayerInformationFormat
The format in which the IEngineInspector prints the layer information.
Definition: NvInferRuntime.h:4903
@ kJSON
Print layer information in JSON format.
@ kONELINE
Print layer information in one line per layer.
v_1_0::IStreamReader IStreamReader
Definition: NvInferRuntime.h:674
AllocatorFlag
Allowed type of memory allocation.
Definition: NvInferRuntime.h:1506
@ kRESIZABLE
TensorRT may call realloc() on this allocation.
@ kMAX
Maximum over elements.
constexpr int32_t EnumMax< DeviceType >() noexcept
Maximum number of elements in DeviceType enum.
Definition: NvInferRuntime.h:1310
constexpr int32_t EnumMax< DimensionOperation >() noexcept
Maximum number of elements in DimensionOperation enum.
Definition: NvInferRuntime.h:193
constexpr int32_t EnumMax< ExecutionContextAllocationStrategy >() noexcept
Maximum number of memory allocation strategies in ExecutionContextAllocationStrategy enum.
Definition: NvInferRuntime.h:2989
constexpr int32_t EnumMax< SerializationFlag >() noexcept
Maximum number of serialization flags in SerializationFlag enum.
Definition: NvInferRuntime.h:2880
TensorLocation
The location for tensor data storage, device or host.
Definition: NvInferRuntime.h:204
@ kHOST
Data stored on host.
@ kDEVICE
Data stored on device.
OptProfileSelector
When setting or querying optimization profile parameters (such as shape tensor inputs or dynamic dime...
Definition: NvInferRuntime.h:2576
@ kOPT
This is used to set or get the value that is used in the optimization (kernel selection).
uint32_t AllocatorFlags
Definition: NvInferRuntime.h:1522
Summarizes tensors that a plugin might see for an input or output.
Definition: NvInferRuntime.h:362
Dims min
Lower bounds on tensor’s dimensions.
Definition: NvInferRuntime.h:367
Dims max
Upper bounds on tensor’s dimensions.
Definition: NvInferRuntime.h:370
Dims opt
Optimum value of tensor’s dimensions specified for auto-tuning.
Definition: NvInferRuntime.h:373
PluginTensorDesc desc
Information required to interpret a pointer to tensor data, except that desc.dims has -1 in place of ...
Definition: NvInferRuntime.h:364
Plugin field collection struct.
Definition: NvInferPluginBase.h:101
Fields that a plugin might see for an input or output.
Definition: NvInferRuntimePlugin.h:73
Declaration of EnumMaxImpl struct to store maximum number of elements in an enumeration type.
Definition: NvInferRuntimeBase.h:118

  Copyright © 2024 NVIDIA Corporation
  Privacy Policy | Manage My Privacy | Do Not Sell or Share My Data | Terms of Service | Accessibility | Corporate Policies | Product Security | Contact