TensorRT 11.0.0
NvInferRuntime.h
Go to the documentation of this file.
1/*
2 * SPDX-FileCopyrightText: Copyright (c) 1993-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
3 * SPDX-License-Identifier: Apache-2.0
4 *
5 * Licensed under the Apache License, Version 2.0 (the "License");
6 * you may not use this file except in compliance with the License.
7 * You may obtain a copy of the License at
8 *
9 * http://www.apache.org/licenses/LICENSE-2.0
10 *
11 * Unless required by applicable law or agreed to in writing, software
12 * distributed under the License is distributed on an "AS IS" BASIS,
13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 * See the License for the specific language governing permissions and
15 * limitations under the License.
16 */
17
18#ifndef NV_INFER_RUNTIME_H
19#define NV_INFER_RUNTIME_H
20
26
27#include "NvInferImpl.h" // IWYU pragma: export
28#define NV_INFER_INTERNAL_INCLUDE 1
29#include "NvInferPluginBase.h" // IWYU pragma: export
30#undef NV_INFER_INTERNAL_INCLUDE
31#include "NvInferRuntimeCommon.h" // IWYU pragma: export
32
33namespace nvinfer1
34{
35
// Forward declarations so these types can be named by pointer or reference before they are
// defined (for example, IRuntime::deserializeCudaEngine below returns ICudaEngine*).
36class IExecutionContext;
37class ICudaEngine;
38class IPluginFactory;
39class IEngineInspector;
40
49
// Body of INoCopy (name taken from the constructors below).
// NOTE(review): the declaration line that opens this class (listing line 50) is missing here.
// Every special member is protected, so the type can only be created and destroyed through
// a derived class. Copy and move construction/assignment are deleted, so derived classes do
// not receive implicit copy or move operations.
51{
52protected:
53 INoCopy() = default;
54 virtual ~INoCopy() = default;
55 INoCopy(INoCopy const& other) = delete;
56 INoCopy& operator=(INoCopy const& other) = delete;
57 INoCopy(INoCopy&& other) = delete;
58 INoCopy& operator=(INoCopy&& other) = delete;
59};
60
// Scoped enumeration EngineCapability with int32_t as its underlying type.
// NOTE(review): listing lines 91-96 are absent, so an enumerator may have been dropped;
// only kSTANDARD and kSAFETY are visible.
75enum class EngineCapability : int32_t
76{
81 kSTANDARD = 0,
82
89 kSAFETY = 1,
90
97};
98
// Explicit template specialization that exposes the compile-time constant kVALUE = 3.
// NOTE(review): the line naming the specialized template (listing line 101) is missing;
// presumably this is the enumerator count for EngineCapability - confirm against the header.
100template <>
102{
103 static constexpr int32_t kVALUE = 3;
104};
105
// NOTE(review): the declaration line for this type (listing line 120) is missing; it is
// presumably the Weights type used by IRefitter below - confirm.
// Aggregate with public fields: a pointer to read-only data and a 64-bit signed count.
// NOTE(review): listing line 123 is also absent, so a field may have been dropped before values.
121{
122public:
124 void const* values;
125 int64_t count;
126};
127
// Handle derived from INoCopy whose three accessors are inline wrappers that forward to the
// implementation object held in mImpl.
138class IHostMemory : public INoCopy
139{
140public:
// Pure virtual destructor; its definition is supplied inline after the class body.
141 virtual ~IHostMemory() noexcept = 0;
142
// Returns the result of mImpl->data(). The pointer is non-const even though the method is const.
144 void* data() const noexcept
145 {
146 return mImpl->data();
147 }
148
// Returns the result of mImpl->size().
150 std::size_t size() const noexcept
151 {
152 return mImpl->size();
153 }
154
// Returns the result of mImpl->type().
156 DataType type() const noexcept
157 {
158 return mImpl->type();
159 }
160
161protected:
// Implementation pointer that all accessors dereference. It has no default initializer here.
// NOTE(review): confirm where it is assigned before the accessors are called.
162 apiv::VHostMemory* mImpl;
163};
164
// Out-of-class definition of the pure virtual destructor, so derived destructors can chain to it.
165inline IHostMemory::~IHostMemory() noexcept = default;
166
// Scoped enumeration of the operation codes accepted by IExprBuilder::operation (see the
// DimensionOperation parameter there). Values are consecutive from 0 to 8.
177enum class DimensionOperation : int32_t
178{
179 kSUM = 0,
180 kPROD = 1,
181 kMAX = 2,
182 kMIN = 3,
183 kSUB = 4,
184 kEQUAL = 5,
185 kLESS = 6,
186 kFLOOR_DIV = 7,
187 kCEIL_DIV = 8
188};
189
// Explicit template specialization exposing kVALUE = 9, which equals the number of
// enumerators listed above.
// NOTE(review): the line naming the specialized template (listing line 192) is missing.
191template <>
193{
194 static constexpr int32_t kVALUE = 9;
195};
196
// Scoped enumeration with two values, kDEVICE and kHOST, stored as int32_t.
202enum class TensorLocation : int32_t
203{
204 kDEVICE = 0,
205 kHOST = 1,
206};
207
// Explicit template specialization exposing kVALUE = 2, matching the two enumerators above.
// NOTE(review): the line naming the specialized template (listing line 210) is missing.
209template <>
211{
212 static constexpr int32_t kVALUE = 2;
213};
214
// Body of IDimensionExpr (name taken from the destructor below).
// NOTE(review): the declaration line that opens this class (listing line 227) is missing.
// The public methods are inline wrappers that forward to mImpl.
228{
229public:
// Returns the result of mImpl->isConstant().
233 bool isConstant() const noexcept
234 {
235 return mImpl->isConstant();
236 }
237
// Returns the result of mImpl->getConstantValue() as a 64-bit signed integer.
244 int64_t getConstantValue() const noexcept
245 {
246 return mImpl->getConstantValue();
247 }
248
249protected:
// Implementation pointer used by every accessor; no default initializer is given here.
250 apiv::VDimensionExpr* mImpl;
// Protected pure virtual destructor: code outside the hierarchy cannot delete through this type.
251 virtual ~IDimensionExpr() noexcept = 0;
252
253public:
// Returns the result of mImpl->isSizeTensor().
259 bool isSizeTensor() const noexcept
260 {
261 return mImpl->isSizeTensor();
262 }
263};
264
// Out-of-class definition required because the destructor is declared pure virtual.
265inline IDimensionExpr::~IDimensionExpr() noexcept = default;
266
// Factory-style interface derived from INoCopy. Each public method forwards to mImpl and
// returns a pointer to a const IDimensionExpr.
284class IExprBuilder : public INoCopy
285{
286public:
// Forwards value to mImpl->constant and returns its result.
290 IDimensionExpr const* constant(int64_t value) noexcept
291 {
292 return mImpl->constant(value);
293 }
294
// NOTE(review): the first line of this signature (return type and name, listing line 301)
// is missing. The body forwards op, first and second to mImpl->operation.
302 DimensionOperation op, IDimensionExpr const& first, IDimensionExpr const& second) noexcept
303 {
304 return mImpl->operation(op, first, second);
305 }
306
307protected:
// Implementation pointer used by every method; no default initializer is given here.
308 apiv::VExprBuilder* mImpl;
// Protected pure virtual destructor, defined inline after the class.
309 virtual ~IExprBuilder() noexcept = 0;
310
311public:
// Forwards outputIndex, opt and upper to mImpl->declareSizeTensor.
// Unlike the other methods of this class, this one is not marked noexcept.
336 IDimensionExpr const* declareSizeTensor(int32_t outputIndex, IDimensionExpr const& opt, IDimensionExpr const& upper)
337 {
338 return mImpl->declareSizeTensor(outputIndex, opt, upper);
339 }
340};
341
// Out-of-class definition required because the destructor is declared pure virtual.
342inline IExprBuilder::~IExprBuilder() noexcept = default;
343
// NOTE(review): the declaration line for this type (listing line 349) is missing; it is
// presumably the DimsExprs type used by the plugin interfaces below - confirm.
// Only the nbDims field is visible; listing line 353 is absent, so another member may
// have been dropped.
350{
351public:
352 int32_t nbDims;
354};
355
// NOTE(review): both the declaration line (listing line 361) and every member of this type
// are missing from the listing; only the braces remain. It is presumably the
// DynamicPluginTensorDesc type used by configurePlugin below - confirm against the header.
362{
365
368
371
374};
375
// Body of IPluginV2DynamicExt (name taken from clone() and the destructor below).
// NOTE(review): the declaration line with the class name and base class (listing line 406)
// is missing. The `override` and `final` specifiers below show that a base class exists.
// The public section declares the pure virtual methods a plugin must implement; the private
// section seals older overloads so that derived classes cannot override them.
407{
408public:
// Pure virtual; overrides a base-class clone() and narrows the return type to this class.
409 IPluginV2DynamicExt* clone() const noexcept override = 0;
410
// Pure virtual. Receives an output index, an array of nbInputs DimsExprs and an expression
// builder, and returns a DimsExprs by value.
435 virtual DimsExprs getOutputDimensions(
436 int32_t outputIndex, DimsExprs const* inputs, int32_t nbInputs, IExprBuilder& exprBuilder) noexcept = 0;
437
// Compile-time constant with value 100. NOTE(review): its use is not visible in this listing.
441 static constexpr int32_t kFORMAT_COMBINATION_LIMIT = 100;
442
// Pure virtual predicate over position pos in the inOut descriptor array.
475 virtual bool supportsFormatCombination(
476 int32_t pos, PluginTensorDesc const* inOut, int32_t nbInputs, int32_t nbOutputs) noexcept = 0;
477
// Pure virtual; takes input and output descriptor arrays with their lengths and returns nothing.
513 virtual void configurePlugin(DynamicPluginTensorDesc const* in, int32_t nbInputs,
514 DynamicPluginTensorDesc const* out, int32_t nbOutputs) noexcept = 0;
515
// Pure virtual; returns a size_t computed from the input and output descriptors.
525 virtual size_t getWorkspaceSize(PluginTensorDesc const* inputs, int32_t nbInputs, PluginTensorDesc const* outputs,
526 int32_t nbOutputs) const noexcept = 0;
527
// Pure virtual; receives descriptors, input/output pointer arrays, a workspace pointer and a
// CUDA stream, and returns an int32_t status.
540 virtual int32_t enqueue(PluginTensorDesc const* inputDesc, PluginTensorDesc const* outputDesc,
541 void const* const* inputs, void* const* outputs, void* workspace, cudaStream_t stream) noexcept = 0;
542
543protected:
// Packs two values into one int32_t: PluginVersion::kV2_DYNAMICEXT shifted into the top
// 8 bits, OR-ed with the low 24 bits of NV_TENSORRT_VERSION.
551 int32_t getTensorRTVersion() const noexcept override
552 {
553 return (static_cast<int32_t>(PluginVersion::kV2_DYNAMICEXT) << 24 | (NV_TENSORRT_VERSION & 0xFFFFFF));
554 }
555
// Protected virtual destructor with an empty body.
556 virtual ~IPluginV2DynamicExt() noexcept {}
557
558private:
559 // Following are obsolete base class methods, and must not be implemented or used.
560
// Sealed no-op override of the older configurePlugin overload.
564 void configurePlugin(Dims const*, int32_t, Dims const*, int32_t, DataType const*, DataType const*, bool const*,
565 bool const*, PluginFormat, int32_t) noexcept final
566 {
567 }
568
// Sealed override that always returns false.
572 bool supportsFormat(DataType, PluginFormat) const noexcept final
573 {
574 return false;
575 }
576
// Sealed override that always returns a Dims whose first field is -1 and whose second
// field is value-initialized.
580 Dims getOutputDimensions(int32_t, Dims const*, int32_t) noexcept final
581 {
582 return Dims{-1, {}};
583 }
584
// Sealed override that always returns 0.
588 size_t getWorkspaceSize(int32_t) const noexcept final
589 {
590 return 0;
591 }
592
// Sealed override that always returns 1.
596 int32_t enqueue(int32_t, void const* const*, void* const*, void*, cudaStream_t) noexcept final
597 {
598 return 1;
599 }
600};
601
602namespace v_1_0
603{
// Body of IStreamReader (name taken from the constructors below).
// NOTE(review): the declaration line with the class name and base class (listing line 607)
// is missing; the `override` specifiers show that a base class exists.
608{
609public:
// Defaulted destructor overriding the base-class destructor, and defaulted default constructor.
614 ~IStreamReader() override = default;
615 IStreamReader() = default;
616
// Identifies this interface as {"IStreamReader", 1, 0}.
620 InterfaceInfo getInterfaceInfo() const noexcept override
621 {
622 return InterfaceInfo{"IStreamReader", 1, 0};
623 }
624
// Pure virtual; takes a destination pointer and a byte count and returns an int64_t.
// This method is not marked noexcept.
633 virtual int64_t read(void* destination, int64_t nbBytes) = 0;
634
635protected:
// Copy construction is available to derived classes only.
// NOTE(review): listing lines 637-639 are absent, so further protected members may be missing.
636 IStreamReader(IStreamReader const&) = default;
640};
641
// Body of IStreamWriter (name taken from the constructors below).
// NOTE(review): the declaration line with the class name and base class (listing line 642)
// is missing; the `override` specifiers show that a base class exists.
643{
644public:
// Defaulted destructor overriding the base-class destructor, and defaulted default constructor.
649 ~IStreamWriter() override = default;
650 IStreamWriter() = default;
651
// Identifies this interface as {"IStreamWriter", 1, 0}.
655 InterfaceInfo getInterfaceInfo() const noexcept override
656 {
657 return InterfaceInfo{"IStreamWriter", 1, 0};
658 }
659
// Pure virtual; takes a pointer to read-only data and a byte count and returns an int64_t.
// This method is not marked noexcept.
669 virtual int64_t write(void const* data, int64_t nbBytes) = 0;
670
671protected:
// Copy construction is available to derived classes only.
// NOTE(review): listing lines 673-675 are absent, so further protected members may be missing.
672 IStreamWriter(IStreamWriter const&) = default;
676};
677} // namespace v_1_0
678
690
700
// Scoped enumeration with three values, used as the `where` argument of
// IStreamReaderV2::seek below.
705enum class SeekPosition : int32_t
706{
708 kSET = 0,
709
711 kCUR = 1,
712
714 kEND = 2,
715};
716
717namespace v_1_0
718{
// Body of IStreamReaderV2 (name taken from the constructors below).
// NOTE(review): the declaration line with the class name and base class (listing line 719)
// is missing; the `override` specifiers show that a base class exists.
720{
721public:
// Defaulted destructor overriding the base-class destructor, and defaulted default constructor.
726 ~IStreamReaderV2() override = default;
727 IStreamReaderV2() = default;
728
// Identifies this interface as {"IStreamReaderV2", 1, 0}.
732 InterfaceInfo getInterfaceInfo() const noexcept override
733 {
734 return InterfaceInfo{"IStreamReaderV2", 1, 0};
735 }
736
// Pure virtual; like a plain read but additionally receives a CUDA stream. Marked noexcept.
747 virtual int64_t read(void* destination, int64_t nbBytes, cudaStream_t stream) noexcept = 0;
748
// Pure virtual; takes an offset and a SeekPosition and reports success as a bool.
757 virtual bool seek(int64_t offset, SeekPosition where) noexcept = 0;
758
// NOTE(review): listing lines 760-763 are absent, so the protected members are not visible.
759protected:
764};
765} // namespace v_1_0
766
777
// Body of IPluginResourceContext (name taken from the destructor below).
// NOTE(review): the declaration line that opens this class (listing line 791) is missing.
792{
793public:
// Pure virtual accessor returning an IGpuAllocator pointer.
798 virtual IGpuAllocator* getGpuAllocator() const noexcept = 0;
799
// Pure virtual accessor returning an IErrorRecorder pointer.
804 virtual IErrorRecorder* getErrorRecorder() const noexcept = 0;
// Pure virtual destructor; its definition is supplied inline after the class body.
805 virtual ~IPluginResourceContext() noexcept = 0;
806
807protected:
// Copy assignment is restricted to derived classes and, via the & qualifier, to lvalues.
// NOTE(review): listing lines 808-810 and 812 are absent, so other protected members may be missing.
811 IPluginResourceContext& operator=(IPluginResourceContext const&) & = default;
813};
814
// Out-of-class definition required because the destructor is declared pure virtual.
815inline IPluginResourceContext::~IPluginResourceContext() noexcept = default;
816
817namespace v_1_0
818{
// NOTE(review): the declaration line for this class (listing line 819) is missing. Judging
// by the interface string below it is presumably the plugin V3 "core" capability interface - confirm.
820{
821public:
// Identifies this interface as {"PLUGIN_V3ONE_CORE", 1, 0}.
825 InterfaceInfo getInterfaceInfo() const noexcept override
826 {
827 return InterfaceInfo{"PLUGIN_V3ONE_CORE", 1, 0};
828 }
829
// Pure virtual accessors; each returns a pointer to constant AsciiChar data.
838 virtual AsciiChar const* getPluginName() const noexcept = 0;
839
848 virtual AsciiChar const* getPluginVersion() const noexcept = 0;
849
859 virtual AsciiChar const* getPluginNamespace() const noexcept = 0;
860};
861
// NOTE(review): the declaration line for this class (listing line 862) is missing. Judging
// by the interface string below it is presumably the plugin V3 "build" capability interface - confirm.
// Methods ending in `= 0` must be implemented by a plugin; the others have defaults defined here.
863{
864public:
// Value returned by the default getFormatCombinationLimit() below.
870 static constexpr int32_t kDEFAULT_FORMAT_COMBINATION_LIMIT = 100;
871
// Identifies this interface as {"PLUGIN_V3ONE_BUILD", 1, 0}.
875 InterfaceInfo getInterfaceInfo() const noexcept override
876 {
877 return InterfaceInfo{"PLUGIN_V3ONE_BUILD", 1, 0};
878 }
879
// Pure virtual; receives input and output descriptor arrays and returns an int32_t status.
899 virtual int32_t configurePlugin(DynamicPluginTensorDesc const* in, int32_t nbInputs,
900 DynamicPluginTensorDesc const* out, int32_t nbOutputs) noexcept = 0;
901
// Pure virtual; outputTypes is a writable array of nbOutputs entries, inputTypes is read-only.
918 virtual int32_t getOutputDataTypes(
919 DataType* outputTypes, int32_t nbOutputs, const DataType* inputTypes, int32_t nbInputs) const noexcept = 0;
920
// Pure virtual; outputs is a writable DimsExprs array, the other arrays are read-only.
942 virtual int32_t getOutputShapes(DimsExprs const* inputs, int32_t nbInputs, DimsExprs const* shapeInputs,
943 int32_t nbShapeInputs, DimsExprs* outputs, int32_t nbOutputs, IExprBuilder& exprBuilder) noexcept = 0;
944
// NOTE(review): the first line of this pure virtual declaration (return type and name,
// listing line 979) is missing; only its parameter list is visible.
980 int32_t pos, DynamicPluginTensorDesc const* inOut, int32_t nbInputs, int32_t nbOutputs) noexcept = 0;
981
// Pure virtual; returns an int32_t output count.
987 virtual int32_t getNbOutputs() const noexcept = 0;
988
// Default implementation ignores all arguments and returns 0.
998 virtual size_t getWorkspaceSize(DynamicPluginTensorDesc const* /* inputs */, int32_t /* nbInputs */,
999 DynamicPluginTensorDesc const* /* outputs */, int32_t /* nbOutputs */) const noexcept
1000 {
1001 return 0;
1002 }
1003
// Default implementation ignores its arguments and returns 0.
1035 virtual int32_t getValidTactics(int32_t* /* tactics */, int32_t /* nbTactics */) noexcept
1036 {
1037 return 0;
1038 }
1039
// Default implementation returns 0.
1043 virtual int32_t getNbTactics() noexcept
1044 {
1045 return 0;
1046 }
1047
// Default implementation returns nullptr.
1059 virtual char const* getTimingCacheID() noexcept
1060 {
1061 return nullptr;
1062 }
1063
// Default implementation returns kDEFAULT_FORMAT_COMBINATION_LIMIT (100).
1067 virtual int32_t getFormatCombinationLimit() noexcept
1068 {
1069 return kDEFAULT_FORMAT_COMBINATION_LIMIT;
1070 }
1071
// Default implementation returns nullptr.
1078 virtual char const* getMetadataString() noexcept
1079 {
1080 return nullptr;
1081 }
1082};
1083
// NOTE(review): the declaration line for this class (listing line 1084) is missing. Judging
// by the interface string below it is presumably the plugin V3 "runtime" capability interface - confirm.
1085{
1086public:
// Identifies this interface as {"PLUGIN_V3ONE_RUNTIME", 1, 0}.
1090 InterfaceInfo getInterfaceInfo() const noexcept override
1091 {
1092 return InterfaceInfo{"PLUGIN_V3ONE_RUNTIME", 1, 0};
1093 }
1094
// Default implementation ignores the tactic argument and returns 0.
1102 virtual int32_t setTactic(int32_t /* tactic */) noexcept
1103 {
1104 return 0;
1105 }
1106
// Pure virtual; receives input and output descriptor arrays and returns an int32_t status.
1125 virtual int32_t onShapeChange(
1126 PluginTensorDesc const* in, int32_t nbInputs, PluginTensorDesc const* out, int32_t nbOutputs) noexcept = 0;
1127
// Pure virtual; receives descriptors, input/output pointer arrays, a workspace pointer and a
// CUDA stream, and returns an int32_t status.
1141 virtual int32_t enqueue(PluginTensorDesc const* inputDesc, PluginTensorDesc const* outputDesc,
1142 void const* const* inputs, void* const* outputs, void* workspace, cudaStream_t stream) noexcept = 0;
1143
// Pure virtual; takes a resource context pointer and returns an IPluginV3 pointer.
1163 virtual IPluginV3* attachToContext(IPluginResourceContext* context) noexcept = 0;
1164
// NOTE(review): listing lines 1165-1169 and 1171-1173 are absent; a declaration may be missing here.
1170
// Pure virtual; returns a pointer to a constant PluginFieldCollection.
1174 virtual PluginFieldCollection const* getFieldsToSerialize() noexcept = 0;
1175};
1176} // namespace v_1_0
1177
1178namespace v_2_0
1179{
1180
// NOTE(review): the declaration line for this class (listing line 1181) is missing, so its
// name and base class are not visible. It lives in namespace v_2_0 and reports the same
// interface kind as the v_1_0 build interface, with major version 2.
1182{
1183public:
// Identifies this interface as {"PLUGIN_V3ONE_BUILD", 2, 0}.
1184 InterfaceInfo getInterfaceInfo() const noexcept override
1185 {
1186 return InterfaceInfo{"PLUGIN_V3ONE_BUILD", 2, 0};
1187 }
1188
// Default implementation ignores the output index and returns -1.
// NOTE(review): -1 presumably means "no aliased input" - confirm against the API documentation.
1218 virtual int32_t getAliasedInput(int32_t /* outputIndex */) noexcept
1219 {
1220 return -1;
1221 }
1222};
1223
1224} // namespace v_2_0
1225
1236
1248
1260
1269
1272namespace v_1_0
1273{
// Body of IProfiler (name taken from the destructor below).
// NOTE(review): the declaration line with the class name and base class (listing line 1274)
// is missing; the `override` specifiers show that a base class exists.
1275{
1276public:
// Identifies this interface as {"IProfiler", 1, 0}.
1280 InterfaceInfo getInterfaceInfo() const noexcept override
1281 {
1282 return {"IProfiler", 1, 0};
1283 }
1284
// Pure virtual callback that receives a layer name and a float named ms.
1292 virtual void reportLayerTime(char const* layerName, float ms) noexcept = 0;
1293
// Defaulted destructor overriding the base-class destructor.
1294 ~IProfiler() override = default;
1295};
1296} // namespace v_1_0
1297
1310
// Scoped enumeration with six consecutive values, used as the `role` argument of
// IRefitter::setWeights and as the element type of the roles arrays in getMissing/getAll.
1318enum class WeightsRole : int32_t
1319{
1320 kKERNEL = 0,
1321 kBIAS = 1,
1322 kSHIFT = 2,
1323 kSCALE = 3,
1324 kCONSTANT = 4,
1325 kANY = 5,
1326};
1327
// Explicit template specialization exposing kVALUE = 6, matching the six enumerators above.
// NOTE(review): the line naming the specialized template (listing line 1330) is missing.
1329template <>
1331{
1332 static constexpr int32_t kVALUE = 6;
1333};
1334
// Scoped enumeration with two values, kGPU and kDLA, stored as int32_t.
1340enum class DeviceType : int32_t
1341{
1342 kGPU = 0,
1343 kDLA = 1,
1344};
1345
// Explicit template specialization exposing kVALUE = 2, matching the two enumerators above.
// NOTE(review): the line naming the specialized template (listing line 1348) is missing.
1347template <>
1349{
1350 static constexpr int32_t kVALUE = 2;
1351};
1352
// Scoped enumeration TempfileControlFlag with int32_t as its underlying type.
// NOTE(review): the enumerator lines (listing lines 1365-1371) are absent from this listing,
// so no values are visible.
1363enum class TempfileControlFlag : int32_t
1364{
1367
1372};
1373
// Explicit template specialization exposing kVALUE = 2.
// NOTE(review): the line naming the specialized template (listing line 1376) is missing;
// presumably this is the enumerator count for TempfileControlFlag - confirm.
1375template <>
1377{
1378 static constexpr int32_t kVALUE = 2;
1379};
1380
// Unsigned 32-bit type used for the flags argument of IRuntime::setTempfileControlFlags.
// NOTE(review): presumably a bitmask with one bit per TempfileControlFlag value - confirm.
1387using TempfileControlFlags = uint32_t;
1388
// Scoped enumeration of thirteen format identifiers with consecutive values 0 to 12.
// It is the return type of ICudaEngine::getTensorFormat.
1420enum class TensorFormat : int32_t
1421{
1428 kLINEAR = 0,
1429
1434 kCHW2 = 1,
1435
1439 kHWC8 = 2,
1440
1454 kCHW4 = 3,
1455
1462 kCHW16 = 4,
1463
1471 kCHW32 = 5,
1472
1477 kDHWC8 = 6,
1478
1483 kCDHW32 = 7,
1484
1488 kHWC = 8,
1489
1498 kDLA_LINEAR = 9,
1499
1513 kDLA_HWC4 = 10,
1514
1519 kHWC16 = 11,
1520
1525 kDHWC = 12
1526};
1527
// Explicit template specialization exposing kVALUE = 13, matching the thirteen enumerators above.
// NOTE(review): the line naming the specialized template (listing line 1530) is missing.
1529template <>
1531{
1533 static constexpr int32_t kVALUE = 13;
1534};
1535
// Scoped enumeration with a single value, kRESIZABLE = 0.
1541enum class AllocatorFlag : int32_t
1542{
1544 kRESIZABLE = 0,
1545};
1546
// Explicit template specialization exposing kVALUE = 1, matching the single enumerator above.
// NOTE(review): the line naming the specialized template (listing line 1549) is missing.
1548template <>
1550{
1552 static constexpr int32_t kVALUE = 1;
1553};
1554
// Unsigned 32-bit type used for the flags parameter of the IGpuAllocator allocation methods.
// NOTE(review): presumably a bitmask with one bit per AllocatorFlag value - confirm.
1555using AllocatorFlags = uint32_t;
1556
1559namespace v_1_0
1560{
1561
// Body of v_1_0::ILogger (name taken from the constructors below).
// NOTE(review): the declaration line with the class name and base class (listing line 1574)
// is missing; the `override` specifiers show that a base class exists.
1575{
1576public:
// Identifies this interface as {"ILogger", 1, 0}.
1580 InterfaceInfo getInterfaceInfo() const noexcept override
1581 {
1582 return {"ILogger", 1, 0};
1583 }
1584
// Nested severity levels with consecutive values 0 to 4; kINTERNAL_ERROR has the lowest
// numeric value and kVERBOSE the highest.
1590 enum class Severity : int32_t
1591 {
1593 kINTERNAL_ERROR = 0,
1595 kERROR = 1,
1597 kWARNING = 2,
1599 kINFO = 3,
1601 kVERBOSE = 4,
1602 };
1603
// Pure virtual callback that receives a severity and a message string.
1622 virtual void log(Severity severity, AsciiChar const* msg) noexcept = 0;
1623
// Public defaulted default constructor and destructor.
1624 ILogger() = default;
1625 ~ILogger() override = default;
1626
1627protected:
// Copy and move operations are defaulted but protected, so only derived classes can use them;
// the assignment operators are additionally restricted to lvalues by the & qualifier.
1628 // @cond SuppressDoxyWarnings
1629 ILogger(ILogger const&) = default;
1630 ILogger(ILogger&&) = default;
1631 ILogger& operator=(ILogger const&) & = default;
1632 ILogger& operator=(ILogger&&) & = default;
1633 // @endcond
1634};
1635
1636} // namespace v_1_0
1637
// Makes v_1_0::ILogger available as ILogger in the enclosing namespace.
1638using ILogger = v_1_0::ILogger;
1639
// Explicit template specialization exposing kVALUE = 5.
// NOTE(review): the line naming the specialized template (listing line 1642) is missing;
// presumably it is specialized for ILogger::Severity, which has five enumerators - confirm.
1641template <>
1643{
1645 static constexpr int32_t kVALUE = 5;
1646};
1647
1648namespace v_1_0
1649{
1650
// Body of v_1_0::IGpuAllocator (name taken from the constructors below).
// NOTE(review): the declaration line with the class name and base class (listing line 1651)
// is missing; the `override` specifiers show that a base class exists.
// An implementer must provide the two pure virtual methods; the other virtual methods have
// defaults defined here in terms of those two.
1652{
1653public:
// NOTE(review): the first line of this pure virtual declaration (return type and name,
// listing line 1678) is missing. allocateAsync below calls allocate(size, alignment, flags),
// so this is presumably the declaration of allocate - confirm.
1679 uint64_t const size, uint64_t const alignment, AllocatorFlags const flags) noexcept = 0;
1680
// Public defaulted destructor and default constructor.
1681 ~IGpuAllocator() override = default;
1682 IGpuAllocator() = default;
1683
// Default implementation ignores all arguments and returns nullptr.
1721 virtual void* reallocate(void* const /*baseAddr*/, uint64_t /*alignment*/, uint64_t /*newSize*/) noexcept
1722 {
1723 return nullptr;
1724 }
1725
// Pure virtual and marked TRT_DEPRECATED; takes a memory pointer and reports success as a bool.
1744 TRT_DEPRECATED virtual bool deallocate(void* const memory) noexcept = 0;
1745
// Default implementation ignores the stream and forwards to allocate(size, alignment, flags).
1774 virtual void* allocateAsync(
1775 uint64_t const size, uint64_t const alignment, AllocatorFlags const flags, cudaStream_t /*stream*/) noexcept
1776 {
1777 return allocate(size, alignment, flags);
1778 }
// Default implementation ignores the stream and forwards to deallocate(memory).
1807 virtual bool deallocateAsync(void* const memory, cudaStream_t /*stream*/) noexcept
1808 {
1809 return deallocate(memory);
1810 }
1811
// Identifies this interface as {"IGpuAllocator", 1, 0}.
1815 InterfaceInfo getInterfaceInfo() const noexcept override
1816 {
1817 return {"IGpuAllocator", 1, 0};
1818 }
1819
1820protected:
// Copy and move operations are defaulted but protected, so only derived classes can use them;
// the assignment operators are additionally restricted to lvalues by the & qualifier.
1821 // @cond SuppressDoxyWarnings
1822 IGpuAllocator(IGpuAllocator const&) = default;
1823 IGpuAllocator(IGpuAllocator&&) = default;
1824 IGpuAllocator& operator=(IGpuAllocator const&) & = default;
1825 IGpuAllocator& operator=(IGpuAllocator&&) & = default;
1826 // @endcond
1827};
1828
1829} // namespace v_1_0
1830
1852
1853
// Interface derived from INoCopy. Every public method is an inline wrapper that passes its
// arguments to the same-named method on mImpl and returns that result, except
// the stream-reader deserialization fragment, which calls mImpl->deserializeCudaEngineV2.
// NOTE(review): several signature lines are missing from this listing; those places are
// marked below and only the forwarding body is visible.
1861class IRuntime : public INoCopy
1862{
1863public:
// Pure virtual destructor; its definition is supplied inline after the class body.
1864 virtual ~IRuntime() noexcept = 0;
1865
1877 void setDLACore(int32_t dlaCore) noexcept
1878 {
1879 mImpl->setDLACore(dlaCore);
1880 }
1881
1887 int32_t getDLACore() const noexcept
1888 {
1889 return mImpl->getDLACore();
1890 }
1891
1895 int32_t getNbDLACores() const noexcept
1896 {
1897 return mImpl->getNbDLACores();
1898 }
1899
1911 void setGpuAllocator(IGpuAllocator* allocator) noexcept
1912 {
1913 mImpl->setGpuAllocator(allocator);
1914 }
1915
1927 //
1930 void setErrorRecorder(IErrorRecorder* recorder) noexcept
1931 {
1932 mImpl->setErrorRecorder(recorder);
1933 }
1934
// NOTE(review): signature line missing (listing line 1945); forwards to mImpl->getErrorRecorder().
1946 {
1947 return mImpl->getErrorRecorder();
1948 }
1949
// Passes a blob pointer and its size in bytes to mImpl and returns the resulting ICudaEngine pointer.
1963 ICudaEngine* deserializeCudaEngine(void const* blob, std::size_t size) noexcept
1964 {
1965 return mImpl->deserializeCudaEngine(blob, size);
1966 }
1967
// NOTE(review): signature line missing (listing line 1986); the body passes a variable named
// streamReader to mImpl->deserializeCudaEngineV2.
1987 {
1988 return mImpl->deserializeCudaEngineV2(streamReader);
1989 }
1990
1996 ILogger* getLogger() const noexcept
1997 {
1998 return mImpl->getLogger();
1999 }
2000
2011 bool setMaxThreads(int32_t maxThreads) noexcept
2012 {
2013 return mImpl->setMaxThreads(maxThreads);
2014 }
2015
2025 int32_t getMaxThreads() const noexcept
2026 {
2027 return mImpl->getMaxThreads();
2028 }
2029
// Returns void; the `return` statement simply forwards the void call.
2060 void setTemporaryDirectory(char const* path) noexcept
2061 {
2062 return mImpl->setTemporaryDirectory(path);
2063 }
2064
2071 char const* getTemporaryDirectory() const noexcept
2072 {
2073 return mImpl->getTemporaryDirectory();
2074 }
2075
// NOTE(review): signature line missing (listing line 2087); forwards a variable named flags
// to mImpl->setTempfileControlFlags.
2088 {
2089 return mImpl->setTempfileControlFlags(flags);
2090 }
2091
// NOTE(review): signature line missing (listing line 2099); forwards to mImpl->getTempfileControlFlags().
2100 {
2101 return mImpl->getTempfileControlFlags();
2102 }
2103
// NOTE(review): signature line missing (listing line 2109); forwards to mImpl->getPluginRegistry().
2110 {
2111 return mImpl->getPluginRegistry();
2112 }
2113
// Passes a path to mImpl->loadRuntime and returns the resulting IRuntime pointer.
2127 IRuntime* loadRuntime(char const* path) noexcept
2128 {
2129 return mImpl->loadRuntime(path);
2130 }
2131
// Returns void; the `return` statement simply forwards the void call.
2139 void setEngineHostCodeAllowed(bool allowed) noexcept
2140 {
2141 return mImpl->setEngineHostCodeAllowed(allowed);
2142 }
2143
2149 bool getEngineHostCodeAllowed() const noexcept
2150 {
2151 return mImpl->getEngineHostCodeAllowed();
2152 }
2153
2154
2155protected:
// Implementation pointer, value-initialized (null) by the {} initializer until it is set.
2156 apiv::VRuntime* mImpl{};
2157};
2158
// Out-of-class definition required because the destructor is declared pure virtual.
2159inline IRuntime::~IRuntime() noexcept = default;
2160
// Interface derived from INoCopy. Every public method is an inline wrapper that passes its
// arguments to mImpl and returns that result. The called mImpl method has the same name,
// except that the three-argument setNamedWeights overload calls
// mImpl->setNamedWeightsWithLocation.
2168class IRefitter : public INoCopy
2169{
2170public:
// Pure virtual destructor; its definition is supplied inline after the class body.
2171 virtual ~IRefitter() noexcept = 0;
2172
2188 bool setWeights(char const* layerName, WeightsRole role, Weights weights) noexcept
2189 {
2190 return mImpl->setWeights(layerName, role, weights);
2191 }
2192
2205 bool refitCudaEngine() noexcept
2206 {
2207 return mImpl->refitCudaEngine();
2208 }
2209
// layerNames and roles are caller-provided output arrays; size is passed alongside them.
// NOTE(review): presumably size is the capacity of both arrays - confirm.
2226 int32_t getMissing(int32_t size, char const** layerNames, WeightsRole* roles) noexcept
2227 {
2228 return mImpl->getMissing(size, layerNames, roles);
2229 }
2230
2243 int32_t getAll(int32_t size, char const** layerNames, WeightsRole* roles) noexcept
2244 {
2245 return mImpl->getAll(size, layerNames, roles);
2246 }
2247
2259 //
2262 void setErrorRecorder(IErrorRecorder* recorder) noexcept
2263 {
2264 mImpl->setErrorRecorder(recorder);
2265 }
2266
// NOTE(review): signature line missing (listing line 2277); forwards to mImpl->getErrorRecorder().
2278 {
2279 return mImpl->getErrorRecorder();
2280 }
2281
2302 bool setNamedWeights(char const* name, Weights weights) noexcept
2303 {
2304 return mImpl->setNamedWeights(name, weights);
2305 }
2306
2322 int32_t getMissingWeights(int32_t size, char const** weightsNames) noexcept
2323 {
2324 return mImpl->getMissingWeights(size, weightsNames);
2325 }
2326
2338 int32_t getAllWeights(int32_t size, char const** weightsNames) noexcept
2339 {
2340 return mImpl->getAllWeights(size, weightsNames);
2341 }
2342
2348 ILogger* getLogger() const noexcept
2349 {
2350 return mImpl->getLogger();
2351 }
2352
2364 bool setMaxThreads(int32_t maxThreads) noexcept
2365 {
2366 return mImpl->setMaxThreads(maxThreads);
2367 }
2368
2378 int32_t getMaxThreads() const noexcept
2379 {
2380 return mImpl->getMaxThreads();
2381 }
2382
// Overload that additionally takes a TensorLocation; it maps to the differently named
// mImpl->setNamedWeightsWithLocation.
2405 bool setNamedWeights(char const* name, Weights weights, TensorLocation location) noexcept
2406 {
2407 return mImpl->setNamedWeightsWithLocation(name, weights, location);
2408 }
2409
2421 Weights getNamedWeights(char const* weightsName) const noexcept
2422 {
2423 return mImpl->getNamedWeights(weightsName);
2424 }
2425
2437 TensorLocation getWeightsLocation(char const* weightsName) const noexcept
2438 {
2439 return mImpl->getWeightsLocation(weightsName);
2440 }
2441
2453 bool unsetNamedWeights(char const* weightsName) noexcept
2454 {
2455 return mImpl->unsetNamedWeights(weightsName);
2456 }
2457
// Returns void; the `return` statement simply forwards the void call.
2469 void setWeightsValidation(bool weightsValidation) noexcept
2470 {
2471 return mImpl->setWeightsValidation(weightsValidation);
2472 }
2473
2477 bool getWeightsValidation() const noexcept
2478 {
2479 return mImpl->getWeightsValidation();
2480 }
2481
// Variant of refitCudaEngine that additionally passes a CUDA stream to mImpl.
2499 bool refitCudaEngineAsync(cudaStream_t stream) noexcept
2500 {
2501 return mImpl->refitCudaEngineAsync(stream);
2502 }
2503
2517 Weights getWeightsPrototype(char const* weightsName) const noexcept
2518 {
2519 return mImpl->getWeightsPrototype(weightsName);
2520 }
2521
2522protected:
// Implementation pointer used by every method; no default initializer is given here.
2523 apiv::VRefitter* mImpl;
2524};
2525
// Out-of-class definition required because the destructor is declared pure virtual.
2526inline IRefitter::~IRefitter() noexcept = default;
2527
// Scoped enumeration with three values (kMIN, kOPT, kMAX), used as the `select` argument of
// the IOptimizationProfile setters and getters below.
2538enum class OptProfileSelector : int32_t
2539{
2540 kMIN = 0,
2541 kOPT = 1,
2542 kMAX = 2
2543};
2544
// Explicit template specialization exposing kVALUE = 3, matching the three enumerators above.
// NOTE(review): the line naming the specialized template (listing line 2547) is missing.
2546template <>
2548{
2549 static constexpr int32_t kVALUE = 3;
2550};
2551
// Body of IOptimizationProfile (name taken from the destructor below).
// NOTE(review): the declaration line that opens this class (listing line 2574) is missing.
// Every public method is an inline wrapper that forwards its arguments to the same-named
// method on mImpl and returns that result.
2575{
2576public:
2604 bool setDimensions(char const* inputName, OptProfileSelector select, Dims const& dims) noexcept
2605 {
2606 return mImpl->setDimensions(inputName, select, dims);
2607 }
2608
2616 Dims getDimensions(char const* inputName, OptProfileSelector select) const noexcept
2617 {
2618 return mImpl->getDimensions(inputName, select);
2619 }
2620
2629 int32_t getNbShapeValues(char const* inputName) const noexcept
2630 {
2631 return mImpl->getNbShapeValues(inputName);
2632 }
2633
2647 bool setExtraMemoryTarget(float target) noexcept
2648 {
2649 return mImpl->setExtraMemoryTarget(target);
2650 }
2651
2659 float getExtraMemoryTarget() const noexcept
2660 {
2661 return mImpl->getExtraMemoryTarget();
2662 }
2663
2676 bool isValid() const noexcept
2677 {
2678 return mImpl->isValid();
2679 }
2680
// NOTE(review): the first line of this signature (return type and name, listing line 2723)
// is missing. The body forwards to mImpl->setShapeValuesV2 with an array of nbValues
// 64-bit values.
2724 char const* inputName, OptProfileSelector select, int64_t const* values, int32_t nbValues) noexcept
2725 {
2726 return mImpl->setShapeValuesV2(inputName, select, values, nbValues);
2727 }
2728
// Returns the pointer produced by mImpl->getShapeValuesV2.
// NOTE(review): ownership and lifetime of the returned array are not visible here.
2736 int64_t const* getShapeValuesV2(char const* inputName, OptProfileSelector select) const noexcept
2737 {
2738 return mImpl->getShapeValuesV2(inputName, select);
2739 }
2740
2741protected:
// Implementation pointer used by every method; no default initializer is given here.
2742 apiv::VOptimizationProfile* mImpl;
// Protected pure virtual destructor: code outside the hierarchy cannot delete through this type.
2743 virtual ~IOptimizationProfile() noexcept = 0;
2744};
2745
// Out-of-class definition required because the destructor is declared pure virtual.
2746inline IOptimizationProfile::~IOptimizationProfile() noexcept = default;
2747
// Scoped enumeration TacticSource with int32_t as its underlying type.
// NOTE(review): the enumerator lines (listing lines 2757-2764) are absent from this listing,
// so no values are visible.
2755enum class TacticSource : int32_t
2756{
2761
2765};
2766
// Explicit template specialization exposing kVALUE = 2.
// NOTE(review): the line naming the specialized template (listing line 2769) is missing;
// presumably this is the enumerator count for TacticSource - confirm.
2768template <>
2770{
2771 static constexpr int32_t kVALUE = 2;
2772};
2773
// Unsigned 32-bit type. NOTE(review): presumably a bitmask with one bit per TacticSource
// value - confirm.
2780using TacticSources = uint32_t;
2781
// Scoped enumeration with three values. Note the numeric order: kLAYER_NAMES_ONLY is 0,
// kNONE is 1 and kDETAILED is 2, so kNONE is not the zero value.
2791enum class ProfilingVerbosity : int32_t
2792{
2793 kLAYER_NAMES_ONLY = 0,
2794 kNONE = 1,
2795 kDETAILED = 2,
2796};
2797
// Explicit template specialization exposing kVALUE = 3, matching the three enumerators above.
// NOTE(review): the line naming the specialized template (listing line 2800) is missing.
2799template <>
2801{
2802 static constexpr int32_t kVALUE = 3;
2803};
2804
// Unsigned 32-bit type accepted by ISerializationConfig::setFlags.
// NOTE(review): presumably a bitmask with one bit per SerializationFlag value - confirm.
2811using SerializationFlags = uint32_t;
2812
// Scoped enumeration of individual serialization flags.
// NOTE(review): listing line 2823 is absent, so the enumerator with value 1 is presumably
// missing from this listing rather than from the header - confirm.
2820enum class SerializationFlag : int32_t
2821{
2822 kEXCLUDE_WEIGHTS = 0,
2824 kINCLUDE_REFIT = 2,
2825};
2826
// Explicit template specialization exposing kVALUE = 3.
// NOTE(review): the line naming the specialized template (listing line 2829) is missing.
2828template <>
2830{
2831 static constexpr int32_t kVALUE = 3;
2832};
2833
// Body of ISerializationConfig (name taken from the destructor below).
// NOTE(review): the declaration line that opens this class (listing line 2841) is missing.
// Every public method is an inline wrapper that forwards to the same-named method on mImpl.
2842{
2843public:
// Pure virtual destructor; its definition is supplied inline after the class body.
2844 virtual ~ISerializationConfig() noexcept = 0;
2845
// Passes the whole SerializationFlags value to mImpl in one call.
2857 bool setFlags(SerializationFlags serializationFlags) noexcept
2858 {
2859 return mImpl->setFlags(serializationFlags);
2860 }
2861
// NOTE(review): signature line missing (listing line 2869); forwards to mImpl->getFlags().
2870 {
2871 return mImpl->getFlags();
2872 }
2873
// The next three methods operate on a single SerializationFlag.
2881 bool clearFlag(SerializationFlag serializationFlag) noexcept
2882 {
2883 return mImpl->clearFlag(serializationFlag);
2884 }
2885
2893 bool setFlag(SerializationFlag serializationFlag) noexcept
2894 {
2895 return mImpl->setFlag(serializationFlag);
2896 }
2897
2905 bool getFlag(SerializationFlag serializationFlag) const noexcept
2906 {
2907 return mImpl->getFlag(serializationFlag);
2908 }
2909
2910protected:
// Implementation pointer used by every method; no default initializer is given here.
2911 apiv::VSerializationConfig* mImpl;
2912};
2913
// Out-of-class definition required because the destructor is declared pure virtual.
2914inline ISerializationConfig::~ISerializationConfig() noexcept = default;
2915
// NOTE(review): the declaration line for this enumeration (listing line 2927) is missing.
// It is presumably ExecutionContextAllocationStrategy, the type taken by
// IRuntimeConfig::setExecutionContextAllocationStrategy below - confirm.
// Three consecutive values are visible.
2928{
2929 kSTATIC = 0,
2930 kON_PROFILE_CHANGE = 1,
2931 kUSER_MANAGED = 2,
2932};
2933
// Explicit template specialization exposing kVALUE = 3, matching the three enumerators above.
// NOTE(review): the line naming the specialized template (listing line 2937) is missing.
2936template <>
2938{
2939 static constexpr int32_t kVALUE = 3;
2940};
2941
2942
// Body of IRuntimeConfig (name taken from the destructor and the closing comment below).
// NOTE(review): the declaration line that opens this class (listing line 2949) is missing.
2950{
2951public:
// Pure virtual destructor; its definition is supplied inline after the class body.
2952 virtual ~IRuntimeConfig() noexcept = 0;
2953
// Forwards strategy to mImpl. Returns void; the `return` statement forwards the void call.
2959 void setExecutionContextAllocationStrategy(ExecutionContextAllocationStrategy strategy) noexcept
2960 {
2961 return mImpl->setExecutionContextAllocationStrategy(strategy);
2962 }
2963
// NOTE(review): signature line missing (listing line 2969); forwards to
// mImpl->getExecutionContextAllocationStrategy().
2970 {
2971 return mImpl->getExecutionContextAllocationStrategy();
2972 }
2973
2974
2975protected:
// Implementation pointer used by every method; no default initializer is given here.
2976 apiv::VRuntimeConfig* mImpl;
2977}; // class IRuntimeConfig
2978
// Out-of-class definition required because the destructor is declared pure virtual.
2979inline IRuntimeConfig::~IRuntimeConfig() noexcept = default;
2980
// Scoped enumeration EngineStat, the argument type of ICudaEngine::getEngineStat.
// NOTE(review): the enumerator lines (listing lines 2991-2995) are absent from this listing,
// so no values are visible.
2989enum class EngineStat : int32_t
2990{
2993
2996};
2997
// Explicit template specialization exposing kVALUE = 2.
// NOTE(review): the line naming the specialized template (listing line 3000) is missing;
// presumably this is the enumerator count for EngineStat - confirm.
2999template <>
3001{
3002 static constexpr int32_t kVALUE = 2;
3003};
3004
// Interface derived from INoCopy. Every public method is an inline wrapper that passes its
// arguments to mImpl and returns that result. The called mImpl method has the same name,
// with one pattern of exceptions: overloads that take an extra profileIndex argument call
// the mImpl method whose name carries a V2 suffix (for example getTensorFormat(name, index)
// calls mImpl->getTensorFormatV2).
// NOTE(review): many signature lines are missing from this listing; those places are marked
// below and only the forwarding body is visible.
3012class ICudaEngine : public INoCopy
3013{
3014public:
// Pure virtual destructor; its definition is supplied inline after the class body.
3015 virtual ~ICudaEngine() noexcept = 0;
3016
3027 Dims getTensorShape(char const* tensorName) const noexcept
3028 {
3029 return mImpl->getTensorShape(tensorName);
3030 }
3031
3042 DataType getTensorDataType(char const* tensorName) const noexcept
3043 {
3044 return mImpl->getTensorDataType(tensorName);
3045 }
3046
3056 int32_t getNbLayers() const noexcept
3057 {
3058 return mImpl->getNbLayers();
3059 }
3060
// Returns the IHostMemory pointer produced by mImpl->serialize().
// NOTE(review): ownership of the returned object is not visible here.
3070 IHostMemory* serialize() const noexcept
3071 {
3072 return mImpl->serialize();
3073 }
3074
// NOTE(review): signature line(s) missing (listing lines 3087-3088); forwards a variable
// named strategy to mImpl->createExecutionContext.
3089 {
3090 return mImpl->createExecutionContext(strategy);
3091 }
3092
3105 TensorLocation getTensorLocation(char const* tensorName) const noexcept
3106 {
3107 return mImpl->getTensorLocation(tensorName);
3108 }
3109
3125 bool isShapeInferenceIO(char const* tensorName) const noexcept
3126 {
3127 return mImpl->isShapeInferenceIO(tensorName);
3128 }
3129
3139 TensorIOMode getTensorIOMode(char const* tensorName) const noexcept
3140 {
3141 return mImpl->getTensorIOMode(tensorName);
3142 }
3143
// Marked TRT_NODISCARD, so callers are expected to use the returned name pointer.
3158 TRT_NODISCARD char const* getAliasedInputTensor(char const* tensorName) const noexcept
3159 {
3160 return mImpl->getAliasedInputTensor(tensorName);
3161 }
3162
// NOTE(review): signature line missing (listing line 3170); forwards a variable named
// runtimeConfig to mImpl->createExecutionContextWithRuntimeConfig.
3171 {
3172 return mImpl->createExecutionContextWithRuntimeConfig(runtimeConfig);
3173 }
3174
// NOTE(review): signature line missing (listing line 3183); forwards to mImpl->createRuntimeConfig().
3184 {
3185 return mImpl->createRuntimeConfig();
3186 }
3187
3197 int64_t getDeviceMemorySizeV2() const noexcept
3198 {
3199 return mImpl->getDeviceMemorySizeV2();
3200 }
3201
3211 int64_t getDeviceMemorySizeForProfileV2(int32_t profileIndex) const noexcept
3212 {
3213 return mImpl->getDeviceMemorySizeForProfileV2(profileIndex);
3214 }
3215
3221 bool isRefittable() const noexcept
3222 {
3223 return mImpl->isRefittable();
3224 }
3225
3242 int32_t getTensorBytesPerComponent(char const* tensorName) const noexcept
3243 {
3244 return mImpl->getTensorBytesPerComponent(tensorName);
3245 }
3246
// Overload with profileIndex; calls mImpl->getTensorBytesPerComponentV2.
3260 int32_t getTensorBytesPerComponent(char const* tensorName, int32_t profileIndex) const noexcept
3261 {
3262 return mImpl->getTensorBytesPerComponentV2(tensorName, profileIndex);
3263 }
3264
3281 int32_t getTensorComponentsPerElement(char const* tensorName) const noexcept
3282 {
3283 return mImpl->getTensorComponentsPerElement(tensorName);
3284 }
3285
// Overload with profileIndex; calls mImpl->getTensorComponentsPerElementV2.
3299 int32_t getTensorComponentsPerElement(char const* tensorName, int32_t profileIndex) const noexcept
3300 {
3301 return mImpl->getTensorComponentsPerElementV2(tensorName, profileIndex);
3302 }
3303
3314 TensorFormat getTensorFormat(char const* tensorName) const noexcept
3315 {
3316 return mImpl->getTensorFormat(tensorName);
3317 }
3318
// Overload with profileIndex; calls mImpl->getTensorFormatV2.
3328 TensorFormat getTensorFormat(char const* tensorName, int32_t profileIndex) const noexcept
3329 {
3330 return mImpl->getTensorFormatV2(tensorName, profileIndex);
3331 }
3332
3352 char const* getTensorFormatDesc(char const* tensorName) const noexcept
3353 {
3354 return mImpl->getTensorFormatDesc(tensorName);
3355 }
3356
// Overload with profileIndex; calls mImpl->getTensorFormatDescV2.
3375 char const* getTensorFormatDesc(char const* tensorName, int32_t profileIndex) const noexcept
3376 {
3377 return mImpl->getTensorFormatDescV2(tensorName, profileIndex);
3378 }
3379
3392 int32_t getTensorVectorizedDim(char const* tensorName) const noexcept
3393 {
3394 return mImpl->getTensorVectorizedDim(tensorName);
3395 }
3396
// Overload with profileIndex; calls mImpl->getTensorVectorizedDimV2.
3408 int32_t getTensorVectorizedDim(char const* tensorName, int32_t profileIndex) const noexcept
3409 {
3410 return mImpl->getTensorVectorizedDimV2(tensorName, profileIndex);
3411 }
3412
3423 char const* getName() const noexcept
3424 {
3425 return mImpl->getName();
3426 }
3427
3434 int32_t getNbOptimizationProfiles() const noexcept
3435 {
3436 return mImpl->getNbOptimizationProfiles();
3437 }
3438
3454 Dims getProfileShape(char const* tensorName, int32_t profileIndex, OptProfileSelector select) const noexcept
3455 {
3456 return mImpl->getProfileShape(tensorName, profileIndex, select);
3457 }
3458
// NOTE(review): signature line missing (listing line 3469); forwards to mImpl->getEngineCapability().
3470 {
3471 return mImpl->getEngineCapability();
3472 }
3473
// Returns void; the `return` statement simply forwards the void call.
3488 void setErrorRecorder(IErrorRecorder* recorder) noexcept
3489 {
3490 return mImpl->setErrorRecorder(recorder);
3491 }
3492
// NOTE(review): signature line missing (listing line 3503); forwards to mImpl->getErrorRecorder().
3504 {
3505 return mImpl->getErrorRecorder();
3506 }
3507
// NOTE(review): signature line missing (listing line 3519); forwards to mImpl->getTacticSources().
3520 {
3521 return mImpl->getTacticSources();
3522 }
3523
// NOTE(review): signature line missing (listing line 3531); forwards to mImpl->getProfilingVerbosity().
3532 {
3533 return mImpl->getProfilingVerbosity();
3534 }
3535
// NOTE(review): signature line missing (listing line 3541); forwards to mImpl->createEngineInspector().
3542 {
3543 return mImpl->createEngineInspector();
3544 }
3545
3554 int32_t getNbIOTensors() const noexcept
3555 {
3556 return mImpl->getNbIOTensors();
3557 }
3558
3566 char const* getIOTensorName(int32_t index) const noexcept
3567 {
3568 return mImpl->getIOTensorName(index);
3569 }
3570
// NOTE(review): signature line missing (listing line 3577); forwards to
// mImpl->getHardwareCompatibilityLevel().
3578 {
3579 return mImpl->getHardwareCompatibilityLevel();
3580 }
3581
3592 int32_t getNbAuxStreams() const noexcept
3593 {
3594 return mImpl->getNbAuxStreams();
3595 }
3596
// NOTE(review): signature line missing (listing line 3602); forwards to
// mImpl->createSerializationConfig().
3603 {
3604 return mImpl->createSerializationConfig();
3605 }
3606
// NOTE(review): signature line missing (listing line 3622); forwards a variable named config
// to mImpl->serializeWithConfig.
3623 {
3624 return mImpl->serializeWithConfig(config);
3625 }
3626
3638 int64_t getStreamableWeightsSize() const noexcept
3639 {
3640 return mImpl->getStreamableWeightsSize();
3641 }
3642
3677 bool setWeightStreamingBudgetV2(int64_t gpuMemoryBudget) noexcept
3678 {
3679 return mImpl->setWeightStreamingBudgetV2(gpuMemoryBudget);
3680 }
3681
3693 int64_t getWeightStreamingBudgetV2() const noexcept
3694 {
3695 return mImpl->getWeightStreamingBudgetV2();
3696 }
3697
3716 int64_t getWeightStreamingAutomaticBudget() const noexcept
3717 {
3718 return mImpl->getWeightStreamingAutomaticBudget();
3719 }
3720
// NOTE(review): signature line missing (listing line 3742); forwards to
// mImpl->getWeightStreamingScratchMemorySize().
3743 {
3744 return mImpl->getWeightStreamingScratchMemorySize();
3745 }
3746
3756 bool isDebugTensor(char const* name) const noexcept
3757 {
3758 return mImpl->isDebugTensor(name);
3759 }
3760
// NOTE(review): the first line of this signature (return type and name, listing line 3778)
// is missing; the body forwards to mImpl->getProfileTensorValuesV2.
3779 char const* tensorName, int32_t profileIndex, OptProfileSelector select) const noexcept
3780 {
3781 return mImpl->getProfileTensorValuesV2(tensorName, profileIndex, select);
3782 }
3783
3806 int64_t getEngineStat(EngineStat stat) const noexcept
3807 {
3808 return mImpl->getEngineStat(stat);
3809 }
3810
3811protected:
// Implementation pointer used by every method; no default initializer is given here.
3812 apiv::VCudaEngine* mImpl;
3813};
3814
// Out-of-class definition required because the destructor is declared pure virtual.
3815inline ICudaEngine::~ICudaEngine() noexcept = default;
3816
3817namespace v_1_0
3818{
// NOTE(review): the declaration line for this class (listing line 3819) is missing; the
// interface string below identifies it as IOutputAllocator.
3820{
3821public:
// Identifies this interface as {"IOutputAllocator", 1, 0}.
3825 InterfaceInfo getInterfaceInfo() const noexcept override
3826 {
3827 return {"IOutputAllocator", 1, 0};
3828 }
3829
// NOTE(review): the first line of this signature (listing line 3849) is missing. The next
// method calls reallocateOutput with the same four arguments, so this is presumably its
// default implementation. It ignores all arguments and returns nullptr.
3850 char const* /* tensorName */, void* /* currentMemory */, uint64_t /* size */, uint64_t /* alignment */) noexcept
3851 {
3852 return nullptr;
3853 }
3854
// NOTE(review): the first line of this signature (listing line 3877) is missing.
// The body ignores the stream and forwards the other four arguments to reallocateOutput.
// The visible part of the signature carries no noexcept, unlike the neighbouring methods.
3878 [[maybe_unused]] char const* tensorName, [[maybe_unused]] void* currentMemory, [[maybe_unused]] uint64_t size,
3879 [[maybe_unused]] uint64_t alignment, cudaStream_t /* stream */)
3880 {
3881 return reallocateOutput(tensorName, currentMemory, size, alignment);
3882 }
3883
// Pure virtual callback that receives a tensor name and its dimensions.
3892 virtual void notifyShape(char const* tensorName, Dims const& dims) noexcept = 0;
3893};
3894} // namespace v_1_0
3895
3904
3905namespace v_1_0
3906{
// Body of IDebugListener (name taken from the destructor below).
// NOTE(review): the declaration line with the class name and base class (listing line 3907)
// is missing; the `override` specifiers show that a base class exists.
3908{
3909public:
// Identifies this interface as {"IDebugListener", 1, 0}.
3913 InterfaceInfo getInterfaceInfo() const noexcept override
3914 {
3915 return {"IDebugListener", 1, 0};
3916 }
3917
// Pure virtual callback. It receives the tensor's address, location, data type, shape and
// name plus a CUDA stream, and returns a bool. This method is not marked noexcept.
3931 virtual bool processDebugTensor(void const* addr, TensorLocation location, DataType type, Dims const& shape,
3932 char const* name, cudaStream_t stream)
3933 = 0;
3934
// Defaulted destructor overriding the base-class destructor.
3935 ~IDebugListener() override = default;
3936};
3937} // namespace v_1_0
3938
3945
3957{
3958public:
3959 virtual ~IExecutionContext() noexcept = 0;
3960
// Forwards the sync flag to mImpl->setDebugSync; returns nothing.
3969 void setDebugSync(bool sync) noexcept
3970 {
3971 mImpl->setDebugSync(sync);
3972 }
3973
// Returns the result of mImpl->getDebugSync().
3979 bool getDebugSync() const noexcept
3980 {
3981 return mImpl->getDebugSync();
3982 }
3983
// Forwards the profiler pointer to mImpl->setProfiler; returns nothing.
// NOTE(review): whether a null pointer is accepted is not visible here.
3989 void setProfiler(IProfiler* profiler) noexcept
3990 {
3991 mImpl->setProfiler(profiler);
3992 }
3993
// Returns the IProfiler pointer produced by mImpl->getProfiler().
3999 IProfiler* getProfiler() const noexcept
4000 {
4001 return mImpl->getProfiler();
4002 }
4003
// Returns the result of mImpl->getEngine() as a reference to a const ICudaEngine.
4009 ICudaEngine const& getEngine() const noexcept
4010 {
4011 return mImpl->getEngine();
4012 }
4013
// Forwards the name string to mImpl->setName; returns nothing.
// NOTE(review): whether the string is copied or the pointer retained is not visible here.
4023 void setName(char const* name) noexcept
4024 {
4025 mImpl->setName(name);
4026 }
4027
// Returns the string pointer produced by mImpl->getName().
4033 char const* getName() const noexcept
4034 {
4035 return mImpl->getName();
4036 }
4037
// Forwards the memory pointer to mImpl->setDeviceMemory; no size is passed with it.
// The setDeviceMemoryV2 method in this class takes the same pointer plus a size.
4059 void setDeviceMemory(void* memory) noexcept
4060 {
4061 mImpl->setDeviceMemory(memory);
4062 }
4063
// Forwards the memory pointer and its size to mImpl->setDeviceMemoryV2.
// Returns void; the `return` statement simply forwards the void call.
4080 void setDeviceMemoryV2(void* memory, int64_t size) noexcept
4081 {
4082 return mImpl->setDeviceMemoryV2(memory, size);
4083 }
4084
// Returns the Dims value produced by mImpl->getTensorStrides for the named tensor.
4101 Dims getTensorStrides(char const* tensorName) const noexcept
4102 {
4103 return mImpl->getTensorStrides(tensorName);
4104 }
4105
4106public:
// Returns the int32_t produced by mImpl->getOptimizationProfile().
4116 int32_t getOptimizationProfile() const noexcept
4117 {
4118 return mImpl->getOptimizationProfile();
4119 }
4120
// Forwards the tensor name and dimensions to mImpl->setInputShape and returns its bool result.
4134 bool setInputShape(char const* tensorName, Dims const& dims) noexcept
4135 {
4136 return mImpl->setInputShape(tensorName, dims);
4137 }
4138
// Returns the Dims value produced by mImpl->getTensorShape for the named tensor.
4171 Dims getTensorShape(char const* tensorName) const noexcept
4172 {
4173 return mImpl->getTensorShape(tensorName);
4174 }
4175
// Returns the bool produced by mImpl->allInputDimensionsSpecified().
4187 bool allInputDimensionsSpecified() const noexcept
4188 {
4189 return mImpl->allInputDimensionsSpecified();
4190 }
4191
4206 void setErrorRecorder(IErrorRecorder* recorder) noexcept
4207 {
4208 mImpl->setErrorRecorder(recorder);
4209 }
4210
4222 {
4223 return mImpl->getErrorRecorder();
4224 }
4225
4238 bool executeV2(void* const* bindings) noexcept
4239 {
4240 return mImpl->executeV2(bindings);
4241 }
4242
4282 bool setOptimizationProfileAsync(int32_t profileIndex, cudaStream_t stream) noexcept
4283 {
4284 return mImpl->setOptimizationProfileAsync(profileIndex, stream);
4285 }
4286
4298 void setEnqueueEmitsProfile(bool enqueueEmitsProfile) noexcept
4299 {
4300 mImpl->setEnqueueEmitsProfile(enqueueEmitsProfile);
4301 }
4302
4310 bool getEnqueueEmitsProfile() const noexcept
4311 {
4312 return mImpl->getEnqueueEmitsProfile();
4313 }
4314
4340 bool reportToProfiler() const noexcept
4341 {
4342 return mImpl->reportToProfiler();
4343 }
4344
4384 bool setTensorAddress(char const* tensorName, void* data) noexcept
4385 {
4386 return mImpl->setTensorAddress(tensorName, data);
4387 }
4388
4401 void const* getTensorAddress(char const* tensorName) const noexcept
4402 {
4403 return mImpl->getTensorAddress(tensorName);
4404 }
4405
4424 bool setOutputTensorAddress(char const* tensorName, void* data) noexcept
4425 {
4426 return mImpl->setOutputTensorAddress(tensorName, data);
4427 }
4428
4446 bool setInputTensorAddress(char const* tensorName, void const* data) noexcept
4447 {
4448 return mImpl->setInputTensorAddress(tensorName, data);
4449 }
4450
4465 void* getOutputTensorAddress(char const* tensorName) const noexcept
4466 {
4467 return mImpl->getOutputTensorAddress(tensorName);
4468 }
4469
4498 int32_t inferShapes(int32_t nbMaxNames, char const** tensorNames) noexcept
4499 {
4500 return mImpl->inferShapes(nbMaxNames, tensorNames);
4501 }
4502
4516 {
4517 return mImpl->updateDeviceMemorySizeForShapes();
4518 }
4519
4531 bool setInputConsumedEvent(cudaEvent_t event) noexcept
4532 {
4533 return mImpl->setInputConsumedEvent(event);
4534 }
4535
4541 cudaEvent_t getInputConsumedEvent() const noexcept
4542 {
4543 return mImpl->getInputConsumedEvent();
4544 }
4545
4560 bool setOutputAllocator(char const* tensorName, IOutputAllocator* outputAllocator) noexcept
4561 {
4562 return mImpl->setOutputAllocator(tensorName, outputAllocator);
4563 }
4564
4573 IOutputAllocator* getOutputAllocator(char const* tensorName) const noexcept
4574 {
4575 return mImpl->getOutputAllocator(tensorName);
4576 }
4577
4591 int64_t getMaxOutputSize(char const* tensorName) const noexcept
4592 {
4593 return mImpl->getMaxOutputSize(tensorName);
4594 }
4595
4612 {
4613 return mImpl->setTemporaryStorageAllocator(allocator);
4614 }
4615
4622 {
4623 return mImpl->getTemporaryStorageAllocator();
4624 }
4625
4645 bool enqueueV3(cudaStream_t stream) noexcept
4646 {
4647 return mImpl->enqueueV3(stream);
4648 }
4649
4661 void setPersistentCacheLimit(size_t size) noexcept
4662 {
4663 mImpl->setPersistentCacheLimit(size);
4664 }
4665
4672 size_t getPersistentCacheLimit() const noexcept
4673 {
4674 return mImpl->getPersistentCacheLimit();
4675 }
4676
4696 bool setNvtxVerbosity(ProfilingVerbosity verbosity) noexcept
4697 {
4698 return mImpl->setNvtxVerbosity(verbosity);
4699 }
4700
4709 {
4710 return mImpl->getNvtxVerbosity();
4711 }
4712
4739 void setAuxStreams(cudaStream_t* auxStreams, int32_t nbStreams) noexcept
4740 {
4741 mImpl->setAuxStreams(auxStreams, nbStreams);
4742 }
4743
4751 bool setDebugListener(IDebugListener* listener) noexcept
4752 {
4753 return mImpl->setDebugListener(listener);
4754 }
4755
4762 {
4763 return mImpl->getDebugListener();
4764 }
4765
4780 bool setTensorDebugState(char const* name, bool flag) noexcept
4781 {
4782 return mImpl->setTensorDebugState(name, flag);
4783 }
4784
4792 bool getDebugState(char const* name) const noexcept
4793 {
4794 return mImpl->getDebugState(name);
4795 }
4796
4803 {
4804 return mImpl->getRuntimeConfig();
4805 }
4806
4815 bool setAllTensorsDebugState(bool flag) noexcept
4816 {
4817 return mImpl->setAllTensorsDebugState(flag);
4818 }
4819
4831 bool setUnfusedTensorsDebugState(bool flag) noexcept
4832 {
4833 return mImpl->setUnfusedTensorsDebugState(flag);
4834 }
4835
4841 bool getUnfusedTensorsDebugState() const noexcept
4842 {
4843 return mImpl->getUnfusedTensorsDebugState();
4844 }
4845
4859 bool setCommunicator(void* communicator) noexcept
4860 {
4861 return mImpl->setCommunicator(communicator);
4862 }
4863
4864protected:
4865 apiv::VExecutionContext* mImpl;
4866}; // class IExecutionContext
4867
//! Inline definition of the destructor that the class declares pure virtual (`= 0`).
4868inline IExecutionContext::~IExecutionContext() noexcept = default;
4869
//! Output format selector passed to IEngineInspector::getLayerInformation() and
//! IEngineInspector::getEngineInformation().
4877enum class LayerInformationFormat : int32_t
4878{
4879 kONELINE = 0, // presumably a single-line textual description — confirm
4880 kJSON = 1, // presumably a JSON description — confirm
4881};
4882
//! Explicit specialization whose kVALUE (2) matches the number of enumerators declared in
//! LayerInformationFormat.
//! NOTE(review): this listing omits line 4885, the declaration of the specialized struct —
//! confirm its exact text against the original header.
4884template <>
4886{
4887 static constexpr int32_t kVALUE = 2;
4888};
4889
//! An engine inspector which prints out the layer information of an engine or an execution context.
//! Every public method forwards to the implementation object mImpl.
4905class IEngineInspector : public INoCopy
4906{
4907public:
 //! Pure virtual destructor; its inline definition follows the class.
4908 virtual ~IEngineInspector() noexcept = 0;
4909
 //! Set the execution context to inspect (forwarded to the implementation).
4922 bool setExecutionContext(IExecutionContext const* context) noexcept
4923 {
4924 return mImpl->setExecutionContext(context);
4925 }
4926
 //! Get the context currently being inspected.
4934 IExecutionContext const* getExecutionContext() const noexcept
4935 {
4936 return mImpl->getExecutionContext();
4937 }
4938
 //! Get a string describing the information about a specific layer, in the requested format.
4959 char const* getLayerInformation(int32_t layerIndex, LayerInformationFormat format) const noexcept
4960 {
4961 return mImpl->getLayerInformation(layerIndex, format);
4962 }
4963
 //! Get a string describing the information about all the layers, in the requested format.
4982 char const* getEngineInformation(LayerInformationFormat format) const noexcept
4983 {
4984 return mImpl->getEngineInformation(format);
4985 }
4986
 //! Set the ErrorRecorder for this interface.
5001 void setErrorRecorder(IErrorRecorder* recorder) noexcept
5002 {
5003 mImpl->setErrorRecorder(recorder);
5004 }
5005
 //! Get the ErrorRecorder assigned to this interface.
5016 IErrorRecorder* getErrorRecorder() const noexcept
5017 {
5018 return mImpl->getErrorRecorder();
5019 }
5020
5021protected:
 //! Implementation object that all of the wrappers above delegate to.
5022 apiv::VEngineInspector* mImpl;
5023}; // class IEngineInspector
5024
//! Inline definition of the destructor that the class declares pure virtual (`= 0`).
5025inline IEngineInspector::~IEngineInspector() noexcept = default;
5026
5027} // namespace nvinfer1
5028
//! Internal C entry point: createInferRuntime() calls it with the logger's address and
//! NV_TENSORRT_VERSION, then casts the returned void* to IRuntime*.
5033extern "C" TENSORRTAPI void* createInferRuntime_INTERNAL(void* logger, int32_t version) noexcept;
5034
//! Internal C entry point: createInferRefitter() calls it with the engine's and logger's
//! addresses and NV_TENSORRT_VERSION, then casts the returned void* to IRefitter*.
5039extern "C" TENSORRTAPI void* createInferRefitter_INTERNAL(void* engine, void* logger, int32_t version) noexcept;
5040
//! Return the plugin registry.
5044extern "C" TENSORRTAPI nvinfer1::IPluginRegistry* getPluginRegistry() noexcept;
5045
//! Return the logger object.
5051extern "C" TENSORRTAPI nvinfer1::ILogger* getLogger() noexcept;
5052
5053namespace nvinfer1
5054{
5055namespace // unnamed namespace avoids linkage surprises when linking objects built with different versions of this
5056 // header.
5057{
//! Create an IRuntime: passes the logger's address and the compile-time NV_TENSORRT_VERSION to
//! createInferRuntime_INTERNAL() and casts the returned void* to IRuntime*.
5063inline IRuntime* createInferRuntime(ILogger& logger) noexcept
5064{
5065 return static_cast<IRuntime*>(createInferRuntime_INTERNAL(&logger, NV_TENSORRT_VERSION));
5066}
5067
//! Create an IRefitter for the given engine: passes the engine's and logger's addresses plus
//! NV_TENSORRT_VERSION to createInferRefitter_INTERNAL() and casts the returned void* to IRefitter*.
5074inline IRefitter* createInferRefitter(ICudaEngine& engine, ILogger& logger) noexcept
5075{
5076 return static_cast<IRefitter*>(createInferRefitter_INTERNAL(&engine, &logger, NV_TENSORRT_VERSION));
5077}
5078
5079} // namespace
5080
//! Helper whose constructor registers an owned, value-initialized instance of plugin creator
//! type T with the plugin registry, using the empty string as plugin namespace.
//! Instantiated by the REGISTER_TENSORRT_PLUGIN macro.
5092template <typename T>
5093class PluginRegistrar
5094{
5095public:
 //! Register the owned creator instance with the registry returned by getPluginRegistry().
5096 PluginRegistrar()
5097 {
5098 getPluginRegistry()->registerCreator(instance, "");
5099 }
5100
5101private:
 //! The creator object handed to the registry by reference.
5103 T instance{};
5104};
5105
5106} // namespace nvinfer1
5107
//! Define a static nvinfer1::PluginRegistrar<name> object called pluginRegistrar<name>; the
//! registrar's constructor registers a `name` creator instance with the plugin registry.
//! The trailing semicolon is supplied by the macro's user.
5108#define REGISTER_TENSORRT_PLUGIN(name) \
5109 static nvinfer1::PluginRegistrar<name> pluginRegistrar##name {}
5110
5111namespace nvinfer1
5112{
5115namespace v_1_0
5116{
//! Versioned interface exposing a single hook, findLogger(), that yields an ILogger.
//! NOTE(review): base class restored from the `override` specifiers below — confirm against the header.
5125class ILoggerFinder : public IVersionedInterface
5126{
5127public:
 //! Return the InterfaceInfo {"ILoggerFinder", 1, 0} that identifies this interface.
5131 InterfaceInfo getInterfaceInfo() const noexcept override
5132 {
5133 return {"ILoggerFinder", 1, 0};
5134 }
5135
 //! Return a pointer to a logger; pure virtual, supplied by the implementer.
 //! NOTE(review): which logger is returned, and whether it may be null, is not visible here — confirm.
5143 virtual ILogger* findLogger() = 0;
5144
5145protected:
 //! Protected destructor: code outside the hierarchy cannot delete through this interface.
5147 ~ILoggerFinder() override = default;
5148};
5149
5150} // namespace v_1_0
5151
//! Expose v_1_0::ILoggerFinder under its unversioned name in namespace nvinfer1.
5153using ILoggerFinder = v_1_0::ILoggerFinder;
5156namespace v_1_0
5157{
5158
//! GPU allocator whose primary entry points take a CUDA stream; the deprecated stream-less
//! allocate()/deallocate() overrides simply delegate to the async versions with a null stream.
5159class IGpuAsyncAllocator : public IGpuAllocator
5160{
5161public:
5163 ~IGpuAsyncAllocator() override = default;
5164
 //! Allocate memory of the given size, alignment and flags; the last parameter is a CUDA stream.
 //! Pure virtual: implementers must provide it.
5194 void* allocateAsync(uint64_t const size, uint64_t const alignment, AllocatorFlags const flags,
5195 cudaStream_t /*stream*/) noexcept override = 0;
5196
 //! Release memory previously handed out by this allocator; the last parameter is a CUDA stream.
 //! Pure virtual: implementers must provide it.
5222 bool deallocateAsync(void* const memory, cudaStream_t /*stream*/) noexcept override = 0;
5223
 //! Deprecated stream-less allocation: delegates to allocateAsync() with a null stream.
5247 TRT_DEPRECATED void* allocate(
5248 uint64_t const size, uint64_t const alignment, AllocatorFlags const flags) noexcept override
5249 {
5250 return allocateAsync(size, alignment, flags, nullptr);
5251 }
5252
 //! Deprecated stream-less deallocation: delegates to deallocateAsync() with a null stream.
5271 TRT_DEPRECATED bool deallocate(void* const memory) noexcept override
5272 {
5273 return deallocateAsync(memory, nullptr);
5274 }
5275
 //! Return the InterfaceInfo {"IGpuAllocator", 1, 0}.
 //! NOTE(review): reports the base allocator's name rather than an async-specific one —
 //! presumably deliberate for compatibility; confirm.
5279 InterfaceInfo getInterfaceInfo() const noexcept override
5280 {
5281 return {"IGpuAllocator", 1, 0};
5282 }
5283};
5284
//! Plugin creator interface: creates plugins from a name, a field collection and a TensorRT
//! phase, and reports the plugin's field names, name, version and namespace.
5285class IPluginCreatorV3One : public IPluginCreatorInterface
5286{
5287public:
 //! Return the InterfaceInfo {"PLUGIN CREATOR_V3ONE", 1, 0} that identifies this interface.
5291 InterfaceInfo getInterfaceInfo() const noexcept override
5292 {
5293 return InterfaceInfo{"PLUGIN CREATOR_V3ONE", 1, 0};
5294 }
5295
 //! Create a plugin from its name, a collection of plugin fields and the TensorRT phase.
 //! NOTE(review): ownership of the returned pointer is not visible in this listing — confirm.
5312 virtual IPluginV3* createPlugin(
5313 AsciiChar const* name, PluginFieldCollection const* fc, TensorRTPhase phase) noexcept = 0;
5314
 //! Return the collection of field names for this creator's plugins.
5321 virtual PluginFieldCollection const* getFieldNames() noexcept = 0;
5322
 //! Return the plugin name.
5329 virtual AsciiChar const* getPluginName() const noexcept = 0;
5330
 //! Return the plugin version.
5337 virtual AsciiChar const* getPluginVersion() const noexcept = 0;
5338
 //! Return the plugin namespace.
5345 virtual AsciiChar const* getPluginNamespace() const noexcept = 0;
5346
5348 virtual ~IPluginCreatorV3One() = default;
5349
5350protected:
 //! NOTE(review): lines 5351-5352 are absent from this listing (presumably protected
 //! constructors) — confirm against the original header.
 //! Assignment is protected and restricted to lvalues (the `&` ref-qualifier).
5353 IPluginCreatorV3One& operator=(IPluginCreatorV3One const&) & = default;
5354 IPluginCreatorV3One& operator=(IPluginCreatorV3One&&) & = default;
5355};
5356
5357} // namespace v_1_0
5358
//! Expose v_1_0::IGpuAsyncAllocator under its unversioned name in namespace nvinfer1.
5373using IGpuAsyncAllocator = v_1_0::IGpuAsyncAllocator;
//! Expose v_1_0::IPluginCreatorV3One under its unversioned name in namespace nvinfer1.
5383using IPluginCreatorV3One = v_1_0::IPluginCreatorV3One;
5384} // namespace nvinfer1
5385
//! Return the library major version number.
5389extern "C" TENSORRTAPI int32_t getInferLibMajorVersion() noexcept;
//! Return the library minor version number.
5393extern "C" TENSORRTAPI int32_t getInferLibMinorVersion() noexcept;
//! Return the library patch version number.
5397extern "C" TENSORRTAPI int32_t getInferLibPatchVersion() noexcept;
//! Return the library build version number.
5401extern "C" TENSORRTAPI int32_t getInferLibBuildVersion() noexcept;
5402
5403#endif // NV_INFER_RUNTIME_H
TENSORRTAPI nvinfer1::IPluginRegistry * getPluginRegistry() noexcept
Return the plugin registry.
TENSORRTAPI nvinfer1::ILogger * getLogger() noexcept
Return the logger object.
TENSORRTAPI int32_t getInferLibMinorVersion() noexcept
Return the library minor version number.
TENSORRTAPI int32_t getInferLibMajorVersion() noexcept
Return the library major version number.
TENSORRTAPI int32_t getInferLibPatchVersion() noexcept
Return the library patch version number.
TENSORRTAPI int32_t getInferLibBuildVersion() noexcept
Return the library build version number.
#define TENSORRTAPI
Definition: NvInferRuntimeBase.h:70
#define NV_TENSORRT_VERSION
Definition: NvInferRuntimeBase.h:102
#define TRT_NODISCARD
A stand-in for [[nodiscard]] and [[nodiscard(REASON)]] that works with older compilers.
Definition: NvInferRuntimeBase.h:57
#define TRT_DEPRECATED
Definition: NvInferRuntimeBase.h:42
Structure to define the dimensions of a tensor.
Definition: NvInferRuntimeBase.h:222
static constexpr int32_t MAX_DIMS
The maximum rank (number of dimensions) supported for a tensor.
Definition: NvInferRuntimeBase.h:225
Analog of class Dims with expressions instead of constants for the dimensions.
Definition: NvInferRuntime.h:350
int32_t nbDims
The number of dimensions.
Definition: NvInferRuntime.h:352
An engine for executing inference on a built network, with functionally unsafe features.
Definition: NvInferRuntime.h:3013
int32_t getTensorBytesPerComponent(char const *tensorName) const noexcept
Return the number of bytes per component of an element, or -1 if the tensor is not vectorized or prov...
Definition: NvInferRuntime.h:3242
ISerializationConfig * createSerializationConfig() noexcept
Create a serialization configuration object.
Definition: NvInferRuntime.h:3602
char const * getIOTensorName(int32_t index) const noexcept
Return name of an IO tensor.
Definition: NvInferRuntime.h:3566
int64_t getWeightStreamingBudgetV2() const noexcept
Returns the current weight streaming device memory budget in bytes.
Definition: NvInferRuntime.h:3693
EngineCapability getEngineCapability() const noexcept
Determine what execution capability this engine has.
Definition: NvInferRuntime.h:3469
IErrorRecorder * getErrorRecorder() const noexcept
Get the ErrorRecorder assigned to this interface.
Definition: NvInferRuntime.h:3503
TensorFormat getTensorFormat(char const *tensorName, int32_t profileIndex) const noexcept
Return the tensor format of given profile, or TensorFormat::kLINEAR if the provided name does not map...
Definition: NvInferRuntime.h:3328
int64_t const * getProfileTensorValuesV2(char const *tensorName, int32_t profileIndex, OptProfileSelector select) const noexcept
Get the minimum / optimum / maximum values (not dimensions) for an input tensor given its name under ...
Definition: NvInferRuntime.h:3778
apiv::VCudaEngine * mImpl
Definition: NvInferRuntime.h:3812
IExecutionContext * createExecutionContext(ExecutionContextAllocationStrategy strategy=ExecutionContextAllocationStrategy::kSTATIC) noexcept
Create an execution context and specify the strategy for allocating internal activation memory.
Definition: NvInferRuntime.h:3087
char const * getTensorFormatDesc(char const *tensorName) const noexcept
Return the human readable description of the tensor format, or empty string if the provided name does...
Definition: NvInferRuntime.h:3352
Dims getProfileShape(char const *tensorName, int32_t profileIndex, OptProfileSelector select) const noexcept
Get the minimum / optimum / maximum dimensions for an input tensor given its name under an optimizati...
Definition: NvInferRuntime.h:3454
bool setWeightStreamingBudgetV2(int64_t gpuMemoryBudget) noexcept
Limit the maximum amount of GPU memory usable for network weights in bytes.
Definition: NvInferRuntime.h:3677
IExecutionContext * createExecutionContext(IRuntimeConfig *runtimeConfig) noexcept
Create an execution context with TensorRT JIT runtime config.
Definition: NvInferRuntime.h:3170
int32_t getNbAuxStreams() const noexcept
Return the number of auxiliary streams used by this engine.
Definition: NvInferRuntime.h:3592
int64_t getStreamableWeightsSize() const noexcept
Get the total size in bytes of all streamable weights.
Definition: NvInferRuntime.h:3638
DataType getTensorDataType(char const *tensorName) const noexcept
Determine the required data type for a buffer from its tensor name.
Definition: NvInferRuntime.h:3042
void setErrorRecorder(IErrorRecorder *recorder) noexcept
Set the ErrorRecorder for this interface.
Definition: NvInferRuntime.h:3488
TacticSources getTacticSources() const noexcept
Return the tactic sources required by this engine.
Definition: NvInferRuntime.h:3519
TRT_NODISCARD char const * getAliasedInputTensor(char const *tensorName) const noexcept
Get the input tensor name that an output tensor should alias with.
Definition: NvInferRuntime.h:3158
IHostMemory * serializeWithConfig(ISerializationConfig &config) const noexcept
Serialize the network to a stream with the provided SerializationConfig.
Definition: NvInferRuntime.h:3622
int64_t getWeightStreamingAutomaticBudget() const noexcept
TensorRT automatically determines a device memory budget for the model to run. The budget is close to...
Definition: NvInferRuntime.h:3716
bool isDebugTensor(char const *name) const noexcept
Check if a tensor is marked as a debug tensor.
Definition: NvInferRuntime.h:3756
int32_t getTensorVectorizedDim(char const *tensorName, int32_t profileIndex) const noexcept
Return the dimension index that the buffer is vectorized of given profile, or -1 if the provided name...
Definition: NvInferRuntime.h:3408
char const * getName() const noexcept
Returns the name of the network associated with the engine.
Definition: NvInferRuntime.h:3423
ProfilingVerbosity getProfilingVerbosity() const noexcept
Return the ProfilingVerbosity the builder config was set to when the engine was built.
Definition: NvInferRuntime.h:3531
bool isShapeInferenceIO(char const *tensorName) const noexcept
True if tensor is required as input for shape calculations or is output from shape calculations.
Definition: NvInferRuntime.h:3125
int64_t getWeightStreamingScratchMemorySize() const noexcept
Returns the size of the scratch memory required by the current weight streaming budget.
Definition: NvInferRuntime.h:3742
int64_t getDeviceMemorySizeV2() const noexcept
Return the maximum device memory required by the context over all profiles.
Definition: NvInferRuntime.h:3197
int32_t getTensorVectorizedDim(char const *tensorName) const noexcept
Return the dimension index that the buffer is vectorized, or -1 if the provided name does not map to ...
Definition: NvInferRuntime.h:3392
int32_t getTensorComponentsPerElement(char const *tensorName, int32_t profileIndex) const noexcept
Return the number of components included in one element of given profile, or -1 if tensor is not vect...
Definition: NvInferRuntime.h:3299
int64_t getDeviceMemorySizeForProfileV2(int32_t profileIndex) const noexcept
Return the maximum device memory required by the context for a profile.
Definition: NvInferRuntime.h:3211
IRuntimeConfig * createRuntimeConfig() noexcept
Create a runtime config for TensorRT JIT. The caller is responsible for ownership of the returned IRu...
Definition: NvInferRuntime.h:3183
TensorFormat getTensorFormat(char const *tensorName) const noexcept
Return the tensor format, or TensorFormat::kLINEAR if the provided name does not map to an input or o...
Definition: NvInferRuntime.h:3314
IHostMemory * serialize() const noexcept
Serialize the network to a stream.
Definition: NvInferRuntime.h:3070
int64_t getEngineStat(EngineStat stat) const noexcept
Get engine statistics according to the given enum value.
Definition: NvInferRuntime.h:3806
TensorLocation getTensorLocation(char const *tensorName) const noexcept
Get whether an input or output tensor must be on GPU or CPU.
Definition: NvInferRuntime.h:3105
IEngineInspector * createEngineInspector() const noexcept
Create a new engine inspector which prints the layer information in an engine or an execution context...
Definition: NvInferRuntime.h:3541
int32_t getTensorBytesPerComponent(char const *tensorName, int32_t profileIndex) const noexcept
Return the number of bytes per component of an element of given profile, or -1 if the tensor is...
Definition: NvInferRuntime.h:3260
HardwareCompatibilityLevel getHardwareCompatibilityLevel() const noexcept
Return the hardware compatibility level of this engine.
Definition: NvInferRuntime.h:3577
int32_t getNbOptimizationProfiles() const noexcept
Get the number of optimization profiles defined for this engine.
Definition: NvInferRuntime.h:3434
char const * getTensorFormatDesc(char const *tensorName, int32_t profileIndex) const noexcept
Return the human readable description of the tensor format of given profile, or empty string if the p...
Definition: NvInferRuntime.h:3375
TensorIOMode getTensorIOMode(char const *tensorName) const noexcept
Determine whether a tensor is an input or output tensor.
Definition: NvInferRuntime.h:3139
int32_t getNbLayers() const noexcept
Get the number of layers in the network.
Definition: NvInferRuntime.h:3056
int32_t getNbIOTensors() const noexcept
Return number of IO tensors.
Definition: NvInferRuntime.h:3554
virtual ~ICudaEngine() noexcept=0
int32_t getTensorComponentsPerElement(char const *tensorName) const noexcept
Return the number of components included in one element, or -1 if tensor is not vectorized or if the ...
Definition: NvInferRuntime.h:3281
bool isRefittable() const noexcept
Return true if an engine can be refit.
Definition: NvInferRuntime.h:3221
An IDimensionExpr represents an integer expression constructed from constants, input dimensions,...
Definition: NvInferRuntime.h:228
bool isConstant() const noexcept
Return true if expression is a build-time constant.
Definition: NvInferRuntime.h:233
bool isSizeTensor() const noexcept
Return true if this denotes the value of a size tensor.
Definition: NvInferRuntime.h:259
virtual ~IDimensionExpr() noexcept=0
apiv::VDimensionExpr * mImpl
Definition: NvInferRuntime.h:250
int64_t getConstantValue() const noexcept
Get the value of the constant.
Definition: NvInferRuntime.h:244
An engine inspector which prints out the layer information of an engine or an execution context.
Definition: NvInferRuntime.h:4906
char const * getLayerInformation(int32_t layerIndex, LayerInformationFormat format) const noexcept
Get a string describing the information about a specific layer in the current engine or the execution...
Definition: NvInferRuntime.h:4959
IErrorRecorder * getErrorRecorder() const noexcept
Get the ErrorRecorder assigned to this interface.
Definition: NvInferRuntime.h:5016
void setErrorRecorder(IErrorRecorder *recorder) noexcept
Set the ErrorRecorder for this interface.
Definition: NvInferRuntime.h:5001
virtual ~IEngineInspector() noexcept=0
IExecutionContext const * getExecutionContext() const noexcept
Get the context currently being inspected.
Definition: NvInferRuntime.h:4934
apiv::VEngineInspector * mImpl
Definition: NvInferRuntime.h:5022
char const * getEngineInformation(LayerInformationFormat format) const noexcept
Get a string describing the information about all the layers in the current engine or the execution c...
Definition: NvInferRuntime.h:4982
Context for executing inference using an engine, with functionally unsafe features.
Definition: NvInferRuntime.h:3957
IOutputAllocator * getOutputAllocator(char const *tensorName) const noexcept
Get output allocator associated with output tensor of given name, or nullptr if the provided name doe...
Definition: NvInferRuntime.h:4573
IErrorRecorder * getErrorRecorder() const noexcept
Get the ErrorRecorder assigned to this interface.
Definition: NvInferRuntime.h:4221
bool reportToProfiler() const noexcept
Calculate layer timing info for the current optimization profile in IExecutionContext and update the ...
Definition: NvInferRuntime.h:4340
void setDeviceMemory(void *memory) noexcept
Set the device memory for use by this execution context.
Definition: NvInferRuntime.h:4059
bool setTensorDebugState(char const *name, bool flag) noexcept
Set debug state of tensor given the tensor name.
Definition: NvInferRuntime.h:4780
char const * getName() const noexcept
Return the name of the execution context.
Definition: NvInferRuntime.h:4033
IGpuAllocator * getTemporaryStorageAllocator() const noexcept
Get allocator set by setTemporaryStorageAllocator.
Definition: NvInferRuntime.h:4621
void setEnqueueEmitsProfile(bool enqueueEmitsProfile) noexcept
Set whether enqueue emits layer timing to the profiler.
Definition: NvInferRuntime.h:4298
bool setUnfusedTensorsDebugState(bool flag) noexcept
Turn the debug state of unfused tensors on or off.
Definition: NvInferRuntime.h:4831
Dims getTensorShape(char const *tensorName) const noexcept
Return the shape of the given input or output.
Definition: NvInferRuntime.h:4171
bool getDebugState(char const *name) const noexcept
Get the debug state.
Definition: NvInferRuntime.h:4792
bool setInputShape(char const *tensorName, Dims const &dims) noexcept
Set shape of given input.
Definition: NvInferRuntime.h:4134
bool executeV2(void *const *bindings) noexcept
Synchronously execute a network.
Definition: NvInferRuntime.h:4238
bool getEnqueueEmitsProfile() const noexcept
Get the enqueueEmitsProfile state.
Definition: NvInferRuntime.h:4310
void const * getTensorAddress(char const *tensorName) const noexcept
Get memory address bound to given input or output tensor, or nullptr if the provided name does not ma...
Definition: NvInferRuntime.h:4401
bool setOutputAllocator(char const *tensorName, IOutputAllocator *outputAllocator) noexcept
Set output allocator to use for output tensor of given name. Pass nullptr to outputAllocator to unset...
Definition: NvInferRuntime.h:4560
bool setOptimizationProfileAsync(int32_t profileIndex, cudaStream_t stream) noexcept
Select an optimization profile for the current context with async semantics.
Definition: NvInferRuntime.h:4282
apiv::VExecutionContext * mImpl
Definition: NvInferRuntime.h:4865
bool setOutputTensorAddress(char const *tensorName, void *data) noexcept
Set the memory address for a given output tensor.
Definition: NvInferRuntime.h:4424
void setPersistentCacheLimit(size_t size) noexcept
Set the maximum size for persistent cache usage.
Definition: NvInferRuntime.h:4661
virtual ~IExecutionContext() noexcept=0
size_t getPersistentCacheLimit() const noexcept
Get the maximum size for persistent cache usage.
Definition: NvInferRuntime.h:4672
bool setAllTensorsDebugState(bool flag) noexcept
Turn the debug state of all debug tensors on or off.
Definition: NvInferRuntime.h:4815
ICudaEngine const & getEngine() const noexcept
Get the associated engine.
Definition: NvInferRuntime.h:4009
ProfilingVerbosity getNvtxVerbosity() const noexcept
Get the NVTX verbosity of the execution context.
Definition: NvInferRuntime.h:4708
size_t updateDeviceMemorySizeForShapes() noexcept
Recompute the internal activation buffer sizes based on the current input shapes, and return the tota...
Definition: NvInferRuntime.h:4515
void setAuxStreams(cudaStream_t *auxStreams, int32_t nbStreams) noexcept
Set the auxiliary streams that TensorRT should launch kernels on in the next enqueueV3() call.
Definition: NvInferRuntime.h:4739
int64_t getMaxOutputSize(char const *tensorName) const noexcept
Get upper bound on an output tensor's size, in bytes, based on the current optimization profile and i...
Definition: NvInferRuntime.h:4591
int32_t inferShapes(int32_t nbMaxNames, char const **tensorNames) noexcept
Run shape calculations.
Definition: NvInferRuntime.h:4498
bool setDebugListener(IDebugListener *listener) noexcept
Set DebugListener for this execution context.
Definition: NvInferRuntime.h:4751
bool setTensorAddress(char const *tensorName, void *data) noexcept
Set memory address for given input or output tensor.
Definition: NvInferRuntime.h:4384
bool setTemporaryStorageAllocator(IGpuAllocator *allocator) noexcept
Specify allocator to use for internal temporary storage.
Definition: NvInferRuntime.h:4611
void * getOutputTensorAddress(char const *tensorName) const noexcept
Get memory address for given output.
Definition: NvInferRuntime.h:4465
bool enqueueV3(cudaStream_t stream) noexcept
Enqueue inference on a stream.
Definition: NvInferRuntime.h:4645
IDebugListener * getDebugListener() noexcept
Get the DebugListener of this execution context.
Definition: NvInferRuntime.h:4761
int32_t getOptimizationProfile() const noexcept
Get the index of the currently selected optimization profile.
Definition: NvInferRuntime.h:4116
bool setInputTensorAddress(char const *tensorName, void const *data) noexcept
Set memory address for given input.
Definition: NvInferRuntime.h:4446
bool getDebugSync() const noexcept
Get the debug sync flag.
Definition: NvInferRuntime.h:3979
bool setInputConsumedEvent(cudaEvent_t event) noexcept
Mark input as consumed.
Definition: NvInferRuntime.h:4531
Dims getTensorStrides(char const *tensorName) const noexcept
Return the strides of the buffer for the given tensor name.
Definition: NvInferRuntime.h:4101
bool setNvtxVerbosity(ProfilingVerbosity verbosity) noexcept
Set the verbosity of the NVTX markers in the execution context.
Definition: NvInferRuntime.h:4696
IProfiler * getProfiler() const noexcept
Get the profiler.
Definition: NvInferRuntime.h:3999
void setErrorRecorder(IErrorRecorder *recorder) noexcept
Set the ErrorRecorder for this interface.
Definition: NvInferRuntime.h:4206
bool setCommunicator(void *communicator) noexcept
Set the NCCL communicator for the execution context.
Definition: NvInferRuntime.h:4859
void setDeviceMemoryV2(void *memory, int64_t size) noexcept
Set the device memory and its corresponding size for use by this execution context.
Definition: NvInferRuntime.h:4080
bool allInputDimensionsSpecified() const noexcept
Whether all dynamic dimensions of input tensors have been specified.
Definition: NvInferRuntime.h:4187
bool getUnfusedTensorsDebugState() const noexcept
Get the debug state of unfused tensors.
Definition: NvInferRuntime.h:4841
void setProfiler(IProfiler *profiler) noexcept
Set the profiler.
Definition: NvInferRuntime.h:3989
void setName(char const *name) noexcept
Set the name of the execution context.
Definition: NvInferRuntime.h:4023
cudaEvent_t getInputConsumedEvent() const noexcept
The event associated with consuming the input.
Definition: NvInferRuntime.h:4541
IRuntimeConfig * getRuntimeConfig() const noexcept
Get the runtime config object used during execution context creation.
Definition: NvInferRuntime.h:4802
Object for constructing IDimensionExpr.
Definition: NvInferRuntime.h:285
IDimensionExpr const * operation(DimensionOperation op, IDimensionExpr const &first, IDimensionExpr const &second) noexcept
Get the operation.
Definition: NvInferRuntime.h:301
IDimensionExpr const * constant(int64_t value) noexcept
Return pointer to IDimensionExpr for given value.
Definition: NvInferRuntime.h:290
apiv::VExprBuilder * mImpl
Definition: NvInferRuntime.h:308
virtual ~IExprBuilder() noexcept=0
Class to handle library allocated memory that is accessible to the user.
Definition: NvInferRuntime.h:139
void * data() const noexcept
A pointer to the raw data that is owned by the library.
Definition: NvInferRuntime.h:144
virtual ~IHostMemory() noexcept=0
DataType type() const noexcept
The type of the memory that was allocated.
Definition: NvInferRuntime.h:156
std::size_t size() const noexcept
The size in bytes of the data that was allocated.
Definition: NvInferRuntime.h:150
apiv::VHostMemory * mImpl
Definition: NvInferRuntime.h:162
Base class for interfaces that must not be copied or moved: its copy and move constructors and assignment operators are deleted.
Definition: NvInferRuntime.h:51
INoCopy & operator=(INoCopy &&other)=delete
INoCopy(INoCopy const &other)=delete
INoCopy(INoCopy &&other)=delete
virtual ~INoCopy()=default
INoCopy & operator=(INoCopy const &other)=delete
Optimization profile for dynamic input dimensions and shape tensors.
Definition: NvInferRuntime.h:2575
apiv::VOptimizationProfile * mImpl
Definition: NvInferRuntime.h:2742
Dims getDimensions(char const *inputName, OptProfileSelector select) const noexcept
Get the minimum / optimum / maximum dimensions for a dynamic input tensor.
Definition: NvInferRuntime.h:2616
float getExtraMemoryTarget() const noexcept
Get the extra memory target that has been defined for this profile.
Definition: NvInferRuntime.h:2659
bool setExtraMemoryTarget(float target) noexcept
Set a target for extra GPU memory that may be used by this profile.
Definition: NvInferRuntime.h:2647
bool setDimensions(char const *inputName, OptProfileSelector select, Dims const &dims) noexcept
Set the minimum / optimum / maximum dimensions for a dynamic input tensor.
Definition: NvInferRuntime.h:2604
virtual ~IOptimizationProfile() noexcept=0
bool isValid() const noexcept
Check whether the optimization profile can be passed to an IBuilderConfig object.
Definition: NvInferRuntime.h:2676
int64_t const * getShapeValuesV2(char const *inputName, OptProfileSelector select) const noexcept
Get the minimum / optimum / maximum values for an input shape tensor.
Definition: NvInferRuntime.h:2736
bool setShapeValuesV2(char const *inputName, OptProfileSelector select, int64_t const *values, int32_t nbValues) noexcept
Set the minimum / optimum / maximum values for an input shape tensor.
Definition: NvInferRuntime.h:2723
int32_t getNbShapeValues(char const *inputName) const noexcept
Get the number of values for an input shape tensor.
Definition: NvInferRuntime.h:2629
Single registration point for all plugins in an application. It is used to find plugin implementation...
Definition: NvInferRuntimeCommon.h:56
virtual bool registerCreator(IPluginCreatorInterface &creator, AsciiChar const *const pluginNamespace) noexcept=0
Register a plugin creator. Returns false if a plugin creator with the same type is already registered...
Interface for plugins to access per context resources provided by TensorRT.
Definition: NvInferRuntime.h:792
virtual IErrorRecorder * getErrorRecorder() const noexcept=0
Get the error recorder associated with the resource context.
IPluginResourceContext & operator=(IPluginResourceContext const &) &=default
virtual IGpuAllocator * getGpuAllocator() const noexcept=0
Get the GPU allocator associated with the resource context.
Similar to IPluginV2Ext, but with support for dynamic shapes.
Definition: NvInferRuntime.h:407
IPluginV2DynamicExt * clone() const noexcept override=0
Clone the plugin object. This copies over internal plugin parameters as well and returns a new plugin...
virtual ~IPluginV2DynamicExt() noexcept
Definition: NvInferRuntime.h:556
Plugin class for user-implemented layers.
Definition: NvInferRuntimePlugin.h:474
Updates weights in an engine.
Definition: NvInferRuntime.h:2169
bool refitCudaEngineAsync(cudaStream_t stream) noexcept
Enqueue weights refitting of the associated engine on the given stream.
Definition: NvInferRuntime.h:2499
int32_t getMaxThreads() const noexcept
Get the maximum number of threads that can be used by the refitter.
Definition: NvInferRuntime.h:2378
TensorLocation getWeightsLocation(char const *weightsName) const noexcept
Get location for the weights associated with the given name.
Definition: NvInferRuntime.h:2437
bool setNamedWeights(char const *name, Weights weights) noexcept
Specify new weights of given name.
Definition: NvInferRuntime.h:2302
int32_t getAllWeights(int32_t size, char const **weightsNames) noexcept
Get names of all weights that could be refit.
Definition: NvInferRuntime.h:2338
virtual ~IRefitter() noexcept=0
ILogger * getLogger() const noexcept
Get the logger with which the refitter was created.
Definition: NvInferRuntime.h:2348
bool refitCudaEngine() noexcept
Refits associated engine.
Definition: NvInferRuntime.h:2205
int32_t getMissingWeights(int32_t size, char const **weightsNames) noexcept
Get names of missing weights.
Definition: NvInferRuntime.h:2322
int32_t getMissing(int32_t size, char const **layerNames, WeightsRole *roles) noexcept
Get description of missing weights.
Definition: NvInferRuntime.h:2226
Weights getNamedWeights(char const *weightsName) const noexcept
Get weights associated with the given name.
Definition: NvInferRuntime.h:2421
bool unsetNamedWeights(char const *weightsName) noexcept
Unset weights associated with the given name.
Definition: NvInferRuntime.h:2453
Weights getWeightsPrototype(char const *weightsName) const noexcept
Get the Weights prototype associated with the given name.
Definition: NvInferRuntime.h:2517
bool setMaxThreads(int32_t maxThreads) noexcept
Set the maximum number of threads.
Definition: NvInferRuntime.h:2364
bool setNamedWeights(char const *name, Weights weights, TensorLocation location) noexcept
Specify new weights on a specified device of given name.
Definition: NvInferRuntime.h:2405
void setWeightsValidation(bool weightsValidation) noexcept
Set whether to validate weights during refitting.
Definition: NvInferRuntime.h:2469
apiv::VRefitter * mImpl
Definition: NvInferRuntime.h:2523
int32_t getAll(int32_t size, char const **layerNames, WeightsRole *roles) noexcept
Get description of all weights that could be refit.
Definition: NvInferRuntime.h:2243
bool getWeightsValidation() const noexcept
Get whether to validate weights values during refitting.
Definition: NvInferRuntime.h:2477
void setErrorRecorder(IErrorRecorder *recorder) noexcept
Set the ErrorRecorder for this interface.
Definition: NvInferRuntime.h:2262
IErrorRecorder * getErrorRecorder() const noexcept
Get the ErrorRecorder assigned to this interface.
Definition: NvInferRuntime.h:2277
A class for runtime configuration. This class is used during execution context creation.
Definition: NvInferRuntime.h:2950
apiv::VRuntimeConfig * mImpl
Definition: NvInferRuntime.h:2976
virtual ~IRuntimeConfig() noexcept=0
ExecutionContextAllocationStrategy getExecutionContextAllocationStrategy() const noexcept
Get the execution context allocation strategy.
Definition: NvInferRuntime.h:2969
Allows a serialized functionally unsafe engine to be deserialized.
Definition: NvInferRuntime.h:1862
bool setMaxThreads(int32_t maxThreads) noexcept
Set the maximum number of threads.
Definition: NvInferRuntime.h:2011
IRuntime * loadRuntime(char const *path) noexcept
Load IRuntime from the file.
Definition: NvInferRuntime.h:2127
bool getEngineHostCodeAllowed() const noexcept
Get whether the runtime is allowed to deserialize engines with host executable code.
Definition: NvInferRuntime.h:2149
TempfileControlFlags getTempfileControlFlags() const noexcept
Get the tempfile control flags for this runtime.
Definition: NvInferRuntime.h:2099
void setEngineHostCodeAllowed(bool allowed) noexcept
Set whether the runtime is allowed to deserialize engines with host executable code.
Definition: NvInferRuntime.h:2139
void setTemporaryDirectory(char const *path) noexcept
Set the directory that will be used by this runtime for temporary files.
Definition: NvInferRuntime.h:2060
IPluginRegistry & getPluginRegistry() noexcept
Get the local plugin registry that can be used by the runtime.
Definition: NvInferRuntime.h:2109
int32_t getNbDLACores() const noexcept
Returns number of DLA hardware cores accessible or 0 if DLA is unavailable.
Definition: NvInferRuntime.h:1895
ICudaEngine * deserializeCudaEngine(void const *blob, std::size_t size) noexcept
Deserialize an engine from host memory.
Definition: NvInferRuntime.h:1963
virtual ~IRuntime() noexcept=0
void setTempfileControlFlags(TempfileControlFlags flags) noexcept
Set the tempfile control flags for this runtime.
Definition: NvInferRuntime.h:2087
int32_t getDLACore() const noexcept
Get the DLA core that the engine executes on.
Definition: NvInferRuntime.h:1887
void setGpuAllocator(IGpuAllocator *allocator) noexcept
Set the GPU allocator.
Definition: NvInferRuntime.h:1911
IErrorRecorder * getErrorRecorder() const noexcept
Get the ErrorRecorder assigned to this interface.
Definition: NvInferRuntime.h:1945
ICudaEngine * deserializeCudaEngine(IStreamReaderV2 &streamReader)
Deserialize an engine from a stream. IStreamReaderV2 is expected to support reading to both host and ...
Definition: NvInferRuntime.h:1986
ILogger * getLogger() const noexcept
Get the logger with which the runtime was created.
Definition: NvInferRuntime.h:1996
int32_t getMaxThreads() const noexcept
Get the maximum number of threads that can be used by the runtime.
Definition: NvInferRuntime.h:2025
char const * getTemporaryDirectory() const noexcept
Get the directory that will be used by this runtime for temporary files.
Definition: NvInferRuntime.h:2071
void setErrorRecorder(IErrorRecorder *recorder) noexcept
Set the ErrorRecorder for this interface.
Definition: NvInferRuntime.h:1930
Holds properties for configuring an engine to serialize the binary.
Definition: NvInferRuntime.h:2842
bool clearFlag(SerializationFlag serializationFlag) noexcept
Clear a serialization flag.
Definition: NvInferRuntime.h:2881
virtual ~ISerializationConfig() noexcept=0
bool setFlag(SerializationFlag serializationFlag) noexcept
Set a serialization flag.
Definition: NvInferRuntime.h:2893
SerializationFlags getFlags() const noexcept
Get the serialization flags for this config.
Definition: NvInferRuntime.h:2869
bool getFlag(SerializationFlag serializationFlag) const noexcept
Returns true if the serialization flag is set.
Definition: NvInferRuntime.h:2905
apiv::VSerializationConfig * mImpl
Definition: NvInferRuntime.h:2911
An Interface class for version control.
Definition: NvInferRuntimeBase.h:282
Version information associated with a TRT interface.
Definition: NvInferRuntimeBase.h:247
Register the plugin creator to the registry. The static registry object will be instantiated when the ...
Definition: NvInferRuntime.h:5094
PluginRegistrar()
Definition: NvInferRuntime.h:5096
An array of weights used as a layer parameter.
Definition: NvInferRuntime.h:121
DataType type
The type of the weights.
Definition: NvInferRuntime.h:123
int64_t count
The number of weights in the array.
Definition: NvInferRuntime.h:125
void const * values
The weight values, in a contiguous array.
Definition: NvInferRuntime.h:124
Definition: NvInferRuntime.h:3908
virtual bool processDebugTensor(void const *addr, TensorLocation location, DataType type, Dims const &shape, char const *name, cudaStream_t stream)=0
Callback function that is called when a debug tensor’s value is updated and the debug state of the te...
InterfaceInfo getInterfaceInfo() const noexcept override
Return version information associated with this interface. Applications must not override this method...
Definition: NvInferRuntime.h:3913
~IDebugListener() override=default
Definition: NvInferRuntimeBase.h:419
Definition: NvInferRuntime.h:1652
virtual void * allocateAsync(uint64_t const size, uint64_t const alignment, AllocatorFlags const flags, cudaStream_t) noexcept
A thread-safe callback implemented by the application to handle stream-ordered acquisition of GPU mem...
Definition: NvInferRuntime.h:1774
InterfaceInfo getInterfaceInfo() const noexcept override
Return version information associated with this interface. Applications must not override this method...
Definition: NvInferRuntime.h:1815
virtual TRT_DEPRECATED bool deallocate(void *const memory) noexcept=0
A thread-safe callback implemented by the application to handle release of GPU memory.
~IGpuAllocator() override=default
virtual void * reallocate(void *const, uint64_t, uint64_t) noexcept
A thread-safe callback implemented by the application to resize an existing allocation.
Definition: NvInferRuntime.h:1721
virtual TRT_DEPRECATED void * allocate(uint64_t const size, uint64_t const alignment, AllocatorFlags const flags) noexcept=0
A thread-safe callback implemented by the application to handle acquisition of GPU memory.
virtual bool deallocateAsync(void *const memory, cudaStream_t) noexcept
A thread-safe callback implemented by the application to handle stream-ordered release of GPU memory.
Definition: NvInferRuntime.h:1807
Definition: NvInferRuntime.h:5160
bool deallocateAsync(void *const memory, cudaStream_t) noexcept override=0
A thread-safe callback implemented by the application to handle stream-ordered asynchronous release o...
void * allocateAsync(uint64_t const size, uint64_t const alignment, AllocatorFlags const flags, cudaStream_t) noexcept override=0
A thread-safe callback implemented by the application to handle stream-ordered asynchronous acquisiti...
TRT_DEPRECATED void * allocate(uint64_t const size, uint64_t const alignment, AllocatorFlags const flags) noexcept override
A thread-safe callback implemented by the application to handle acquisition of GPU memory.
Definition: NvInferRuntime.h:5247
TRT_DEPRECATED bool deallocate(void *const memory) noexcept override
A thread-safe callback implemented by the application to handle release of GPU memory.
Definition: NvInferRuntime.h:5271
InterfaceInfo getInterfaceInfo() const noexcept override
Return version information associated with this interface. Applications must not override this method...
Definition: NvInferRuntime.h:5279
~IGpuAsyncAllocator() override=default
A virtual base class to find a logger. Allows a plugin to find an instance of a logger if it needs to...
Definition: NvInferRuntime.h:5126
virtual ILogger * findLogger()=0
Get the logger used by the engine or execution context which called the plugin method.
InterfaceInfo getInterfaceInfo() const noexcept override
Return version information associated with this interface. Applications must not override this method...
Definition: NvInferRuntime.h:5131
~ILoggerFinder() override=default
Protected: TRT owns ILoggerFinder instances and passes non-owning pointers to plugins.
Application-implemented logging interface for the builder, refitter and runtime.
Definition: NvInferRuntime.h:1575
InterfaceInfo getInterfaceInfo() const noexcept override
Return version information associated with this interface. Applications must not override this method...
Definition: NvInferRuntime.h:1580
~ILogger() override=default
Severity
The severity corresponding to a log message.
Definition: NvInferRuntime.h:1591
virtual void log(Severity severity, AsciiChar const *msg) noexcept=0
A callback implemented by the application to handle logging messages.
Definition: NvInferRuntime.h:3820
virtual TRT_DEPRECATED void * reallocateOutput(char const *, void *, uint64_t, uint64_t) noexcept
Return a pointer to memory for an output tensor, or nullptr if memory cannot be allocated....
Definition: NvInferRuntime.h:3849
InterfaceInfo getInterfaceInfo() const noexcept override
Return version information associated with this interface. Applications must not override this method...
Definition: NvInferRuntime.h:3825
virtual void * reallocateOutputAsync(char const *tensorName, void *currentMemory, uint64_t size, uint64_t alignment, cudaStream_t)
Return a pointer to memory for an output tensor, or nullptr if memory cannot be allocated....
Definition: NvInferRuntime.h:3877
virtual void notifyShape(char const *tensorName, Dims const &dims) noexcept=0
Called by TensorRT when the shape of the output tensor is known.
Definition: NvInferPluginBase.h:141
Definition: NvInferPluginBase.h:193
Definition: NvInferRuntime.h:5286
virtual PluginFieldCollection const * getFieldNames() noexcept=0
Return a list of fields that need to be passed to createPlugin() when creating a plugin for use in th...
InterfaceInfo getInterfaceInfo() const noexcept override
Return version information associated with this interface. Applications must not override this method...
Definition: NvInferRuntime.h:5291
virtual IPluginV3 * createPlugin(AsciiChar const *name, PluginFieldCollection const *fc, TensorRTPhase phase) noexcept=0
Return a plugin object. Return nullptr in case of error.
Definition: NvInferPluginBase.h:206
Definition: NvInferRuntime.h:863
virtual int32_t getFormatCombinationLimit() noexcept
Return the maximum number of format combinations that will be timed by TensorRT during the build phas...
Definition: NvInferRuntime.h:1067
virtual int32_t getNbOutputs() const noexcept=0
Get the number of outputs from the plugin.
virtual int32_t configurePlugin(DynamicPluginTensorDesc const *in, int32_t nbInputs, DynamicPluginTensorDesc const *out, int32_t nbOutputs) noexcept=0
Configure the plugin.
virtual int32_t getNbTactics() noexcept
Query for the number of custom tactics the plugin intends to use.
Definition: NvInferRuntime.h:1043
virtual char const * getMetadataString() noexcept
Query for a string representing the configuration of the plugin. May be called anytime after plugin c...
Definition: NvInferRuntime.h:1078
virtual char const * getTimingCacheID() noexcept
Called to query the suffix to use for the timing cache ID. May be called anytime after plugin creatio...
Definition: NvInferRuntime.h:1059
virtual bool supportsFormatCombination(int32_t pos, DynamicPluginTensorDesc const *inOut, int32_t nbInputs, int32_t nbOutputs) noexcept=0
Return true if plugin supports the format and datatype for the input/output indexed by pos.
virtual int32_t getValidTactics(int32_t *, int32_t) noexcept
Query for any custom tactics that the plugin intends to use.
Definition: NvInferRuntime.h:1035
virtual int32_t getOutputDataTypes(DataType *outputTypes, int32_t nbOutputs, const DataType *inputTypes, int32_t nbInputs) const noexcept=0
Provide the data types of the plugin outputs if the input tensors have the data types provided.
InterfaceInfo getInterfaceInfo() const noexcept override
Return version information associated with this interface. Applications must not override this method...
Definition: NvInferRuntime.h:875
virtual int32_t getOutputShapes(DimsExprs const *inputs, int32_t nbInputs, DimsExprs const *shapeInputs, int32_t nbShapeInputs, DimsExprs *outputs, int32_t nbOutputs, IExprBuilder &exprBuilder) noexcept=0
Provide expressions for computing dimensions of the output tensors from dimensions of the input tenso...
Definition: NvInferRuntime.h:820
InterfaceInfo getInterfaceInfo() const noexcept override
Return version information associated with this interface. Applications must not override this method...
Definition: NvInferRuntime.h:825
virtual AsciiChar const * getPluginName() const noexcept=0
Return the plugin name. Should match the plugin name returned by the corresponding plugin creator.
Definition: NvInferRuntime.h:1085
InterfaceInfo getInterfaceInfo() const noexcept override
Return version information associated with this interface. Applications must not override this method...
Definition: NvInferRuntime.h:1090
virtual int32_t onShapeChange(PluginTensorDesc const *in, int32_t nbInputs, PluginTensorDesc const *out, int32_t nbOutputs) noexcept=0
Called when a plugin is being prepared for execution for specific dimensions. This could happen multi...
virtual PluginFieldCollection const * getFieldsToSerialize() noexcept=0
Get the plugin fields which should be serialized.
virtual int32_t setTactic(int32_t) noexcept
Set the tactic to be used in the subsequent call to enqueue(). If no custom tactics were advertised,...
Definition: NvInferRuntime.h:1102
virtual int32_t enqueue(PluginTensorDesc const *inputDesc, PluginTensorDesc const *outputDesc, void const *const *inputs, void *const *outputs, void *workspace, cudaStream_t stream) noexcept=0
Execute the layer.
virtual IPluginV3 * attachToContext(IPluginResourceContext *context) noexcept=0
Clone the plugin, attach the cloned plugin object to an execution context and grant the cloned plugin ...
Definition: NvInferRuntime.h:1275
InterfaceInfo getInterfaceInfo() const noexcept override
Return version information associated with this interface. Applications must not override this method...
Definition: NvInferRuntime.h:1280
~IProfiler() override=default
virtual void reportLayerTime(char const *layerName, float ms) noexcept=0
Layer time reporting callback.
Definition: NvInferRuntime.h:608
~IStreamReader() override=default
IStreamReader & operator=(IStreamReader const &) &=default
IStreamReader & operator=(IStreamReader &&) &=default
virtual int64_t read(void *destination, int64_t nbBytes)=0
Read the next number of bytes in the stream.
IStreamReader(IStreamReader &&)=default
IStreamReader(IStreamReader const &)=default
InterfaceInfo getInterfaceInfo() const noexcept override
Return version information associated with this interface. Applications must not override this method...
Definition: NvInferRuntime.h:620
Definition: NvInferRuntime.h:720
IStreamReaderV2 & operator=(IStreamReaderV2 const &) &=default
InterfaceInfo getInterfaceInfo() const noexcept override
Return version information associated with this interface. Applications must not override this method...
Definition: NvInferRuntime.h:732
IStreamReaderV2(IStreamReaderV2 &&)=default
~IStreamReaderV2() override=default
virtual int64_t read(void *destination, int64_t nbBytes, cudaStream_t stream) noexcept=0
Read the next number of bytes in the stream asynchronously.
IStreamReaderV2(IStreamReaderV2 const &)=default
virtual bool seek(int64_t offset, SeekPosition where) noexcept=0
Sets the position of the stream to the given offset.
IStreamReaderV2 & operator=(IStreamReaderV2 &&) &=default
Definition: NvInferRuntime.h:643
IStreamWriter & operator=(IStreamWriter const &) &=default
IStreamWriter(IStreamWriter &&)=default
virtual int64_t write(void const *data, int64_t nbBytes)=0
Write nbBytes of data into the stream.
IStreamWriter(IStreamWriter const &)=default
IStreamWriter & operator=(IStreamWriter &&) &=default
InterfaceInfo getInterfaceInfo() const noexcept override
Return version information associated with this interface. Applications must not override this method...
Definition: NvInferRuntime.h:655
~IStreamWriter() override=default
Definition: NvInferRuntime.h:1182
InterfaceInfo getInterfaceInfo() const noexcept override
Return version information associated with this interface. Applications must not override this method...
Definition: NvInferRuntime.h:1184
virtual int32_t getAliasedInput(int32_t) noexcept
Communicates to TensorRT that the output at the specified output index is aliased to the input at the...
Definition: NvInferRuntime.h:1218
IRefitter * createInferRefitter(ICudaEngine &engine, ILogger &logger) noexcept
Create an instance of an IRefitter class.
Definition: NvInferRuntime.h:5074
IRuntime * createInferRuntime(ILogger &logger) noexcept
Create an instance of an IRuntime class.
Definition: NvInferRuntime.h:5063
The TensorRT API version 1 namespace.
Definition: NvInferSafePlugin.h:33
uint32_t TacticSources
Represents a collection of one or more TacticSource values combined using bitwise-OR operations.
Definition: NvInferRuntime.h:2780
v_1_0::IOutputAllocator IOutputAllocator
Definition: NvInferRuntime.h:3903
EngineCapability
List of supported engine capability flows.
Definition: NvInferRuntime.h:76
DimensionOperation
An operation on two IDimensionExpr, which represent integer expressions used in dimension computation...
Definition: NvInferRuntime.h:178
@ kCEIL_DIV
Division rounding up.
v_1_0::IPluginV3OneCore IPluginV3OneCore
Definition: NvInferRuntime.h:1235
TensorIOMode
Definition of tensor IO Mode.
Definition: NvInferRuntimeBase.h:662
HardwareCompatibilityLevel
Describes requirements of compatibility with GPU architectures other than that of the GPU on which th...
Definition: NvInfer.h:10397
SerializationFlag
List of valid flags that the engine can enable when serializing the bytes.
Definition: NvInferRuntime.h:2821
@ kEXCLUDE_WEIGHTS
Exclude the weights that can be refitted.
@ kINCLUDE_REFIT
Remain refittable if originally so.
v_1_0::IStreamWriter IStreamWriter
Definition: NvInferRuntime.h:699
v_1_0::IProfiler IProfiler
Definition: NvInferRuntime.h:1309
SeekPosition
Controls the seek mode of IStreamReaderV2.
Definition: NvInferRuntime.h:706
@ kSET
From the beginning of the file.
@ kCUR
From the current position of the file.
@ kEND
From the tail of the file.
v_1_0::IStreamReaderV2 IStreamReaderV2
Definition: NvInferRuntime.h:776
uint32_t TempfileControlFlags
Represents a collection of one or more TempfileControlFlag values combined using bitwise-OR operation...
Definition: NvInferRuntime.h:1387
EngineStat
The kind of engine statistics that can be queried from the ICudaEngine.
Definition: NvInferRuntime.h:2990
@ kTOTAL_WEIGHTS_SIZE
Return the total weight size in bytes.
@ kSTRIPPED_WEIGHTS_SIZE
Return the stripped weight size in bytes for engines built with BuilderFlag::kSTRIP_PLAN.
v_1_0::IGpuAllocator IGpuAllocator
Definition: NvInferRuntime.h:1851
v_1_0::ILogger ILogger
Definition: NvInferRuntimeBase.h:125
char_t AsciiChar
Definition: NvInferRuntimeBase.h:116
TensorRTPhase
Indicates a phase of operation of TensorRT.
Definition: NvInferPluginBase.h:116
@ kV2_DYNAMICEXT
IPluginV2DynamicExt.
DataType
The type of weights and tensors. The datatypes other than kBOOL, kINT32, and kINT64 are "activation d...
Definition: NvInferRuntimeBase.h:149
DeviceType
The device that this layer/network will execute on.
Definition: NvInferRuntime.h:1341
@ kSCALE
Scale layer.
@ kCONSTANT
Constant layer.
v_1_0::IDebugListener IDebugListener
Definition: NvInferRuntime.h:3944
TempfileControlFlag
Flags used to control TensorRT's behavior when creating executable temporary files.
Definition: NvInferRuntime.h:1364
@ kALLOW_IN_MEMORY_FILES
Allow creating and loading files in-memory (or unnamed files).
WeightsRole
How a layer uses particular Weights.
Definition: NvInferRuntime.h:1319
@ kSHIFT
shift part of IScaleLayer
@ kANY
Any other weights role.
@ kBIAS
bias for IConvolutionLayer or IDeconvolutionLayer
@ kKERNEL
kernel for IConvolutionLayer or IDeconvolutionLayer
ProfilingVerbosity
List of verbosity levels of layer information exposed in NVTX annotations and in IEngineInspector.
Definition: NvInferRuntime.h:2792
@ kLAYER_NAMES_ONLY
Print only the layer names. This is the default setting.
@ kDETAILED
Print detailed layer information including layer names and layer parameters.
TacticSource
List of tactic sources for TensorRT.
Definition: NvInferRuntime.h:2756
TensorFormat PluginFormat
PluginFormat is reserved for backward compatibility.
Definition: NvInferRuntimePlugin.h:54
v_1_0::IPluginV3OneRuntime IPluginV3OneRuntime
Definition: NvInferRuntime.h:1259
@ kSUB
Subtract the second element from the first.
@ kSUM
Sum of the two elements.
@ kPROD
Product of the two elements.
@ kFLOOR_DIV
Floor division of the first element by the second.
@ kEQUAL
Check if two elements are equal.
@ kMIN
Minimum of the two elements.
@ kLESS
Check if element in first tensor is less than corresponding element in second tensor.
uint32_t SerializationFlags
Represents one or more SerializationFlag values using binary OR operations, e.g., 1U << Serialization...
Definition: NvInferRuntime.h:2811
@ kLINEAR
Supports linear (1D), bilinear (2D), and trilinear (3D) interpolation.
v_1_0::IPluginV3OneBuild IPluginV3OneBuild
Definition: NvInferRuntime.h:1247
TensorFormat
Format of the input/output tensors.
Definition: NvInferRuntime.h:1421
ExecutionContextAllocationStrategy
Different memory allocation behaviors for IExecutionContext.
Definition: NvInferRuntime.h:2928
@ kSTATIC
Default static allocation with the maximum size across all profiles.
@ kUSER_MANAGED
The user supplies custom allocation to the execution context.
@ kON_PROFILE_CHANGE
Reallocate for a profile when it's selected.
v_1_0::ILoggerFinder ILoggerFinder
Definition: NvInferRuntime.h:5152
LayerInformationFormat
The format in which the IEngineInspector prints the layer information.
Definition: NvInferRuntime.h:4878
@ kJSON
Print layer information in JSON format.
@ kONELINE
Print layer information in one line per layer.
v_1_0::IStreamReader IStreamReader
Definition: NvInferRuntime.h:689
AllocatorFlag
Allowed type of memory allocation.
Definition: NvInferRuntime.h:1542
@ kRESIZABLE
TensorRT may call realloc() on this allocation.
@ kMAX
Maximum over elements.
TensorLocation
The location for tensor data storage, device or host.
Definition: NvInferRuntime.h:203
@ kHOST
Data stored on host.
@ kDEVICE
Data stored on device.
OptProfileSelector
When setting or querying optimization profile parameters (such as shape tensor inputs or dynamic dime...
Definition: NvInferRuntime.h:2539
@ kOPT
This is used to set or get the value that is used in the optimization (kernel selection).
uint32_t AllocatorFlags
Definition: NvInferRuntime.h:1555
Severity
Enumerates severity levels for messages issued by the message recorder.
Definition: NvInferSafeRecorder.h:55
Summarizes tensors that a plugin might see for an input or output.
Definition: NvInferRuntime.h:362
Dims min
Lower bounds on tensor’s dimensions.
Definition: NvInferRuntime.h:367
Dims max
Upper bounds on tensor’s dimensions.
Definition: NvInferRuntime.h:370
Dims opt
Optimum value of tensor’s dimensions specified for auto-tuning.
Definition: NvInferRuntime.h:373
PluginTensorDesc desc
Information required to interpret a pointer to tensor data, except that desc.dims has -1 in place of ...
Definition: NvInferRuntime.h:364
Plugin field collection struct.
Definition: NvInferPluginBase.h:103
Fields that a plugin might see for an input or output.
Definition: NvInferRuntimePlugin.h:73
Declaration of EnumMaxImpl struct to store the exclusive upper bound of an enumeration type.
Definition: NvInferRuntimeBase.h:131

  Copyright © 2024 NVIDIA Corporation
  Privacy Policy | Manage My Privacy | Do Not Sell or Share My Data | Terms of Service | Accessibility | Corporate Policies | Product Security | Contact