TensorRT 10.2.0
NvInferRuntime.h
Go to the documentation of this file.
1/*
2 * SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
3 * SPDX-License-Identifier: Apache-2.0
4 *
5 * Licensed under the Apache License, Version 2.0 (the "License");
6 * you may not use this file except in compliance with the License.
7 * You may obtain a copy of the License at
8 *
9 * http://www.apache.org/licenses/LICENSE-2.0
10 *
11 * Unless required by applicable law or agreed to in writing, software
12 * distributed under the License is distributed on an "AS IS" BASIS,
13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 * See the License for the specific language governing permissions and
15 * limitations under the License.
16 */
17
18#ifndef NV_INFER_RUNTIME_H
19#define NV_INFER_RUNTIME_H
20
26
27#include "NvInferImpl.h"
29
30namespace nvinfer1
31{
32
33class IExecutionContext;
34class ICudaEngine;
35class IPluginFactory;
36class IEngineInspector;
37
//!
//! \class INoCopy
//!
//! \brief Helper base class used by the interfaces in this header to forbid copying and moving,
//! so interface objects are only ever handled through pointers/references.
//!
//! NOTE(review): the class declaration line was lost in extraction and has been restored;
//! verify against the original header.
//!
class INoCopy
{
protected:
    INoCopy() = default;
    virtual ~INoCopy() = default;
    INoCopy(INoCopy const& other) = delete;
    INoCopy& operator=(INoCopy const& other) = delete;
    INoCopy(INoCopy&& other) = delete;
    INoCopy& operator=(INoCopy&& other) = delete;
};
//!
//! \enum EngineCapability
//!
//! \brief List of supported engine capability flows.
//!
enum class EngineCapability : int32_t
{
    //! Standard engine capability flow.
    kSTANDARD = 0,

    //! Safety-restricted engine capability flow.
    kSAFETY = 1,

    //! DLA-standalone engine capability flow.
    //! NOTE(review): this enumerator was lost in extraction and has been restored; the
    //! impl::EnumMaxImpl<EngineCapability>::kVALUE of 3 in this header confirms three values.
    kDLA_STANDALONE = 2,
};
96namespace impl
97{
99template <>
101{
102 static constexpr int32_t kVALUE = 3;
103};
104} // namespace impl
105
121{
122public:
124 void const* values;
125 int64_t count;
126};
127
138class IHostMemory : public INoCopy
139{
140public:
141 virtual ~IHostMemory() noexcept = default;
142
144 void* data() const noexcept
145 {
146 return mImpl->data();
147 }
148
150 std::size_t size() const noexcept
151 {
152 return mImpl->size();
153 }
154
156 DataType type() const noexcept
157 {
158 return mImpl->type();
159 }
160
161protected:
162 apiv::VHostMemory* mImpl;
163};
164
//!
//! \enum DimensionOperation
//!
//! \brief Binary operations applicable to dimension expressions (see IExprBuilder::operation()).
//!
enum class DimensionOperation : int32_t
{
    kSUM = 0,       //!< Sum of the two operands.
    kPROD = 1,      //!< Product of the two operands.
    kMAX = 2,       //!< Maximum of the two operands.
    kMIN = 3,       //!< Minimum of the two operands.
    kSUB = 4,       //!< Difference of the two operands.
    kEQUAL = 5,     //!< 1 if operands are equal, 0 otherwise.
    kLESS = 6,      //!< 1 if first operand is less than second operand, 0 otherwise.
    kFLOOR_DIV = 7, //!< Floor division of the first by the second operand.
    kCEIL_DIV = 8   //!< Ceiling division of the first by the second operand.
};
189template <>
190constexpr inline int32_t EnumMax<DimensionOperation>() noexcept
191{
192 return 9;
193}
194
//!
//! \enum TensorLocation
//!
//! \brief The location of a tensor: device (GPU) or host (CPU) memory.
//!
enum class TensorLocation : int32_t
{
    kDEVICE = 0, //!< Data is stored on the device.
    kHOST = 1,   //!< Data is stored on the host.
};
206namespace impl
207{
209template <>
211{
212 static constexpr int32_t kVALUE = 2;
213};
214} // namespace impl
215
229{
230public:
234 bool isConstant() const noexcept
235 {
236 return mImpl->isConstant();
237 }
238
245 int64_t getConstantValue() const noexcept
246 {
247 return mImpl->getConstantValue();
248 }
249
250protected:
251 apiv::VDimensionExpr* mImpl;
252 virtual ~IDimensionExpr() noexcept = default;
253
254public:
260 bool isSizeTensor() const noexcept
261 {
262 return mImpl->isSizeTensor();
263 }
264};
265
283class IExprBuilder : public INoCopy
284{
285public:
289 IDimensionExpr const* constant(int64_t value) noexcept
290 {
291 return mImpl->constant(value);
292 }
293
301 DimensionOperation op, IDimensionExpr const& first, IDimensionExpr const& second) noexcept
302 {
303 return mImpl->operation(op, first, second);
304 }
305
306protected:
307 apiv::VExprBuilder* mImpl;
308 virtual ~IExprBuilder() noexcept = default;
309
310public:
335 IDimensionExpr const* declareSizeTensor(int32_t outputIndex, IDimensionExpr const& opt, IDimensionExpr const& upper)
336 {
337 return mImpl->declareSizeTensor(outputIndex, opt, upper);
338 }
339};
340
347{
348public:
349 int32_t nbDims;
351};
352
359{
362
365
368
371};
372
404{
405public:
406 IPluginV2DynamicExt* clone() const noexcept override = 0;
407
432 virtual DimsExprs getOutputDimensions(
433 int32_t outputIndex, DimsExprs const* inputs, int32_t nbInputs, IExprBuilder& exprBuilder) noexcept = 0;
434
438 static constexpr int32_t kFORMAT_COMBINATION_LIMIT = 100;
439
472 virtual bool supportsFormatCombination(
473 int32_t pos, PluginTensorDesc const* inOut, int32_t nbInputs, int32_t nbOutputs) noexcept = 0;
474
512 virtual void configurePlugin(DynamicPluginTensorDesc const* in, int32_t nbInputs,
513 DynamicPluginTensorDesc const* out, int32_t nbOutputs) noexcept = 0;
514
524 virtual size_t getWorkspaceSize(PluginTensorDesc const* inputs, int32_t nbInputs, PluginTensorDesc const* outputs,
525 int32_t nbOutputs) const noexcept = 0;
526
539 virtual int32_t enqueue(PluginTensorDesc const* inputDesc, PluginTensorDesc const* outputDesc,
540 void const* const* inputs, void* const* outputs, void* workspace, cudaStream_t stream) noexcept = 0;
541
542protected:
550 int32_t getTensorRTVersion() const noexcept override
551 {
552 return (static_cast<int32_t>(PluginVersion::kV2_DYNAMICEXT) << 24 | (NV_TENSORRT_VERSION & 0xFFFFFF));
553 }
554
555 virtual ~IPluginV2DynamicExt() noexcept {}
556
557private:
558 // Following are obsolete base class methods, and must not be implemented or used.
559
563 void configurePlugin(Dims const*, int32_t, Dims const*, int32_t, DataType const*, DataType const*, bool const*,
564 bool const*, PluginFormat, int32_t) noexcept override final
565 {
566 }
567
571 bool supportsFormat(DataType, PluginFormat) const noexcept override final
572 {
573 return false;
574 }
575
579 Dims getOutputDimensions(int32_t, Dims const*, int32_t) noexcept override final
580 {
581 return Dims{-1, {}};
582 }
583
591 TRT_DEPRECATED bool isOutputBroadcastAcrossBatch(int32_t, bool const*, int32_t) const noexcept override final
592 {
593 return false;
594 }
595
603 TRT_DEPRECATED bool canBroadcastInputAcrossBatch(int32_t) const noexcept override final
604 {
605 return true;
606 }
607
611 size_t getWorkspaceSize(int32_t) const noexcept override final
612 {
613 return 0;
614 }
615
619 int32_t enqueue(int32_t, void const* const*, void* const*, void*, cudaStream_t) noexcept override final
620 {
621 return 1;
622 }
623};
624
639{
640public:
645 virtual IGpuAllocator* getGpuAllocator() const noexcept = 0;
646
651 virtual IErrorRecorder* getErrorRecorder() const noexcept = 0;
652 virtual ~IPluginResourceContext() noexcept = default;
653
654protected:
658 IPluginResourceContext& operator=(IPluginResourceContext const&) & = default;
660};
661
662namespace v_1_0
663{
665{
666};
667} // namespace v_1_0
668
683
684namespace v_1_0
685{
687{
688public:
692 InterfaceInfo getInterfaceInfo() const noexcept override
693 {
694 return InterfaceInfo{"PLUGIN", 1, 0};
695 }
696
709
720 virtual IPluginV3* clone() noexcept = 0;
721};
722
723} // namespace v_1_0
724
736using IPluginV3 = v_1_0::IPluginV3;
737
738namespace v_1_0
739{
741{
742public:
746 InterfaceInfo getInterfaceInfo() const noexcept override
747 {
748 return InterfaceInfo{"PLUGIN_V3ONE_CORE", 1, 0};
749 }
750
759 virtual AsciiChar const* getPluginName() const noexcept = 0;
760
769 virtual AsciiChar const* getPluginVersion() const noexcept = 0;
770
780 virtual AsciiChar const* getPluginNamespace() const noexcept = 0;
781};
782
784{
785public:
791 static constexpr int32_t kDEFAULT_FORMAT_COMBINATION_LIMIT = 100;
792
796 InterfaceInfo getInterfaceInfo() const noexcept override
797 {
798 return InterfaceInfo{"PLUGIN_V3ONE_BUILD", 1, 0};
799 }
800
818 virtual int32_t configurePlugin(DynamicPluginTensorDesc const* in, int32_t nbInputs,
819 DynamicPluginTensorDesc const* out, int32_t nbOutputs) noexcept = 0;
820
837 virtual int32_t getOutputDataTypes(
838 DataType* outputTypes, int32_t nbOutputs, const DataType* inputTypes, int32_t nbInputs) const noexcept = 0;
839
860 virtual int32_t getOutputShapes(DimsExprs const* inputs, int32_t nbInputs, DimsExprs const* shapeInputs,
861 int32_t nbShapeInputs, DimsExprs* outputs, int32_t nbOutputs, IExprBuilder& exprBuilder) noexcept = 0;
862
898 int32_t pos, DynamicPluginTensorDesc const* inOut, int32_t nbInputs, int32_t nbOutputs) noexcept = 0;
899
905 virtual int32_t getNbOutputs() const noexcept = 0;
906
916 virtual size_t getWorkspaceSize(DynamicPluginTensorDesc const* inputs, int32_t nbInputs,
917 DynamicPluginTensorDesc const* outputs, int32_t nbOutputs) const noexcept
918 {
919 return 0;
920 }
921
953 virtual int32_t getValidTactics(int32_t* tactics, int32_t nbTactics) noexcept
954 {
955 return 0;
956 }
957
961 virtual int32_t getNbTactics() noexcept
962 {
963 return 0;
964 }
965
977 virtual char const* getTimingCacheID() noexcept
978 {
979 return nullptr;
980 }
981
985 virtual int32_t getFormatCombinationLimit() noexcept
986 {
987 return kDEFAULT_FORMAT_COMBINATION_LIMIT;
988 }
989
996 virtual char const* getMetadataString() noexcept
997 {
998 return nullptr;
999 }
1000};
1001
1003{
1004public:
1008 InterfaceInfo getInterfaceInfo() const noexcept override
1009 {
1010 return InterfaceInfo{"PLUGIN_V3ONE_RUNTIME", 1, 0};
1011 }
1012
1020 virtual int32_t setTactic(int32_t tactic) noexcept
1021 {
1022 return 0;
1023 }
1024
1043 virtual int32_t onShapeChange(
1044 PluginTensorDesc const* in, int32_t nbInputs, PluginTensorDesc const* out, int32_t nbOutputs) noexcept = 0;
1045
1059 virtual int32_t enqueue(PluginTensorDesc const* inputDesc, PluginTensorDesc const* outputDesc,
1060 void const* const* inputs, void* const* outputs, void* workspace, cudaStream_t stream) noexcept = 0;
1061
1081 virtual IPluginV3* attachToContext(IPluginResourceContext* context) noexcept = 0;
1082
1088
1092 virtual PluginFieldCollection const* getFieldsToSerialize() noexcept = 0;
1093};
1094} // namespace v_1_0
1095
1106
1118
1130
1131namespace v_1_0
1132{
1134{
1135public:
1139 InterfaceInfo getInterfaceInfo() const noexcept override
1140 {
1141 return InterfaceInfo{"PLUGIN CREATOR_V3ONE", 1, 0};
1142 }
1143
1157 AsciiChar const* name, PluginFieldCollection const* fc, TensorRTPhase phase) noexcept = 0;
1158
1165 virtual PluginFieldCollection const* getFieldNames() noexcept = 0;
1166
1173 virtual AsciiChar const* getPluginName() const noexcept = 0;
1174
1181 virtual AsciiChar const* getPluginVersion() const noexcept = 0;
1182
1189 virtual AsciiChar const* getPluginNamespace() const noexcept = 0;
1190
1192 virtual ~IPluginCreatorV3One() = default;
1193
1194protected:
1197 IPluginCreatorV3One& operator=(IPluginCreatorV3One const&) & = default;
1198 IPluginCreatorV3One& operator=(IPluginCreatorV3One&&) & = default;
1199};
1200} // namespace v_1_0
1201
1211
namespace v_1_0
{
//!
//! \class IProfiler
//!
//! \brief Application-implemented interface for per-layer profiling callbacks.
//!
//! NOTE(review): the class declaration line and the trailing using-alias were lost in
//! extraction and have been restored; verify against the original header.
//!
class IProfiler
{
public:
    //! \brief Layer time reporting callback: invoked with the layer name and its execution
    //! time in milliseconds.
    virtual void reportLayerTime(char const* layerName, float ms) noexcept = 0;

    virtual ~IProfiler() noexcept {}
};
} // namespace v_1_0

using IProfiler = v_1_0::IProfiler;
//!
//! \enum WeightsRole
//!
//! \brief The possible roles of a weights object within a layer.
//!
enum class WeightsRole : int32_t
{
    kKERNEL = 0,   //!< Kernel weights.
    kBIAS = 1,     //!< Bias weights.
    kSHIFT = 2,    //!< Shift weights.
    kSCALE = 3,    //!< Scale weights.
    kCONSTANT = 4, //!< Constant weights.
    kANY = 5,      //!< Any other weights role.
};
1261template <>
1262constexpr inline int32_t EnumMax<WeightsRole>() noexcept
1263{
1264 return 6;
1265}
1266
//!
//! \enum DeviceType
//!
//! \brief The device that this layer/network will execute on.
//!
enum class DeviceType : int32_t
{
    kGPU = 0, //!< GPU device.
    kDLA = 1, //!< DLA core.
};
1279template <>
1280constexpr inline int32_t EnumMax<DeviceType>() noexcept
1281{
1282 return 2;
1283}
1284
//!
//! \enum TempfileControlFlag
//!
//! \brief Flags used to control TensorRT's behavior when creating executable temporary files.
//!
//! NOTE(review): both enumerators were lost in extraction and have been restored from the
//! published TensorRT 10 API (EnumMax of 2 in this header confirms two values); verify the
//! names against the original header.
//!
enum class TempfileControlFlag : int32_t
{
    //! Allow creating and loading files in memory (or an OS equivalent).
    kALLOW_IN_MEMORY_FILES = 0,

    //! Allow creating and loading named files in a temporary directory on the filesystem.
    kALLOW_TEMPORARY_FILES = 1,
};
1307template <>
1308constexpr inline int32_t EnumMax<TempfileControlFlag>() noexcept
1309{
1310 return 2;
1311}
1312
1319using TempfileControlFlags = uint32_t;
1320
1328class IRuntime : public INoCopy
1329{
1330public:
1331 virtual ~IRuntime() noexcept = default;
1332
1344 void setDLACore(int32_t dlaCore) noexcept
1345 {
1346 mImpl->setDLACore(dlaCore);
1347 }
1348
1354 int32_t getDLACore() const noexcept
1355 {
1356 return mImpl->getDLACore();
1357 }
1358
1362 int32_t getNbDLACores() const noexcept
1363 {
1364 return mImpl->getNbDLACores();
1365 }
1366
1377 void setGpuAllocator(IGpuAllocator* allocator) noexcept
1378 {
1379 mImpl->setGpuAllocator(allocator);
1380 }
1381
1393 //
1396 void setErrorRecorder(IErrorRecorder* recorder) noexcept
1397 {
1398 mImpl->setErrorRecorder(recorder);
1399 }
1400
1412 {
1413 return mImpl->getErrorRecorder();
1414 }
1415
1426 ICudaEngine* deserializeCudaEngine(void const* blob, std::size_t size) noexcept
1427 {
1428 return mImpl->deserializeCudaEngine(blob, size);
1429 }
1430
1445 {
1446 return mImpl->deserializeCudaEngine(streamReader);
1447 }
1448
1454 ILogger* getLogger() const noexcept
1455 {
1456 return mImpl->getLogger();
1457 }
1458
1469 bool setMaxThreads(int32_t maxThreads) noexcept
1470 {
1471 return mImpl->setMaxThreads(maxThreads);
1472 }
1473
1483 int32_t getMaxThreads() const noexcept
1484 {
1485 return mImpl->getMaxThreads();
1486 }
1487
1518 void setTemporaryDirectory(char const* path) noexcept
1519 {
1520 return mImpl->setTemporaryDirectory(path);
1521 }
1522
1529 char const* getTemporaryDirectory() const noexcept
1530 {
1531 return mImpl->getTemporaryDirectory();
1532 }
1533
1546 {
1547 return mImpl->setTempfileControlFlags(flags);
1548 }
1549
1558 {
1559 return mImpl->getTempfileControlFlags();
1560 }
1561
1568 {
1569 return mImpl->getPluginRegistry();
1570 }
1571
1585 IRuntime* loadRuntime(char const* path) noexcept
1586 {
1587 return mImpl->loadRuntime(path);
1588 }
1589
1597 void setEngineHostCodeAllowed(bool allowed) noexcept
1598 {
1599 return mImpl->setEngineHostCodeAllowed(allowed);
1600 }
1601
1607 bool getEngineHostCodeAllowed() const noexcept
1608 {
1609 return mImpl->getEngineHostCodeAllowed();
1610 }
1611
1612protected:
1613 apiv::VRuntime* mImpl;
1614};
1615
1623class IRefitter : public INoCopy
1624{
1625public:
1626 virtual ~IRefitter() noexcept = default;
1627
1643 bool setWeights(char const* layerName, WeightsRole role, Weights weights) noexcept
1644 {
1645 return mImpl->setWeights(layerName, role, weights);
1646 }
1647
1660 bool refitCudaEngine() noexcept
1661 {
1662 return mImpl->refitCudaEngine();
1663 }
1664
1681 int32_t getMissing(int32_t size, char const** layerNames, WeightsRole* roles) noexcept
1682 {
1683 return mImpl->getMissing(size, layerNames, roles);
1684 }
1685
1698 int32_t getAll(int32_t size, char const** layerNames, WeightsRole* roles) noexcept
1699 {
1700 return mImpl->getAll(size, layerNames, roles);
1701 }
1702
1720 TRT_DEPRECATED bool setDynamicRange(char const* tensorName, float min, float max) noexcept
1721 {
1722 return mImpl->setDynamicRange(tensorName, min, max);
1723 }
1724
1736 TRT_DEPRECATED float getDynamicRangeMin(char const* tensorName) const noexcept
1737 {
1738 return mImpl->getDynamicRangeMin(tensorName);
1739 }
1740
1752 TRT_DEPRECATED float getDynamicRangeMax(char const* tensorName) const noexcept
1753 {
1754 return mImpl->getDynamicRangeMax(tensorName);
1755 }
1756
1770 TRT_DEPRECATED int32_t getTensorsWithDynamicRange(int32_t size, char const** tensorNames) const noexcept
1771 {
1772 return mImpl->getTensorsWithDynamicRange(size, tensorNames);
1773 }
1774
1786 //
1789 void setErrorRecorder(IErrorRecorder* recorder) noexcept
1790 {
1791 mImpl->setErrorRecorder(recorder);
1792 }
1793
1805 {
1806 return mImpl->getErrorRecorder();
1807 }
1808
1829 bool setNamedWeights(char const* name, Weights weights) noexcept
1830 {
1831 return mImpl->setNamedWeights(name, weights);
1832 }
1833
1849 int32_t getMissingWeights(int32_t size, char const** weightsNames) noexcept
1850 {
1851 return mImpl->getMissingWeights(size, weightsNames);
1852 }
1853
1865 int32_t getAllWeights(int32_t size, char const** weightsNames) noexcept
1866 {
1867 return mImpl->getAllWeights(size, weightsNames);
1868 }
1869
1875 ILogger* getLogger() const noexcept
1876 {
1877 return mImpl->getLogger();
1878 }
1879
1891 bool setMaxThreads(int32_t maxThreads) noexcept
1892 {
1893 return mImpl->setMaxThreads(maxThreads);
1894 }
1895
1905 int32_t getMaxThreads() const noexcept
1906 {
1907 return mImpl->getMaxThreads();
1908 }
1909
1932 bool setNamedWeights(char const* name, Weights weights, TensorLocation location) noexcept
1933 {
1934 return mImpl->setNamedWeightsWithLocation(name, weights, location);
1935 }
1936
1948 Weights getNamedWeights(char const* weightsName) const noexcept
1949 {
1950 return mImpl->getNamedWeights(weightsName);
1951 }
1952
1964 TensorLocation getWeightsLocation(char const* weightsName) const noexcept
1965 {
1966 return mImpl->getWeightsLocation(weightsName);
1967 }
1968
1980 bool unsetNamedWeights(char const* weightsName) noexcept
1981 {
1982 return mImpl->unsetNamedWeights(weightsName);
1983 }
1984
1996 void setWeightsValidation(bool weightsValidation) noexcept
1997 {
1998 return mImpl->setWeightsValidation(weightsValidation);
1999 }
2000
2004 bool getWeightsValidation() const noexcept
2005 {
2006 return mImpl->getWeightsValidation();
2007 }
2008
2026 bool refitCudaEngineAsync(cudaStream_t stream) noexcept
2027 {
2028 return mImpl->refitCudaEngineAsync(stream);
2029 }
2030
2044 Weights getWeightsPrototype(char const* weightsName) const noexcept
2045 {
2046 return mImpl->getWeightsPrototype(weightsName);
2047 }
2048
2049protected:
2050 apiv::VRefitter* mImpl;
2051};
2052
//!
//! \enum OptProfileSelector
//!
//! \brief Selects among the minimum, optimum, and maximum values of an optimization profile entry.
//!
enum class OptProfileSelector : int32_t
{
    kMIN = 0, //!< The minimum permitted value.
    kOPT = 1, //!< The value optimized for.
    kMAX = 2  //!< The maximum permitted value.
};
2075template <>
2076constexpr inline int32_t EnumMax<OptProfileSelector>() noexcept
2077{
2078 return 3;
2079}
2080
2104{
2105public:
2133 bool setDimensions(char const* inputName, OptProfileSelector select, Dims const& dims) noexcept
2134 {
2135 return mImpl->setDimensions(inputName, select, dims);
2136 }
2137
2145 Dims getDimensions(char const* inputName, OptProfileSelector select) const noexcept
2146 {
2147 return mImpl->getDimensions(inputName, select);
2148 }
2149
2193 char const* inputName, OptProfileSelector select, int32_t const* values, int32_t nbValues) noexcept
2194 {
2195 return mImpl->setShapeValues(inputName, select, values, nbValues);
2196 }
2197
2206 int32_t getNbShapeValues(char const* inputName) const noexcept
2207 {
2208 return mImpl->getNbShapeValues(inputName);
2209 }
2210
2218 int32_t const* getShapeValues(char const* inputName, OptProfileSelector select) const noexcept
2219 {
2220 return mImpl->getShapeValues(inputName, select);
2221 }
2222
2236 bool setExtraMemoryTarget(float target) noexcept
2237 {
2238 return mImpl->setExtraMemoryTarget(target);
2239 }
2240
2248 float getExtraMemoryTarget() const noexcept
2249 {
2250 return mImpl->getExtraMemoryTarget();
2251 }
2252
2265 bool isValid() const noexcept
2266 {
2267 return mImpl->isValid();
2268 }
2269
2270protected:
2271 apiv::VOptimizationProfile* mImpl;
2272 virtual ~IOptimizationProfile() noexcept = default;
2273};
2274
//!
//! \enum TacticSource
//!
//! \brief Tactic sources that can provide tactics for TensorRT.
//!
//! NOTE(review): all enumerators were lost in extraction and have been restored from the
//! published TensorRT 10 API (EnumMax of 5 in this header confirms five values); verify the
//! names and any deprecation markers against the original header.
//!
enum class TacticSource : int32_t
{
    kCUBLAS = 0,                 //!< cuBLAS tactics.
    kCUBLAS_LT = 1,              //!< cuBLAS LT tactics.
    kCUDNN = 2,                  //!< cuDNN tactics.
    kEDGE_MASK_CONVOLUTIONS = 3, //!< Edge-mask-table-based convolution tactics.
    kJIT_CONVOLUTIONS = 4,       //!< JIT-compiled convolution tactics.
};
2308template <>
2309constexpr inline int32_t EnumMax<TacticSource>() noexcept
2310{
2311 return 5;
2312}
2313
2320using TacticSources = uint32_t;
2321
//!
//! \enum ProfilingVerbosity
//!
//! \brief Verbosity levels for layer information exposed in NVTX annotations and engine inspection.
//!
enum class ProfilingVerbosity : int32_t
{
    kLAYER_NAMES_ONLY = 0, //!< Print only the layer names.
    kNONE = 1,             //!< Do not print any layer information.
    kDETAILED = 2,         //!< Print detailed layer information.
};
2339template <>
2340constexpr inline int32_t EnumMax<ProfilingVerbosity>() noexcept
2341{
2342 return 3;
2343}
2344
2351using SerializationFlags = uint32_t;
2352
2360enum class SerializationFlag : int32_t
2361{
2362 kEXCLUDE_WEIGHTS = 0,
2364};
2365
2367template <>
2368constexpr inline int32_t EnumMax<SerializationFlag>() noexcept
2369{
2370 return 2;
2371}
2372
2381{
2382public:
2383 virtual ~ISerializationConfig() noexcept = default;
2384
2396 bool setFlags(SerializationFlags serializationFlags) noexcept
2397 {
2398 return mImpl->setFlags(serializationFlags);
2399 }
2400
2409 {
2410 return mImpl->getFlags();
2411 }
2412
2420 bool clearFlag(SerializationFlag serializationFlag) noexcept
2421 {
2422 return mImpl->clearFlag(serializationFlag);
2423 }
2424
2432 bool setFlag(SerializationFlag serializationFlag) noexcept
2433 {
2434 return mImpl->setFlag(serializationFlag);
2435 }
2436
2444 bool getFlag(SerializationFlag serializationFlag) const noexcept
2445 {
2446 return mImpl->getFlag(serializationFlag);
2447 }
2448
2449protected:
2450 apiv::VSerializationConfig* mImpl;
2451};
2452
2465{
2466 kSTATIC = 0,
2467 kON_PROFILE_CHANGE = 1,
2468 kUSER_MANAGED = 2,
2469};
2470
2476template <>
2477constexpr inline int32_t EnumMax<ExecutionContextAllocationStrategy>() noexcept
2478{
2479 return 3;
2480}
2481
2489class ICudaEngine : public INoCopy
2490{
2491public:
2492 virtual ~ICudaEngine() noexcept = default;
2493
2504 Dims getTensorShape(char const* tensorName) const noexcept
2505 {
2506 return mImpl->getTensorShape(tensorName);
2507 }
2508
2519 DataType getTensorDataType(char const* tensorName) const noexcept
2520 {
2521 return mImpl->getTensorDataType(tensorName);
2522 }
2523
2533 int32_t getNbLayers() const noexcept
2534 {
2535 return mImpl->getNbLayers();
2536 }
2537
2547 IHostMemory* serialize() const noexcept
2548 {
2549 return mImpl->serialize();
2550 }
2551
2566 {
2567 return mImpl->createExecutionContext(strategy);
2568 }
2569
2582 TensorLocation getTensorLocation(char const* tensorName) const noexcept
2583 {
2584 return mImpl->getTensorLocation(tensorName);
2585 }
2586
2602 bool isShapeInferenceIO(char const* tensorName) const noexcept
2603 {
2604 return mImpl->isShapeInferenceIO(tensorName);
2605 }
2606
2616 TensorIOMode getTensorIOMode(char const* tensorName) const noexcept
2617 {
2618 return mImpl->getTensorIOMode(tensorName);
2619 }
2620
2629 {
2630 return mImpl->createExecutionContextWithoutDeviceMemory();
2631 }
2632
2640 TRT_DEPRECATED size_t getDeviceMemorySize() const noexcept
2641 {
2642 return mImpl->getDeviceMemorySize();
2643 }
2644
2652 TRT_DEPRECATED size_t getDeviceMemorySizeForProfile(int32_t profileIndex) const noexcept
2653 {
2654 return mImpl->getDeviceMemorySizeForProfile(profileIndex);
2655 }
2656
2668 int64_t getDeviceMemorySizeV2() const noexcept
2669 {
2670 return mImpl->getDeviceMemorySizeV2();
2671 }
2672
2684 int64_t getDeviceMemorySizeForProfileV2(int32_t profileIndex) const noexcept
2685 {
2686 return mImpl->getDeviceMemorySizeForProfileV2(profileIndex);
2687 }
2688
2694 bool isRefittable() const noexcept
2695 {
2696 return mImpl->isRefittable();
2697 }
2698
2715 int32_t getTensorBytesPerComponent(char const* tensorName) const noexcept
2716 {
2717 return mImpl->getTensorBytesPerComponent(tensorName);
2718 }
2719
2733 int32_t getTensorBytesPerComponent(char const* tensorName, int32_t profileIndex) const noexcept
2734 {
2735 return mImpl->getTensorBytesPerComponentV2(tensorName, profileIndex);
2736 }
2737
2754 int32_t getTensorComponentsPerElement(char const* tensorName) const noexcept
2755 {
2756 return mImpl->getTensorComponentsPerElement(tensorName);
2757 }
2758
2772 int32_t getTensorComponentsPerElement(char const* tensorName, int32_t profileIndex) const noexcept
2773 {
2774 return mImpl->getTensorComponentsPerElementV2(tensorName, profileIndex);
2775 }
2776
2787 TensorFormat getTensorFormat(char const* tensorName) const noexcept
2788 {
2789 return mImpl->getTensorFormat(tensorName);
2790 }
2791
2801 TensorFormat getTensorFormat(char const* tensorName, int32_t profileIndex) const noexcept
2802 {
2803 return mImpl->getTensorFormatV2(tensorName, profileIndex);
2804 }
2805
2825 char const* getTensorFormatDesc(char const* tensorName) const noexcept
2826 {
2827 return mImpl->getTensorFormatDesc(tensorName);
2828 }
2829
2848 char const* getTensorFormatDesc(char const* tensorName, int32_t profileIndex) const noexcept
2849 {
2850 return mImpl->getTensorFormatDescV2(tensorName, profileIndex);
2851 }
2852
2865 int32_t getTensorVectorizedDim(char const* tensorName) const noexcept
2866 {
2867 return mImpl->getTensorVectorizedDim(tensorName);
2868 }
2869
2881 int32_t getTensorVectorizedDim(char const* tensorName, int32_t profileIndex) const noexcept
2882 {
2883 return mImpl->getTensorVectorizedDimV2(tensorName, profileIndex);
2884 }
2885
2896 char const* getName() const noexcept
2897 {
2898 return mImpl->getName();
2899 }
2900
2907 int32_t getNbOptimizationProfiles() const noexcept
2908 {
2909 return mImpl->getNbOptimizationProfiles();
2910 }
2911
2927 Dims getProfileShape(char const* tensorName, int32_t profileIndex, OptProfileSelector select) const noexcept
2928 {
2929 return mImpl->getProfileShape(tensorName, profileIndex, select);
2930 }
2931
2949 int32_t const* getProfileTensorValues(char const* tensorName, int32_t profileIndex, OptProfileSelector select) const
2950 noexcept
2951 {
2952 return mImpl->getProfileTensorValues(tensorName, profileIndex, select);
2953 }
2954
2966 {
2967 return mImpl->getEngineCapability();
2968 }
2969
2984 void setErrorRecorder(IErrorRecorder* recorder) noexcept
2985 {
2986 return mImpl->setErrorRecorder(recorder);
2987 }
2988
3000 {
3001 return mImpl->getErrorRecorder();
3002 }
3003
3014 {
3015 return mImpl->hasImplicitBatchDimension();
3016 }
3017
3030 {
3031 return mImpl->getTacticSources();
3032 }
3033
3042 {
3043 return mImpl->getProfilingVerbosity();
3044 }
3045
3052 {
3053 return mImpl->createEngineInspector();
3054 }
3055
3064 int32_t getNbIOTensors() const noexcept
3065 {
3066 return mImpl->getNbIOTensors();
3067 }
3068
3076 char const* getIOTensorName(int32_t index) const noexcept
3077 {
3078 return mImpl->getIOTensorName(index);
3079 }
3080
3090 {
3091 return mImpl->getHardwareCompatibilityLevel();
3092 }
3093
3104 int32_t getNbAuxStreams() const noexcept
3105 {
3106 return mImpl->getNbAuxStreams();
3107 }
3108
3115 {
3116 return mImpl->createSerializationConfig();
3117 }
3118
3131 {
3132 return mImpl->serializeWithConfig(config);
3133 }
3134
3175 TRT_DEPRECATED bool setWeightStreamingBudget(int64_t gpuMemoryBudget) noexcept
3176 {
3177 return mImpl->setWeightStreamingBudget(gpuMemoryBudget);
3178 }
3179
3196 {
3197 return mImpl->getWeightStreamingBudget();
3198 }
3199
3219 {
3220 return mImpl->getMinimumWeightStreamingBudget();
3221 }
3222
3234 int64_t getStreamableWeightsSize() const noexcept
3235 {
3236 return mImpl->getStreamableWeightsSize();
3237 }
3238
3276 bool setWeightStreamingBudgetV2(int64_t gpuMemoryBudget) noexcept
3277 {
3278 return mImpl->setWeightStreamingBudgetV2(gpuMemoryBudget);
3279 }
3280
3294 int64_t getWeightStreamingBudgetV2() const noexcept
3295 {
3296 return mImpl->getWeightStreamingBudgetV2();
3297 }
3298
3319 int64_t getWeightStreamingAutomaticBudget() const noexcept
3320 {
3321 return mImpl->getWeightStreamingAutomaticBudget();
3322 }
3323
3348 {
3349 return mImpl->getWeightStreamingScratchMemorySize();
3350 }
3351
3361 bool isDebugTensor(char const* name) const noexcept
3362 {
3363 return mImpl->isDebugTensor(name);
3364 }
3365
3366protected:
3367 apiv::VCudaEngine* mImpl;
3368};
3369
3370namespace v_1_0
3371{
3373{
3374public:
3378 InterfaceInfo getInterfaceInfo() const noexcept override
3379 {
3380 return {"IOutputAllocator", 1, 0};
3381 }
3382
3403 char const* tensorName, void* currentMemory, uint64_t size, uint64_t alignment) noexcept
3404 {
3405 return nullptr;
3406 }
3407
3431 char const* tensorName, void* currentMemory, uint64_t size, uint64_t alignment, cudaStream_t /*stream*/)
3432 {
3433 return reallocateOutput(tensorName, currentMemory, size, alignment);
3434 }
3435
3444 virtual void notifyShape(char const* tensorName, Dims const& dims) noexcept = 0;
3445};
3446} // namespace v_1_0
3447
3456
3457namespace v_1_0
3458{
3460{
3461public:
3465 InterfaceInfo getInterfaceInfo() const noexcept override
3466 {
3467 return {"IDebugListener", 1, 0};
3468 }
3469
3483 virtual bool processDebugTensor(void const* addr, TensorLocation location, DataType type, Dims const& shape,
3484 char const* name, cudaStream_t stream)
3485 = 0;
3486
3487 ~IDebugListener() override = default;
3488};
3489} // namespace v_1_0
3490
3497
3509{
3510public:
3511 virtual ~IExecutionContext() noexcept = default;
3512
3521 void setDebugSync(bool sync) noexcept
3522 {
3523 mImpl->setDebugSync(sync);
3524 }
3525
3531 bool getDebugSync() const noexcept
3532 {
3533 return mImpl->getDebugSync();
3534 }
3535
3541 void setProfiler(IProfiler* profiler) noexcept
3542 {
3543 mImpl->setProfiler(profiler);
3544 }
3545
3551 IProfiler* getProfiler() const noexcept
3552 {
3553 return mImpl->getProfiler();
3554 }
3555
3561 ICudaEngine const& getEngine() const noexcept
3562 {
3563 return mImpl->getEngine();
3564 }
3565
3575 void setName(char const* name) noexcept
3576 {
3577 mImpl->setName(name);
3578 }
3579
3585 char const* getName() const noexcept
3586 {
3587 return mImpl->getName();
3588 }
3589
3612 void setDeviceMemory(void* memory) noexcept
3613 {
3614 mImpl->setDeviceMemory(memory);
3615 }
3616
3634 void setDeviceMemoryV2(void* memory, int64_t size) noexcept
3635 {
3636 return mImpl->setDeviceMemoryV2(memory, size);
3637 }
3638
3655 Dims getTensorStrides(char const* tensorName) const noexcept
3656 {
3657 return mImpl->getTensorStrides(tensorName);
3658 }
3659
3660public:
3670 int32_t getOptimizationProfile() const noexcept
3671 {
3672 return mImpl->getOptimizationProfile();
3673 }
3674
3688 bool setInputShape(char const* tensorName, Dims const& dims) noexcept
3689 {
3690 return mImpl->setInputShape(tensorName, dims);
3691 }
3692
3725 Dims getTensorShape(char const* tensorName) const noexcept
3726 {
3727 return mImpl->getTensorShape(tensorName);
3728 }
3729
3741 bool allInputDimensionsSpecified() const noexcept
3742 {
3743 return mImpl->allInputDimensionsSpecified();
3744 }
3745
3759 {
3760 return mImpl->allInputShapesSpecified();
3761 }
3762
3777 void setErrorRecorder(IErrorRecorder* recorder) noexcept
3778 {
3779 mImpl->setErrorRecorder(recorder);
3780 }
3781
3793 {
3794 return mImpl->getErrorRecorder();
3795 }
3796
3809 bool executeV2(void* const* bindings) noexcept
3810 {
3811 return mImpl->executeV2(bindings);
3812 }
3813
3853 bool setOptimizationProfileAsync(int32_t profileIndex, cudaStream_t stream) noexcept
3854 {
3855 return mImpl->setOptimizationProfileAsync(profileIndex, stream);
3856 }
3857
3869 void setEnqueueEmitsProfile(bool enqueueEmitsProfile) noexcept
3870 {
3871 mImpl->setEnqueueEmitsProfile(enqueueEmitsProfile);
3872 }
3873
3881 bool getEnqueueEmitsProfile() const noexcept
3882 {
3883 return mImpl->getEnqueueEmitsProfile();
3884 }
3885
3911 bool reportToProfiler() const noexcept
3912 {
3913 return mImpl->reportToProfiler();
3914 }
3915
3952 bool setTensorAddress(char const* tensorName, void* data) noexcept
3953 {
3954 return mImpl->setTensorAddress(tensorName, data);
3955 }
3956
3969 void const* getTensorAddress(char const* tensorName) const noexcept
3970 {
3971 return mImpl->getTensorAddress(tensorName);
3972 }
3973
3992 bool setOutputTensorAddress(char const* tensorName, void* data) noexcept
3993 {
3994 return mImpl->setOutputTensorAddress(tensorName, data);
3995 }
3996
4014 bool setInputTensorAddress(char const* tensorName, void const* data) noexcept
4015 {
4016 return mImpl->setInputTensorAddress(tensorName, data);
4017 }
4018
4033 void* getOutputTensorAddress(char const* tensorName) const noexcept
4034 {
4035 return mImpl->getOutputTensorAddress(tensorName);
4036 }
4037
4066 int32_t inferShapes(int32_t nbMaxNames, char const** tensorNames) noexcept
4067 {
4068 return mImpl->inferShapes(nbMaxNames, tensorNames);
4069 }
4070
4084 {
4085 return mImpl->updateDeviceMemorySizeForShapes();
4086 }
4087
4099 bool setInputConsumedEvent(cudaEvent_t event) noexcept
4100 {
4101 return mImpl->setInputConsumedEvent(event);
4102 }
4103
4109 cudaEvent_t getInputConsumedEvent() const noexcept
4110 {
4111 return mImpl->getInputConsumedEvent();
4112 }
4113
4128 bool setOutputAllocator(char const* tensorName, IOutputAllocator* outputAllocator) noexcept
4129 {
4130 return mImpl->setOutputAllocator(tensorName, outputAllocator);
4131 }
4132
4141 IOutputAllocator* getOutputAllocator(char const* tensorName) const noexcept
4142 {
4143 return mImpl->getOutputAllocator(tensorName);
4144 }
4145
4159 int64_t getMaxOutputSize(char const* tensorName) const noexcept
4160 {
4161 return mImpl->getMaxOutputSize(tensorName);
4162 }
4163
4180 {
4181 return mImpl->setTemporaryStorageAllocator(allocator);
4182 }
4183
4190 {
4191 return mImpl->getTemporaryStorageAllocator();
4192 }
4193
4213 bool enqueueV3(cudaStream_t stream) noexcept
4214 {
4215 return mImpl->enqueueV3(stream);
4216 }
4217
4229 void setPersistentCacheLimit(size_t size) noexcept
4230 {
4231 mImpl->setPersistentCacheLimit(size);
4232 }
4233
//! Get the maximum size (bytes) for persistent cache usage.
4240 size_t getPersistentCacheLimit() const noexcept
4241 {
4242 return mImpl->getPersistentCacheLimit();
4243 }
4244
//! Set the verbosity of the NVTX markers emitted by this execution context.
4264 bool setNvtxVerbosity(ProfilingVerbosity verbosity) noexcept
4265 {
4266 return mImpl->setNvtxVerbosity(verbosity);
4267 }
4268
4277 {
4278 return mImpl->getNvtxVerbosity();
4279 }
4280
//! Set the auxiliary streams that TensorRT should launch kernels on in the
//! next enqueueV3() call. auxStreams points to nbStreams cudaStream_t values.
4307 void setAuxStreams(cudaStream_t* auxStreams, int32_t nbStreams) noexcept
4308 {
4309 mImpl->setAuxStreams(auxStreams, nbStreams);
4310 }
4311
//! Set the IDebugListener for this execution context.
4319 bool setDebugListener(IDebugListener* listener) noexcept
4320 {
4321 return mImpl->setDebugListener(listener);
4322 }
4323
4330 {
4331 return mImpl->getDebugListener();
4332 }
4333
//! Set the debug state (on/off) of the tensor with the given name.
4348 bool setTensorDebugState(char const* name, bool flag) noexcept
4349 {
4350 return mImpl->setTensorDebugState(name, flag);
4351 }
4352
//! Set the debug state of all tensors at once — presumably all tensors
//! marked as debug tensors (TODO confirm against the API reference).
4361 bool setAllTensorsDebugState(bool flag) noexcept
4362 {
4363 return mImpl->setAllTensorsDebugState(flag);
4364 }
4365
//! Get the debug state of the tensor with the given name.
4371 bool getDebugState(char const* name) const noexcept
4372 {
4373 return mImpl->getDebugState(name);
4374 }
4375
4376protected:
4377 apiv::VExecutionContext* mImpl;
4378}; // class IExecutionContext
4379
//! Format used by IEngineInspector when printing layer information
//! (see getLayerInformation / getEngineInformation below).
4387 enum class LayerInformationFormat : int32_t
4388 {
4389 kONELINE = 0,
4390 kJSON = 1,
4391 };
4392
//! Number of enumerators in LayerInformationFormat (kONELINE, kJSON).
4395 template <>
4396 constexpr inline int32_t EnumMax<LayerInformationFormat>() noexcept
4397 {
4398 return 2;
4399 }
4400
4417{
4418public:
4419 virtual ~IEngineInspector() noexcept = default;
4420
//! Set the execution context to be inspected by this engine inspector
//! (paired with getExecutionContext, which returns the context currently
//! being inspected).
4433 bool setExecutionContext(IExecutionContext const* context) noexcept
4434 {
4435 return mImpl->setExecutionContext(context);
4436 }
4437
4446 {
4447 return mImpl->getExecutionContext();
4448 }
4449
//! Get a string describing a specific layer (by index) in the current engine
//! or execution context, rendered in the requested LayerInformationFormat.
4470 char const* getLayerInformation(int32_t layerIndex, LayerInformationFormat format) const noexcept
4471 {
4472 return mImpl->getLayerInformation(layerIndex, format);
4473 }
4474
//! Get a string describing all layers in the current engine or execution
//! context, rendered in the requested LayerInformationFormat.
4493 char const* getEngineInformation(LayerInformationFormat format) const noexcept
4494 {
4495 return mImpl->getEngineInformation(format);
4496 }
4497
//! Set the IErrorRecorder for this interface; errors are reported to it
//! instead of (or in addition to) the logger.
4512 void setErrorRecorder(IErrorRecorder* recorder) noexcept
4513 {
4514 mImpl->setErrorRecorder(recorder);
4515 }
4516
4528 {
4529 return mImpl->getErrorRecorder();
4530 }
4531
4532protected:
4533 apiv::VEngineInspector* mImpl;
4534}; // class IEngineInspector
4535
4536} // namespace nvinfer1
4537
//! Internal C entry point used by the inline createInferRuntime() wrapper;
//! the version argument enables runtime/header version checking.
4542 extern "C" TENSORRTAPI void* createInferRuntime_INTERNAL(void* logger, int32_t version) noexcept;
4543
//! Internal C entry point used by the inline createInferRefitter() wrapper.
4548 extern "C" TENSORRTAPI void* createInferRefitter_INTERNAL(void* engine, void* logger, int32_t version) noexcept;
4549
4554
//! Return the library's logger object.
4560 extern "C" TENSORRTAPI nvinfer1::ILogger* getLogger() noexcept;
4561
4562namespace nvinfer1
4563{
4564namespace // unnamed namespace avoids linkage surprises when linking objects built with different versions of this
4565 // header.
4566{
//! Create an IRuntime instance via the versioned C entry point, passing the
//! header's NV_TENSORRT_VERSION so the library can reject mismatched headers.
4572 inline IRuntime* createInferRuntime(ILogger& logger) noexcept
4573 {
4574 return static_cast<IRuntime*>(createInferRuntime_INTERNAL(&logger, NV_TENSORRT_VERSION));
4575 }
4576
//! Create an IRefitter for the given engine via the versioned C entry point,
//! with the same header-version handshake as createInferRuntime().
4582 inline IRefitter* createInferRefitter(ICudaEngine& engine, ILogger& logger) noexcept
4583 {
4584 return static_cast<IRefitter*>(createInferRefitter_INTERNAL(&engine, &logger, NV_TENSORRT_VERSION));
4585 }
4586
4587} // namespace
4588
4600template <typename T>
4602{
4603public:
4605 {
4606 getPluginRegistry()->registerCreator(instance, "");
4607 }
4608
4609private:
4611 T instance{};
4612};
4613
4614} // namespace nvinfer1
4615
//! Define a static PluginRegistrar<name> whose construction at load time
//! registers the plugin creator with the global plugin registry.
4616 #define REGISTER_TENSORRT_PLUGIN(name) \
4617 static nvinfer1::PluginRegistrar<name> pluginRegistrar##name {}
4618
4619namespace nvinfer1
4620{
4630{
4631public:
//! Get the logger used by the engine or execution context that invoked the
//! plugin method (pure virtual — implemented by the library).
4639 virtual ILogger* findLogger() = 0;
4640
4641protected:
4642 virtual ~ILoggerFinder() = default;
4643};
4644
4647namespace v_1_0
4648{
4649
4651{
4652public:
4654 ~IGpuAsyncAllocator() override = default;
4655
//! Pure virtual: implementors provide stream-aware GPU memory allocation.
//! The stream parameter is unnamed here; semantics are defined by the
//! implementing allocator — see the TensorRT IGpuAllocator docs.
4685 void* allocateAsync(uint64_t const size, uint64_t const alignment, AllocatorFlags const flags,
4686 cudaStream_t /*stream*/) noexcept override = 0;
4687
//! Pure virtual: stream-aware counterpart for releasing GPU memory.
4713 bool deallocateAsync(void* const memory, cudaStream_t /*stream*/) noexcept override = 0;
4714
4738 uint64_t const size, uint64_t const alignment, AllocatorFlags const flags) noexcept override
4739 {
4740 return allocateAsync(size, alignment, flags, nullptr);
4741 }
4742
//! Deprecated synchronous deallocation path: forwards to deallocateAsync
//! with a null stream.
4761 TRT_DEPRECATED bool deallocate(void* const memory) noexcept override
4762 {
4763 return deallocateAsync(memory, nullptr);
4764 }
4765
//! Report the interface identity as {"IGpuAllocator", 1, 0}.
//! NOTE(review): the name is "IGpuAllocator" (not "IGpuAsyncAllocator") —
//! presumably intentional, as this class implements the IGpuAllocator
//! interface contract; confirm against the TensorRT API reference.
4769 InterfaceInfo getInterfaceInfo() const noexcept override
4770 {
4771 return {"IGpuAllocator", 1, 0};
4772 }
4773};
4774} // namespace v_1_0
4775
4790
4791} // namespace nvinfer1
4792
4796extern "C" TENSORRTAPI int32_t getInferLibMajorVersion() noexcept;
4800extern "C" TENSORRTAPI int32_t getInferLibMinorVersion() noexcept;
4804extern "C" TENSORRTAPI int32_t getInferLibPatchVersion() noexcept;
4808extern "C" TENSORRTAPI int32_t getInferLibBuildVersion() noexcept;
4809
4810#endif // NV_INFER_RUNTIME_H
nvinfer1::ILogger * getLogger() noexcept
Return the logger object.
int32_t getInferLibMajorVersion() noexcept
Return the library major version number.
nvinfer1::IPluginRegistry * getPluginRegistry() noexcept
Return the plugin registry.
int32_t getInferLibPatchVersion() noexcept
Return the library patch version number.
int32_t getInferLibMinorVersion() noexcept
Return the library minor version number.
int32_t getInferLibBuildVersion() noexcept
Return the library build version number.
#define TENSORRTAPI
Definition: NvInferRuntimeBase.h:59
#define NV_TENSORRT_VERSION
Definition: NvInferRuntimeBase.h:93
#define TRT_DEPRECATED
Definition: NvInferRuntimeBase.h:45
#define TRT_DEPRECATED_ENUM
Definition: NvInferRuntimeBase.h:46
Structure to define the dimensions of a tensor.
Definition: NvInferRuntimeBase.h:202
static constexpr int32_t MAX_DIMS
The maximum rank (number of dimensions) supported for a tensor.
Definition: NvInferRuntimeBase.h:205
Analog of class Dims with expressions instead of constants for the dimensions.
Definition: NvInferRuntime.h:347
IDimensionExpr const * d[Dims::MAX_DIMS]
The extent of each dimension.
Definition: NvInferRuntime.h:350
int32_t nbDims
The number of dimensions.
Definition: NvInferRuntime.h:349
An engine for executing inference on a built network, with functionally unsafe features.
Definition: NvInferRuntime.h:2490
int32_t getTensorBytesPerComponent(char const *tensorName) const noexcept
Return the number of bytes per component of an element, or -1 if the tensor is not vectorized or prov...
Definition: NvInferRuntime.h:2715
ISerializationConfig * createSerializationConfig() noexcept
Create a serialization configuration object.
Definition: NvInferRuntime.h:3114
TRT_DEPRECATED int64_t getWeightStreamingBudget() const noexcept
Returns the current weight streaming device memory budget in bytes.
Definition: NvInferRuntime.h:3195
int32_t const * getProfileTensorValues(char const *tensorName, int32_t profileIndex, OptProfileSelector select) const noexcept
Get the minimum / optimum / maximum values (not dimensions) for an input tensor given its name under ...
Definition: NvInferRuntime.h:2949
char const * getIOTensorName(int32_t index) const noexcept
Return name of an IO tensor.
Definition: NvInferRuntime.h:3076
int64_t getWeightStreamingBudgetV2() const noexcept
Returns the current weight streaming device memory budget in bytes.
Definition: NvInferRuntime.h:3294
EngineCapability getEngineCapability() const noexcept
Determine what execution capability this engine has.
Definition: NvInferRuntime.h:2965
IErrorRecorder * getErrorRecorder() const noexcept
Get the ErrorRecorder assigned to this interface.
Definition: NvInferRuntime.h:2999
TensorFormat getTensorFormat(char const *tensorName, int32_t profileIndex) const noexcept
Return the tensor format of given profile, or TensorFormat::kLINEAR if the provided name does not map...
Definition: NvInferRuntime.h:2801
TRT_DEPRECATED bool hasImplicitBatchDimension() const noexcept
Query whether the engine was built with an implicit batch dimension.
Definition: NvInferRuntime.h:3013
apiv::VCudaEngine * mImpl
Definition: NvInferRuntime.h:3367
TRT_DEPRECATED size_t getDeviceMemorySizeForProfile(int32_t profileIndex) const noexcept
Return the maximum device memory required by the context for a profile.
Definition: NvInferRuntime.h:2652
IExecutionContext * createExecutionContext(ExecutionContextAllocationStrategy strategy=ExecutionContextAllocationStrategy::kSTATIC) noexcept
Create an execution context and specify the strategy for allocating internal activation memory.
Definition: NvInferRuntime.h:2564
char const * getTensorFormatDesc(char const *tensorName) const noexcept
Return the human readable description of the tensor format, or empty string if the provided name does...
Definition: NvInferRuntime.h:2825
Dims getProfileShape(char const *tensorName, int32_t profileIndex, OptProfileSelector select) const noexcept
Get the minimum / optimum / maximum dimensions for an input tensor given its name under an optimizati...
Definition: NvInferRuntime.h:2927
bool setWeightStreamingBudgetV2(int64_t gpuMemoryBudget) noexcept
Limit the maximum amount of GPU memory usable for network weights in bytes.
Definition: NvInferRuntime.h:3276
int32_t getNbAuxStreams() const noexcept
Return the number of auxiliary streams used by this engine.
Definition: NvInferRuntime.h:3104
int64_t getStreamableWeightsSize() const noexcept
Get the total size in bytes of all streamable weights.
Definition: NvInferRuntime.h:3234
DataType getTensorDataType(char const *tensorName) const noexcept
Determine the required data type for a buffer from its tensor name.
Definition: NvInferRuntime.h:2519
void setErrorRecorder(IErrorRecorder *recorder) noexcept
Set the ErrorRecorder for this interface.
Definition: NvInferRuntime.h:2984
TacticSources getTacticSources() const noexcept
return the tactic sources required by this engine.
Definition: NvInferRuntime.h:3029
IHostMemory * serializeWithConfig(ISerializationConfig &config) const noexcept
Serialize the network to a stream with the provided SerializationConfig.
Definition: NvInferRuntime.h:3130
virtual ~ICudaEngine() noexcept=default
int64_t getWeightStreamingAutomaticBudget() const noexcept
TensorRT automatically determines a device memory budget for the model to run. The budget is close to...
Definition: NvInferRuntime.h:3319
bool isDebugTensor(char const *name) const noexcept
Check if a tensor is marked as a debug tensor.
Definition: NvInferRuntime.h:3361
int32_t getTensorVectorizedDim(char const *tensorName, int32_t profileIndex) const noexcept
Return the dimension index that the buffer is vectorized of given profile, or -1 if the provided name...
Definition: NvInferRuntime.h:2881
char const * getName() const noexcept
Returns the name of the network associated with the engine.
Definition: NvInferRuntime.h:2896
ProfilingVerbosity getProfilingVerbosity() const noexcept
Return the ProfilingVerbosity the builder config was set to when the engine was built.
Definition: NvInferRuntime.h:3041
bool isShapeInferenceIO(char const *tensorName) const noexcept
True if tensor is required as input for shape calculations or is output from shape calculations.
Definition: NvInferRuntime.h:2602
int64_t getWeightStreamingScratchMemorySize() const noexcept
Returns the size of the scratch memory required by the current weight streaming budget.
Definition: NvInferRuntime.h:3347
TRT_DEPRECATED bool setWeightStreamingBudget(int64_t gpuMemoryBudget) noexcept
Limit the maximum amount of GPU memory usable for network weights in bytes.
Definition: NvInferRuntime.h:3175
int64_t getDeviceMemorySizeV2() const noexcept
Return the maximum device memory required by the context over all profiles.
Definition: NvInferRuntime.h:2668
int32_t getTensorVectorizedDim(char const *tensorName) const noexcept
Return the dimension index that the buffer is vectorized, or -1 if the provided name does not map to ...
Definition: NvInferRuntime.h:2865
TRT_DEPRECATED size_t getDeviceMemorySize() const noexcept
Return the maximum device memory required by the context over all profiles.
Definition: NvInferRuntime.h:2640
int32_t getTensorComponentsPerElement(char const *tensorName, int32_t profileIndex) const noexcept
Return the number of components included in one element of given profile, or -1 if tensor is not vect...
Definition: NvInferRuntime.h:2772
int64_t getDeviceMemorySizeForProfileV2(int32_t profileIndex) const noexcept
Return the maximum device memory required by the context for a profile.
Definition: NvInferRuntime.h:2684
TensorFormat getTensorFormat(char const *tensorName) const noexcept
Return the tensor format, or TensorFormat::kLINEAR if the provided name does not map to an input or o...
Definition: NvInferRuntime.h:2787
IHostMemory * serialize() const noexcept
Serialize the network to a stream.
Definition: NvInferRuntime.h:2547
TensorLocation getTensorLocation(char const *tensorName) const noexcept
Get whether an input or output tensor must be on GPU or CPU.
Definition: NvInferRuntime.h:2582
IEngineInspector * createEngineInspector() const noexcept
Create a new engine inspector which prints the layer information in an engine or an execution context...
Definition: NvInferRuntime.h:3051
int32_t getTensorBytesPerComponent(char const *tensorName, int32_t profileIndex) const noexcept
Return the number of bytes per component of an element given of given profile, or -1 if the tensor is...
Definition: NvInferRuntime.h:2733
HardwareCompatibilityLevel getHardwareCompatibilityLevel() const noexcept
Return the hardware compatibility level of this engine.
Definition: NvInferRuntime.h:3089
int32_t getNbOptimizationProfiles() const noexcept
Get the number of optimization profiles defined for this engine.
Definition: NvInferRuntime.h:2907
TRT_DEPRECATED IExecutionContext * createExecutionContextWithoutDeviceMemory() noexcept
create an execution context without any device memory allocated
Definition: NvInferRuntime.h:2628
char const * getTensorFormatDesc(char const *tensorName, int32_t profileIndex) const noexcept
Return the human readable description of the tensor format of given profile, or empty string if the p...
Definition: NvInferRuntime.h:2848
TRT_DEPRECATED int64_t getMinimumWeightStreamingBudget() const noexcept
The minimum number of bytes of GPU memory required by network weights for successful weight streaming...
Definition: NvInferRuntime.h:3218
TensorIOMode getTensorIOMode(char const *tensorName) const noexcept
Determine whether a tensor is an input or output tensor.
Definition: NvInferRuntime.h:2616
int32_t getNbLayers() const noexcept
Get the number of layers in the network.
Definition: NvInferRuntime.h:2533
int32_t getNbIOTensors() const noexcept
Return number of IO tensors.
Definition: NvInferRuntime.h:3064
int32_t getTensorComponentsPerElement(char const *tensorName) const noexcept
Return the number of components included in one element, or -1 if tensor is not vectorized or if the ...
Definition: NvInferRuntime.h:2754
bool isRefittable() const noexcept
Return true if an engine can be refit.
Definition: NvInferRuntime.h:2694
An IDimensionExpr represents an integer expression constructed from constants, input dimensions,...
Definition: NvInferRuntime.h:229
bool isConstant() const noexcept
Return true if expression is a build-time constant.
Definition: NvInferRuntime.h:234
bool isSizeTensor() const noexcept
Return true if this denotes the value of a size tensor.
Definition: NvInferRuntime.h:260
virtual ~IDimensionExpr() noexcept=default
apiv::VDimensionExpr * mImpl
Definition: NvInferRuntime.h:251
int64_t getConstantValue() const noexcept
Get the value of the constant.
Definition: NvInferRuntime.h:245
An engine inspector which prints out the layer information of an engine or an execution context.
Definition: NvInferRuntime.h:4417
char const * getLayerInformation(int32_t layerIndex, LayerInformationFormat format) const noexcept
Get a string describing the information about a specific layer in the current engine or the execution...
Definition: NvInferRuntime.h:4470
IErrorRecorder * getErrorRecorder() const noexcept
Get the ErrorRecorder assigned to this interface.
Definition: NvInferRuntime.h:4527
void setErrorRecorder(IErrorRecorder *recorder) noexcept
Set the ErrorRecorder for this interface.
Definition: NvInferRuntime.h:4512
IExecutionContext const * getExecutionContext() const noexcept
Get the context currently being inspected.
Definition: NvInferRuntime.h:4445
apiv::VEngineInspector * mImpl
Definition: NvInferRuntime.h:4533
virtual ~IEngineInspector() noexcept=default
char const * getEngineInformation(LayerInformationFormat format) const noexcept
Get a string describing the information about all the layers in the current engine or the execution c...
Definition: NvInferRuntime.h:4493
Context for executing inference using an engine, with functionally unsafe features.
Definition: NvInferRuntime.h:3509
IOutputAllocator * getOutputAllocator(char const *tensorName) const noexcept
Get output allocator associated with output tensor of given name, or nullptr if the provided name doe...
Definition: NvInferRuntime.h:4141
IErrorRecorder * getErrorRecorder() const noexcept
Get the ErrorRecorder assigned to this interface.
Definition: NvInferRuntime.h:3792
bool reportToProfiler() const noexcept
Calculate layer timing info for the current optimization profile in IExecutionContext and update the ...
Definition: NvInferRuntime.h:3911
virtual ~IExecutionContext() noexcept=default
void setDeviceMemory(void *memory) noexcept
Set the device memory for use by this execution context.
Definition: NvInferRuntime.h:3612
TRT_DEPRECATED bool allInputShapesSpecified() const noexcept
Whether all input shape bindings have been specified.
Definition: NvInferRuntime.h:3758
bool setTensorDebugState(char const *name, bool flag) noexcept
Set debug state of tensor given the tensor name.
Definition: NvInferRuntime.h:4348
char const * getName() const noexcept
Return the name of the execution context.
Definition: NvInferRuntime.h:3585
IGpuAllocator * getTemporaryStorageAllocator() const noexcept
Get allocator set by setTemporaryStorageAllocator.
Definition: NvInferRuntime.h:4189
void setEnqueueEmitsProfile(bool enqueueEmitsProfile) noexcept
Set whether enqueue emits layer timing to the profiler.
Definition: NvInferRuntime.h:3869
Dims getTensorShape(char const *tensorName) const noexcept
Return the shape of the given input or output.
Definition: NvInferRuntime.h:3725
bool getDebugState(char const *name) const noexcept
Get the debug state of a tensor given its name.
Definition: NvInferRuntime.h:4371
bool setInputShape(char const *tensorName, Dims const &dims) noexcept
Set shape of given input.
Definition: NvInferRuntime.h:3688
bool executeV2(void *const *bindings) noexcept
Synchronously execute a network.
Definition: NvInferRuntime.h:3809
bool getEnqueueEmitsProfile() const noexcept
Get the enqueueEmitsProfile state.
Definition: NvInferRuntime.h:3881
void const * getTensorAddress(char const *tensorName) const noexcept
Get memory address bound to given input or output tensor, or nullptr if the provided name does not ma...
Definition: NvInferRuntime.h:3969
bool setOutputAllocator(char const *tensorName, IOutputAllocator *outputAllocator) noexcept
Set output allocator to use for output tensor of given name. Pass nullptr to outputAllocator to unset...
Definition: NvInferRuntime.h:4128
bool setOptimizationProfileAsync(int32_t profileIndex, cudaStream_t stream) noexcept
Select an optimization profile for the current context with async semantics.
Definition: NvInferRuntime.h:3853
apiv::VExecutionContext * mImpl
Definition: NvInferRuntime.h:4377
bool setOutputTensorAddress(char const *tensorName, void *data) noexcept
Set the memory address for a given output tensor.
Definition: NvInferRuntime.h:3992
void setPersistentCacheLimit(size_t size) noexcept
Set the maximum size for persistent cache usage.
Definition: NvInferRuntime.h:4229
size_t getPersistentCacheLimit() const noexcept
Get the maximum size for persistent cache usage.
Definition: NvInferRuntime.h:4240
bool setAllTensorsDebugState(bool flag) noexcept
Turn the debug state of all debug tensors on or off.
Definition: NvInferRuntime.h:4361
ICudaEngine const & getEngine() const noexcept
Get the associated engine.
Definition: NvInferRuntime.h:3561
ProfilingVerbosity getNvtxVerbosity() const noexcept
Get the NVTX verbosity of the execution context.
Definition: NvInferRuntime.h:4276
size_t updateDeviceMemorySizeForShapes() noexcept
Recompute the internal activation buffer sizes based on the current input shapes, and return the tota...
Definition: NvInferRuntime.h:4083
void setAuxStreams(cudaStream_t *auxStreams, int32_t nbStreams) noexcept
Set the auxiliary streams that TensorRT should launch kernels on in the next enqueueV3() call.
Definition: NvInferRuntime.h:4307
int64_t getMaxOutputSize(char const *tensorName) const noexcept
Get upper bound on an output tensor's size, in bytes, based on the current optimization profile and i...
Definition: NvInferRuntime.h:4159
int32_t inferShapes(int32_t nbMaxNames, char const **tensorNames) noexcept
Run shape calculations.
Definition: NvInferRuntime.h:4066
bool setDebugListener(IDebugListener *listener) noexcept
Set DebugListener for this execution context.
Definition: NvInferRuntime.h:4319
bool setTensorAddress(char const *tensorName, void *data) noexcept
Set memory address for given input or output tensor.
Definition: NvInferRuntime.h:3952
bool setTemporaryStorageAllocator(IGpuAllocator *allocator) noexcept
Specify allocator to use for internal temporary storage.
Definition: NvInferRuntime.h:4179
void * getOutputTensorAddress(char const *tensorName) const noexcept
Get memory address for given output.
Definition: NvInferRuntime.h:4033
bool enqueueV3(cudaStream_t stream) noexcept
Enqueue inference on a stream.
Definition: NvInferRuntime.h:4213
IDebugListener * getDebugListener() noexcept
Get the DebugListener of this execution context.
Definition: NvInferRuntime.h:4329
int32_t getOptimizationProfile() const noexcept
Get the index of the currently selected optimization profile.
Definition: NvInferRuntime.h:3670
bool setInputTensorAddress(char const *tensorName, void const *data) noexcept
Set memory address for given input.
Definition: NvInferRuntime.h:4014
bool getDebugSync() const noexcept
Get the debug sync flag.
Definition: NvInferRuntime.h:3531
bool setInputConsumedEvent(cudaEvent_t event) noexcept
Mark input as consumed.
Definition: NvInferRuntime.h:4099
Dims getTensorStrides(char const *tensorName) const noexcept
Return the strides of the buffer for the given tensor name.
Definition: NvInferRuntime.h:3655
bool setNvtxVerbosity(ProfilingVerbosity verbosity) noexcept
Set the verbosity of the NVTX markers in the execution context.
Definition: NvInferRuntime.h:4264
IProfiler * getProfiler() const noexcept
Get the profiler.
Definition: NvInferRuntime.h:3551
void setErrorRecorder(IErrorRecorder *recorder) noexcept
Set the ErrorRecorder for this interface.
Definition: NvInferRuntime.h:3777
void setDeviceMemoryV2(void *memory, int64_t size) noexcept
Set the device memory and its corresponding size for use by this execution context.
Definition: NvInferRuntime.h:3634
bool allInputDimensionsSpecified() const noexcept
Whether all dynamic dimensions of input tensors have been specified.
Definition: NvInferRuntime.h:3741
void setProfiler(IProfiler *profiler) noexcept
Set the profiler.
Definition: NvInferRuntime.h:3541
void setName(char const *name) noexcept
Set the name of the execution context.
Definition: NvInferRuntime.h:3575
cudaEvent_t getInputConsumedEvent() const noexcept
The event associated with consuming the input.
Definition: NvInferRuntime.h:4109
Object for constructing IDimensionExpr.
Definition: NvInferRuntime.h:284
IDimensionExpr const * operation(DimensionOperation op, IDimensionExpr const &first, IDimensionExpr const &second) noexcept
Get the operation.
Definition: NvInferRuntime.h:300
virtual ~IExprBuilder() noexcept=default
IDimensionExpr const * constant(int64_t value) noexcept
Return pointer to IDimensionExp for given value.
Definition: NvInferRuntime.h:289
apiv::VExprBuilder * mImpl
Definition: NvInferRuntime.h:307
IDimensionExpr const * declareSizeTensor(int32_t outputIndex, IDimensionExpr const &opt, IDimensionExpr const &upper)
Declare a size tensor at the given output index, with the specified auto-tuning formula and upper bou...
Definition: NvInferRuntime.h:335
Class to handle library allocated memory that is accessible to the user.
Definition: NvInferRuntime.h:139
void * data() const noexcept
A pointer to the raw data that is owned by the library.
Definition: NvInferRuntime.h:144
DataType type() const noexcept
The type of the memory that was allocated.
Definition: NvInferRuntime.h:156
std::size_t size() const noexcept
The size in bytes of the data that was allocated.
Definition: NvInferRuntime.h:150
apiv::VHostMemory * mImpl
Definition: NvInferRuntime.h:162
virtual ~IHostMemory() noexcept=default
A virtual base class to find a logger. Allows a plugin to find an instance of a logger if it needs to...
Definition: NvInferRuntime.h:4630
virtual ILogger * findLogger()=0
Get the logger used by the engine or execution context which called the plugin method.
virtual ~ILoggerFinder()=default
Application-implemented logging interface for the builder, refitter and runtime.
Definition: NvInferRuntimeBase.h:683
Forward declaration of IEngineInspector for use by other interfaces.
Definition: NvInferRuntime.h:48
INoCopy & operator=(INoCopy &&other)=delete
INoCopy(INoCopy const &other)=delete
INoCopy(INoCopy &&other)=delete
virtual ~INoCopy()=default
INoCopy & operator=(INoCopy const &other)=delete
Optimization profile for dynamic input dimensions and shape tensors.
Definition: NvInferRuntime.h:2104
int32_t const * getShapeValues(char const *inputName, OptProfileSelector select) const noexcept
Get the minimum / optimum / maximum values for an input shape tensor.
Definition: NvInferRuntime.h:2218
apiv::VOptimizationProfile * mImpl
Definition: NvInferRuntime.h:2271
Dims getDimensions(char const *inputName, OptProfileSelector select) const noexcept
Get the minimum / optimum / maximum dimensions for a dynamic input tensor.
Definition: NvInferRuntime.h:2145
virtual ~IOptimizationProfile() noexcept=default
float getExtraMemoryTarget() const noexcept
Get the extra memory target that has been defined for this profile.
Definition: NvInferRuntime.h:2248
bool setExtraMemoryTarget(float target) noexcept
Set a target for extra GPU memory that may be used by this profile.
Definition: NvInferRuntime.h:2236
bool setDimensions(char const *inputName, OptProfileSelector select, Dims const &dims) noexcept
Set the minimum / optimum / maximum dimensions for a dynamic input tensor.
Definition: NvInferRuntime.h:2133
bool setShapeValues(char const *inputName, OptProfileSelector select, int32_t const *values, int32_t nbValues) noexcept
Set the minimum / optimum / maximum values for an input shape tensor.
Definition: NvInferRuntime.h:2192
bool isValid() const noexcept
Check whether the optimization profile can be passed to an IBuilderConfig object.
Definition: NvInferRuntime.h:2265
int32_t getNbShapeValues(char const *inputName) const noexcept
Get the number of values for an input shape tensor.
Definition: NvInferRuntime.h:2206
Single registration point for all plugins in an application. It is used to find plugin implementation...
Definition: NvInferRuntimeCommon.h:56
virtual TRT_DEPRECATED bool registerCreator(IPluginCreator &creator, AsciiChar const *const pluginNamespace) noexcept=0
Register a plugin creator implementing IPluginCreator. Returns false if any plugin creator with the s...
Interface for plugins to access per context resources provided by TensorRT.
Definition: NvInferRuntime.h:639
virtual IErrorRecorder * getErrorRecorder() const noexcept=0
Get the error recorder associated with the resource context.
virtual IGpuAllocator * getGpuAllocator() const noexcept=0
Get the GPU allocator associated with the resource context.
Similar to IPluginV2Ext, but with support for dynamic shapes.
Definition: NvInferRuntime.h:404
IPluginV2DynamicExt * clone() const noexcept override=0
Clone the plugin object. This copies over internal plugin parameters as well and returns a new plugin...
virtual ~IPluginV2DynamicExt() noexcept
Definition: NvInferRuntime.h:555
Plugin class for user-implemented layers.
Definition: NvInferRuntimePlugin.h:464
Updates weights in an engine.
Definition: NvInferRuntime.h:1624
bool refitCudaEngineAsync(cudaStream_t stream) noexcept
Enqueue weights refitting of the associated engine on the given stream.
Definition: NvInferRuntime.h:2026
int32_t getMaxThreads() const noexcept
get the maximum number of threads that can be used by the refitter.
Definition: NvInferRuntime.h:1905
TensorLocation getWeightsLocation(char const *weightsName) const noexcept
Get location for the weights associated with the given name.
Definition: NvInferRuntime.h:1964
bool setNamedWeights(char const *name, Weights weights) noexcept
Specify new weights of given name.
Definition: NvInferRuntime.h:1829
int32_t getAllWeights(int32_t size, char const **weightsNames) noexcept
Get names of all weights that could be refit.
Definition: NvInferRuntime.h:1865
ILogger * getLogger() const noexcept
get the logger with which the refitter was created
Definition: NvInferRuntime.h:1875
bool refitCudaEngine() noexcept
Refits associated engine.
Definition: NvInferRuntime.h:1660
int32_t getMissingWeights(int32_t size, char const **weightsNames) noexcept
Get names of missing weights.
Definition: NvInferRuntime.h:1849
TRT_DEPRECATED float getDynamicRangeMax(char const *tensorName) const noexcept
Get maximum of dynamic range.
Definition: NvInferRuntime.h:1752
int32_t getMissing(int32_t size, char const **layerNames, WeightsRole *roles) noexcept
Get description of missing weights.
Definition: NvInferRuntime.h:1681
Weights getNamedWeights(char const *weightsName) const noexcept
Get weights associated with the given name.
Definition: NvInferRuntime.h:1948
bool unsetNamedWeights(char const *weightsName) noexcept
Unset weights associated with the given name.
Definition: NvInferRuntime.h:1980
Weights getWeightsPrototype(char const *weightsName) const noexcept
Get the Weights prototype associated with the given name.
Definition: NvInferRuntime.h:2044
bool setMaxThreads(int32_t maxThreads) noexcept
Set the maximum number of threads.
Definition: NvInferRuntime.h:1891
TRT_DEPRECATED float getDynamicRangeMin(char const *tensorName) const noexcept
Get minimum of dynamic range.
Definition: NvInferRuntime.h:1736
TRT_DEPRECATED int32_t getTensorsWithDynamicRange(int32_t size, char const **tensorNames) const noexcept
Get names of all tensors that have refittable dynamic ranges.
Definition: NvInferRuntime.h:1770
bool setNamedWeights(char const *name, Weights weights, TensorLocation location) noexcept
Specify new weights on a specified device of given name.
Definition: NvInferRuntime.h:1932
void setWeightsValidation(bool weightsValidation) noexcept
Set whether to validate weights during refitting.
Definition: NvInferRuntime.h:1996
TRT_DEPRECATED bool setDynamicRange(char const *tensorName, float min, float max) noexcept
Update dynamic range for a tensor.
Definition: NvInferRuntime.h:1720
apiv::VRefitter * mImpl
Definition: NvInferRuntime.h:2050
int32_t getAll(int32_t size, char const **layerNames, WeightsRole *roles) noexcept
Get description of all weights that could be refit.
Definition: NvInferRuntime.h:1698
virtual ~IRefitter() noexcept=default
bool getWeightsValidation() const noexcept
Get whether to validate weights values during refitting.
Definition: NvInferRuntime.h:2004
void setErrorRecorder(IErrorRecorder *recorder) noexcept
Set the ErrorRecorder for this interface.
Definition: NvInferRuntime.h:1789
IErrorRecorder * getErrorRecorder() const noexcept
Get the ErrorRecorder assigned to this interface.
Definition: NvInferRuntime.h:1804
Allows a serialized functionally unsafe engine to be deserialized.
Definition: NvInferRuntime.h:1329
bool setMaxThreads(int32_t maxThreads) noexcept
Set the maximum number of threads.
Definition: NvInferRuntime.h:1469
IRuntime * loadRuntime(char const *path) noexcept
Load IRuntime from the file.
Definition: NvInferRuntime.h:1585
ICudaEngine * deserializeCudaEngine(IStreamReader &streamReader)
Deserialize an engine from a stream.
Definition: NvInferRuntime.h:1444
bool getEngineHostCodeAllowed() const noexcept
Get whether the runtime is allowed to deserialize engines with host executable code.
Definition: NvInferRuntime.h:1607
TempfileControlFlags getTempfileControlFlags() const noexcept
Get the tempfile control flags for this runtime.
Definition: NvInferRuntime.h:1557
void setEngineHostCodeAllowed(bool allowed) noexcept
Set whether the runtime is allowed to deserialize engines with host executable code.
Definition: NvInferRuntime.h:1597
virtual ~IRuntime() noexcept=default
void setTemporaryDirectory(char const *path) noexcept
Set the directory that will be used by this runtime for temporary files.
Definition: NvInferRuntime.h:1518
IPluginRegistry & getPluginRegistry() noexcept
Get the local plugin registry that can be used by the runtime.
Definition: NvInferRuntime.h:1567
apiv::VRuntime * mImpl
Definition: NvInferRuntime.h:1613
int32_t getNbDLACores() const noexcept
Returns number of DLA hardware cores accessible or 0 if DLA is unavailable.
Definition: NvInferRuntime.h:1362
ICudaEngine * deserializeCudaEngine(void const *blob, std::size_t size) noexcept
Deserialize an engine from host memory.
Definition: NvInferRuntime.h:1426
void setTempfileControlFlags(TempfileControlFlags flags) noexcept
Set the tempfile control flags for this runtime.
Definition: NvInferRuntime.h:1545
int32_t getDLACore() const noexcept
Get the DLA core that the engine executes on.
Definition: NvInferRuntime.h:1354
void setGpuAllocator(IGpuAllocator *allocator) noexcept
Set the GPU allocator.
Definition: NvInferRuntime.h:1377
IErrorRecorder * getErrorRecorder() const noexcept
Get the ErrorRecorder assigned to this interface.
Definition: NvInferRuntime.h:1411
ILogger * getLogger() const noexcept
Get the logger with which the runtime was created.
Definition: NvInferRuntime.h:1454
int32_t getMaxThreads() const noexcept
Get the maximum number of threads that can be used by the runtime.
Definition: NvInferRuntime.h:1483
char const * getTemporaryDirectory() const noexcept
Get the directory that will be used by this runtime for temporary files.
Definition: NvInferRuntime.h:1529
void setErrorRecorder(IErrorRecorder *recorder) noexcept
Set the ErrorRecorder for this interface.
Definition: NvInferRuntime.h:1396
Holds properties for configuring an engine to serialize the binary.
Definition: NvInferRuntime.h:2381
virtual ~ISerializationConfig() noexcept=default
bool clearFlag(SerializationFlag serializationFlag) noexcept
clear a serialization flag.
Definition: NvInferRuntime.h:2420
bool setFlag(SerializationFlag serializationFlag) noexcept
Set a serialization flag.
Definition: NvInferRuntime.h:2432
SerializationFlags getFlags() const noexcept
Get the serialization flags for this config.
Definition: NvInferRuntime.h:2408
bool getFlag(SerializationFlag serializationFlag) const noexcept
Returns true if the serialization flag is set.
Definition: NvInferRuntime.h:2444
apiv::VSerializationConfig * mImpl
Definition: NvInferRuntime.h:2450
An Interface class for version control.
Definition: NvInferRuntimeBase.h:400
Version information associated with a TRT interface.
Definition: NvInferRuntimeBase.h:365
Register the plugin creator to the registry The static registry object will be instantiated when the ...
Definition: NvInferRuntime.h:4602
PluginRegistrar()
Definition: NvInferRuntime.h:4604
An array of weights used as a layer parameter.
Definition: NvInferRuntime.h:121
DataType type
The type of the weights.
Definition: NvInferRuntime.h:123
int64_t count
The number of weights in the array.
Definition: NvInferRuntime.h:125
void const * values
The weight values, in a contiguous array.
Definition: NvInferRuntime.h:124
Definition: NvInferRuntime.h:3460
virtual bool processDebugTensor(void const *addr, TensorLocation location, DataType type, Dims const &shape, char const *name, cudaStream_t stream)=0
Callback function that is called when a debug tensor’s value is updated and the debug state of the te...
InterfaceInfo getInterfaceInfo() const noexcept override
Return version information associated with this interface. Applications must not override this method...
Definition: NvInferRuntime.h:3465
~IDebugListener() override=default
Definition: NvInferRuntimeBase.h:857
Definition: NvInferRuntimeBase.h:469
Definition: NvInferRuntime.h:4651
bool deallocateAsync(void *const memory, cudaStream_t) noexcept override=0
A thread-safe callback implemented by the application to handle stream-ordered asynchronous release o...
void * allocateAsync(uint64_t const size, uint64_t const alignment, AllocatorFlags const flags, cudaStream_t) noexcept override=0
A thread-safe callback implemented by the application to handle stream-ordered asynchronous acquisiti...
TRT_DEPRECATED void * allocate(uint64_t const size, uint64_t const alignment, AllocatorFlags const flags) noexcept override
A thread-safe callback implemented by the application to handle acquisition of GPU memory.
Definition: NvInferRuntime.h:4737
TRT_DEPRECATED bool deallocate(void *const memory) noexcept override
A thread-safe callback implemented by the application to handle release of GPU memory.
Definition: NvInferRuntime.h:4761
InterfaceInfo getInterfaceInfo() const noexcept override
Return version information associated with this interface. Applications must not override this method...
Definition: NvInferRuntime.h:4769
~IGpuAsyncAllocator() override=default
Definition: NvInferRuntime.h:3373
InterfaceInfo getInterfaceInfo() const noexcept override
Return version information associated with this interface. Applications must not override this method...
Definition: NvInferRuntime.h:3378
virtual void * reallocateOutputAsync(char const *tensorName, void *currentMemory, uint64_t size, uint64_t alignment, cudaStream_t)
Return a pointer to memory for an output tensor, or nullptr if memory cannot be allocated....
Definition: NvInferRuntime.h:3430
virtual void notifyShape(char const *tensorName, Dims const &dims) noexcept=0
Called by TensorRT when the shape of the output tensor is known.
virtual TRT_DEPRECATED void * reallocateOutput(char const *tensorName, void *currentMemory, uint64_t size, uint64_t alignment) noexcept
Return a pointer to memory for an output tensor, or nullptr if memory cannot be allocated....
Definition: NvInferRuntime.h:3402
Definition: NvInferRuntime.h:665
Definition: NvInferRuntimePlugin.h:935
Definition: NvInferRuntime.h:1134
virtual PluginFieldCollection const * getFieldNames() noexcept=0
Return a list of fields that need to be passed to createPlugin() when creating a plugin for use in th...
InterfaceInfo getInterfaceInfo() const noexcept override
Return version information associated with this interface. Applications must not override this method...
Definition: NvInferRuntime.h:1139
virtual IPluginV3 * createPlugin(AsciiChar const *name, PluginFieldCollection const *fc, TensorRTPhase phase) noexcept=0
Return a plugin object. Return nullptr in case of error.
Definition: NvInferRuntime.h:687
virtual IPluginV3 * clone() noexcept=0
Clone the plugin object. This copies over internal plugin parameters and returns a new plugin object ...
virtual IPluginCapability * getCapabilityInterface(PluginCapabilityType type) noexcept=0
Return a pointer to plugin object implementing the specified PluginCapabilityType.
InterfaceInfo getInterfaceInfo() const noexcept override
Return version information associated with this interface. Applications must not override this method...
Definition: NvInferRuntime.h:692
Definition: NvInferRuntime.h:784
virtual int32_t getFormatCombinationLimit() noexcept
Return the maximum number of format combinations that will be timed by TensorRT during the build phas...
Definition: NvInferRuntime.h:985
virtual int32_t getNbOutputs() const noexcept=0
Get the number of outputs from the plugin.
virtual int32_t configurePlugin(DynamicPluginTensorDesc const *in, int32_t nbInputs, DynamicPluginTensorDesc const *out, int32_t nbOutputs) noexcept=0
Configure the plugin.
virtual int32_t getNbTactics() noexcept
Query for the number of custom tactics the plugin intends to use.
Definition: NvInferRuntime.h:961
virtual char const * getMetadataString() noexcept
Query for a string representing the configuration of the plugin. May be called anytime after plugin c...
Definition: NvInferRuntime.h:996
virtual char const * getTimingCacheID() noexcept
Called to query the suffix to use for the timing cache ID. May be called anytime after plugin creatio...
Definition: NvInferRuntime.h:977
virtual bool supportsFormatCombination(int32_t pos, DynamicPluginTensorDesc const *inOut, int32_t nbInputs, int32_t nbOutputs) noexcept=0
Return true if plugin supports the format and datatype for the input/output indexed by pos.
virtual int32_t getOutputDataTypes(DataType *outputTypes, int32_t nbOutputs, const DataType *inputTypes, int32_t nbInputs) const noexcept=0
Provide the data types of the plugin outputs if the input tensors have the data types provided.
InterfaceInfo getInterfaceInfo() const noexcept override
Return version information associated with this interface. Applications must not override this method...
Definition: NvInferRuntime.h:796
virtual int32_t getOutputShapes(DimsExprs const *inputs, int32_t nbInputs, DimsExprs const *shapeInputs, int32_t nbShapeInputs, DimsExprs *outputs, int32_t nbOutputs, IExprBuilder &exprBuilder) noexcept=0
Provide expressions for computing dimensions of the output tensors from dimensions of the input tenso...
virtual int32_t getValidTactics(int32_t *tactics, int32_t nbTactics) noexcept
Query for any custom tactics that the plugin intends to use.
Definition: NvInferRuntime.h:953
Definition: NvInferRuntime.h:741
InterfaceInfo getInterfaceInfo() const noexcept override
Return version information associated with this interface. Applications must not override this method...
Definition: NvInferRuntime.h:746
virtual AsciiChar const * getPluginName() const noexcept=0
Return the plugin name. Should match the plugin name returned by the corresponding plugin creator.
Definition: NvInferRuntime.h:1003
InterfaceInfo getInterfaceInfo() const noexcept override
Return version information associated with this interface. Applications must not override this method...
Definition: NvInferRuntime.h:1008
virtual int32_t onShapeChange(PluginTensorDesc const *in, int32_t nbInputs, PluginTensorDesc const *out, int32_t nbOutputs) noexcept=0
Called when a plugin is being prepared for execution for specific dimensions. This could happen multi...
virtual PluginFieldCollection const * getFieldsToSerialize() noexcept=0
Get the plugin fields which should be serialized.
virtual int32_t setTactic(int32_t tactic) noexcept
Set the tactic to be used in the subsequent call to enqueue(). If no custom tactics were advertised,...
Definition: NvInferRuntime.h:1020
virtual int32_t enqueue(PluginTensorDesc const *inputDesc, PluginTensorDesc const *outputDesc, void const *const *inputs, void *const *outputs, void *workspace, cudaStream_t stream) noexcept=0
Execute the layer.
virtual IPluginV3 * attachToContext(IPluginResourceContext *context) noexcept=0
Clone the plugin, attach the cloned plugin object to an execution context and grant the cloned plugin ...
Definition: NvInferRuntime.h:1215
virtual ~IProfiler() noexcept
Definition: NvInferRuntime.h:1226
virtual void reportLayerTime(char const *layerName, float ms) noexcept=0
Layer time reporting callback.
Definition: NvInferRuntimeBase.h:1114
IRefitter * createInferRefitter(ICudaEngine &engine, ILogger &logger) noexcept
Create an instance of an IRefitter class.
Definition: NvInferRuntime.h:4582
IRuntime * createInferRuntime(ILogger &logger) noexcept
Create an instance of an IRuntime class.
Definition: NvInferRuntime.h:4572
The TensorRT API version 1 namespace.
uint32_t TacticSources
Represents a collection of one or more TacticSource values combined using bitwise-OR operations.
Definition: NvInferRuntime.h:2320
v_1_0::IPluginCapability IPluginCapability
Definition: NvInferRuntime.h:682
v_1_0::IOutputAllocator IOutputAllocator
Definition: NvInferRuntime.h:3455
EngineCapability
List of supported engine capability flows.
Definition: NvInferRuntime.h:73
DimensionOperation
An operation on two IDimensionExpr, which represent integer expressions used in dimension computation...
Definition: NvInferRuntime.h:176
@ kSUB
Subtract the second element from the first.
@ kSUM
Sum of the two operands.
@ kPROD
Product of the two operands.
@ kFLOOR_DIV
Floor division of the first element by the second.
@ kEQUAL
1 if operands are equal, 0 otherwise.
@ kMIN
Minimum of the two operands.
@ kLESS
1 if first operand is less than second operand, 0 otherwise.
@ kMAX
Maximum of the two operands.
@ kCEIL_DIV
Division rounding up.
TensorIOMode
Definition of tensor IO Mode.
Definition: NvInferRuntimeBase.h:1100
HardwareCompatibilityLevel
Describes requirements of compatibility with GPU architectures other than that of the GPU on which th...
Definition: NvInfer.h:8578
SerializationFlag
List of valid flags that the engine can enable when serializing the bytes.
Definition: NvInferRuntime.h:2361
@ kEXCLUDE_WEIGHTS
Exclude the weights that can be refitted.
v_1_0::IProfiler IProfiler
Definition: NvInferRuntime.h:1241
uint32_t TempfileControlFlags
Represents a collection of one or more TempfileControlFlag values combined using bitwise-OR operation...
Definition: NvInferRuntime.h:1319
PluginCapabilityType
Enumerates the different capability types a IPluginV3 object may have.
Definition: NvInferRuntimePlugin.h:910
constexpr int32_t EnumMax< WeightsRole >() noexcept
Maximum number of elements in WeightsRole enum.
Definition: NvInferRuntime.h:1262
char_t AsciiChar
Definition: NvInferRuntimeBase.h:107
TensorRTPhase
Indicates a phase of operation of TensorRT.
Definition: NvInferRuntimePlugin.h:925
@ kV2_DYNAMICEXT
IPluginV2DynamicExt.
constexpr int32_t EnumMax< LayerInformationFormat >() noexcept
Definition: NvInferRuntime.h:4396
DataType
The type of weights and tensors.
Definition: NvInferRuntimeBase.h:135
DeviceType
The device that this layer/network will execute on.
Definition: NvInferRuntime.h:1273
@ kSCALE
Scale layer.
@ kCONSTANT
Constant layer.
v_1_0::IDebugListener IDebugListener
Definition: NvInferRuntime.h:3496
TempfileControlFlag
Flags used to control TensorRT's behavior when creating executable temporary files.
Definition: NvInferRuntime.h:1296
@ kALLOW_IN_MEMORY_FILES
Allow creating and loading files in-memory (or unnamed files).
constexpr int32_t EnumMax< OptProfileSelector >() noexcept
Number of different values of OptProfileSelector enum.
Definition: NvInferRuntime.h:2076
WeightsRole
How a layer uses particular Weights.
Definition: NvInferRuntime.h:1251
@ kSHIFT
shift part of IScaleLayer
@ kANY
Any other weights role.
@ kBIAS
bias for IConvolutionLayer or IDeconvolutionLayer
@ kKERNEL
kernel for IConvolutionLayer or IDeconvolutionLayer
constexpr int32_t EnumMax< ProfilingVerbosity >() noexcept
Maximum number of profile verbosity levels in ProfilingVerbosity enum.
Definition: NvInferRuntime.h:2340
ProfilingVerbosity
List of verbosity levels of layer information exposed in NVTX annotations and in IEngineInspector.
Definition: NvInferRuntime.h:2332
@ kLAYER_NAMES_ONLY
Print only the layer names. This is the default setting.
@ kDETAILED
Print detailed layer information including layer names and layer parameters.
TacticSource
List of tactic sources for TensorRT.
Definition: NvInferRuntime.h:2283
TensorFormat PluginFormat
PluginFormat is reserved for backward compatibility.
Definition: NvInferRuntimePlugin.h:48
@ kMIN
Minimum of the two elements.
constexpr int32_t EnumMax< TempfileControlFlag >() noexcept
Maximum number of elements in TempfileControlFlag enum.
Definition: NvInferRuntime.h:1308
uint32_t SerializationFlags
Represents one or more SerializationFlag values using binary OR operations, e.g., 1U << Serialization...
Definition: NvInferRuntime.h:2351
TensorFormat
Format of the input/output tensors.
Definition: NvInferRuntimeBase.h:250
ExecutionContextAllocationStrategy
Different memory allocation behaviors for IExecutionContext.
Definition: NvInferRuntime.h:2465
@ kSTATIC
Default static allocation with the maximum size across all profiles.
@ kUSER_MANAGED
The user supplies custom allocation to the execution context.
@ kON_PROFILE_CHANGE
Reallocate for a profile when it's selected.
constexpr int32_t EnumMax< TacticSource >() noexcept
Maximum number of tactic sources in TacticSource enum.
Definition: NvInferRuntime.h:2309
LayerInformationFormat
The format in which the IEngineInspector prints the layer information.
Definition: NvInferRuntime.h:4388
@ kJSON
Print layer information in JSON format.
@ kONELINE
Print layer information in one line per layer.
v_1_0::IGpuAsyncAllocator IGpuAsyncAllocator
Definition: NvInferRuntime.h:4789
@ kMAX
Maximum over elements.
constexpr int32_t EnumMax< DeviceType >() noexcept
Maximum number of elements in DeviceType enum.
Definition: NvInferRuntime.h:1280
constexpr int32_t EnumMax< DimensionOperation >() noexcept
Maximum number of elements in DimensionOperation enum.
Definition: NvInferRuntime.h:190
constexpr int32_t EnumMax< ExecutionContextAllocationStrategy >() noexcept
Maximum number of memory allocation strategies in ExecutionContextAllocationStrategy enum.
Definition: NvInferRuntime.h:2477
constexpr int32_t EnumMax< SerializationFlag >() noexcept
Maximum number of serialization flags in SerializationFlag enum.
Definition: NvInferRuntime.h:2368
TensorLocation
The location for tensor data storage, device or host.
Definition: NvInferRuntime.h:201
@ kHOST
Data stored on host.
@ kDEVICE
Data stored on device.
OptProfileSelector
When setting or querying optimization profile parameters (such as shape tensor inputs or dynamic dime...
Definition: NvInferRuntime.h:2064
@ kOPT
This is used to set or get the value that is used in the optimization (kernel selection).
uint32_t AllocatorFlags
Definition: NvInferRuntimeBase.h:461
Summarizes tensors that a plugin might see for an input or output.
Definition: NvInferRuntime.h:359
Dims min
Lower bounds on tensor’s dimensions.
Definition: NvInferRuntime.h:364
Dims max
Upper bounds on tensor’s dimensions.
Definition: NvInferRuntime.h:367
Dims opt
Optimum value of tensor’s dimensions specified for auto-tuning.
Definition: NvInferRuntime.h:370
PluginTensorDesc desc
Information required to interpret a pointer to tensor data, except that desc.dims has -1 in place of ...
Definition: NvInferRuntime.h:361
Plugin field collection struct.
Definition: NvInferRuntimePlugin.h:897
Fields that a plugin might see for an input or output.
Definition: NvInferRuntimePlugin.h:67
Declaration of EnumMaxImpl struct to store maximum number of elements in an enumeration type.
Definition: NvInferRuntimeBase.h:120

  Copyright © 2024 NVIDIA Corporation
  Privacy Policy | Manage My Privacy | Do Not Sell or Share My Data | Terms of Service | Accessibility | Corporate Policies | Product Security | Contact