TensorRT 10.5.0
NvInferRuntime.h
Go to the documentation of this file.
1/*
2 * SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
3 * SPDX-License-Identifier: Apache-2.0
4 *
5 * Licensed under the Apache License, Version 2.0 (the "License");
6 * you may not use this file except in compliance with the License.
7 * You may obtain a copy of the License at
8 *
9 * http://www.apache.org/licenses/LICENSE-2.0
10 *
11 * Unless required by applicable law or agreed to in writing, software
12 * distributed under the License is distributed on an "AS IS" BASIS,
13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 * See the License for the specific language governing permissions and
15 * limitations under the License.
16 */
17
18#ifndef NV_INFER_RUNTIME_H
19#define NV_INFER_RUNTIME_H
20
26
27#include "NvInferImpl.h"
29
30namespace nvinfer1
31{
32
33class IExecutionContext;
34class ICudaEngine;
35class IPluginFactory;
36class IEngineInspector;
37
46
48{
49protected:
50 INoCopy() = default;
51 virtual ~INoCopy() = default;
52 INoCopy(INoCopy const& other) = delete;
53 INoCopy& operator=(INoCopy const& other) = delete;
54 INoCopy(INoCopy&& other) = delete;
55 INoCopy& operator=(INoCopy&& other) = delete;
56};
57
//!
//! \enum EngineCapability
//!
//! \brief Capability mode restricting the TensorRT features available at build/runtime.
//!
enum class EngineCapability : int32_t
{
    //! Full TensorRT functionality; engines are interchangeable with the standard runtime.
    kSTANDARD = 0,

    //! Safety-restricted subset of TensorRT (safety-certified runtime).
    kSAFETY = 1,

    //! DLA-standalone flow: engines are built for DLA and loaded outside TensorRT.
    //! NOTE(review): this enumerator was lost in extraction; restored — EnumMaxImpl's
    //! kVALUE == 3 below confirms the enum has exactly three values.
    kDLA_STANDALONE = 2,
};
95
96namespace impl
97{
99template <>
101{
102 static constexpr int32_t kVALUE = 3;
103};
104} // namespace impl
105
121{
122public:
124 void const* values;
125 int64_t count;
126};
127
138class IHostMemory : public INoCopy
139{
140public:
141 virtual ~IHostMemory() noexcept = default;
142
144 void* data() const noexcept
145 {
146 return mImpl->data();
147 }
148
150 std::size_t size() const noexcept
151 {
152 return mImpl->size();
153 }
154
156 DataType type() const noexcept
157 {
158 return mImpl->type();
159 }
160
161protected:
162 apiv::VHostMemory* mImpl;
163};
164
//!
//! \enum DimensionOperation
//!
//! \brief Binary operations applicable to IDimensionExpr operands via IExprBuilder::operation().
//!
enum class DimensionOperation : int32_t
{
    kSUM = 0,       //!< Sum of the two operands.
    kPROD = 1,      //!< Product of the two operands.
    kMAX = 2,       //!< Maximum of the two operands.
    kMIN = 3,       //!< Minimum of the two operands.
    kSUB = 4,       //!< Difference of the two operands.
    kEQUAL = 5,     //!< 1 if operands are equal, 0 otherwise.
    kLESS = 6,      //!< 1 if first operand is less than second, 0 otherwise.
    kFLOOR_DIV = 7, //!< Floor division of the first by the second operand.
    kCEIL_DIV = 8   //!< Ceiling division of the first by the second operand.
};
187
189template <>
190constexpr inline int32_t EnumMax<DimensionOperation>() noexcept
191{
192 return 9;
193}
194
//!
//! \enum TensorLocation
//!
//! \brief The location (device or host) where a tensor's data resides.
//!
enum class TensorLocation : int32_t
{
    kDEVICE = 0, //!< Data is stored on the device (GPU).
    kHOST = 1,   //!< Data is stored on the host (CPU).
};
205
206namespace impl
207{
209template <>
211{
212 static constexpr int32_t kVALUE = 2;
213};
214} // namespace impl
215
229{
230public:
234 bool isConstant() const noexcept
235 {
236 return mImpl->isConstant();
237 }
238
245 int64_t getConstantValue() const noexcept
246 {
247 return mImpl->getConstantValue();
248 }
249
250protected:
251 apiv::VDimensionExpr* mImpl;
252 virtual ~IDimensionExpr() noexcept = default;
253
254public:
260 bool isSizeTensor() const noexcept
261 {
262 return mImpl->isSizeTensor();
263 }
264};
265
283class IExprBuilder : public INoCopy
284{
285public:
289 IDimensionExpr const* constant(int64_t value) noexcept
290 {
291 return mImpl->constant(value);
292 }
293
301 DimensionOperation op, IDimensionExpr const& first, IDimensionExpr const& second) noexcept
302 {
303 return mImpl->operation(op, first, second);
304 }
305
306protected:
307 apiv::VExprBuilder* mImpl;
308 virtual ~IExprBuilder() noexcept = default;
309
310public:
335 IDimensionExpr const* declareSizeTensor(int32_t outputIndex, IDimensionExpr const& opt, IDimensionExpr const& upper)
336 {
337 return mImpl->declareSizeTensor(outputIndex, opt, upper);
338 }
339};
340
347{
348public:
349 int32_t nbDims;
351};
352
359{
362
365
368
371};
372
404{
405public:
406 IPluginV2DynamicExt* clone() const noexcept override = 0;
407
432 virtual DimsExprs getOutputDimensions(
433 int32_t outputIndex, DimsExprs const* inputs, int32_t nbInputs, IExprBuilder& exprBuilder) noexcept = 0;
434
438 static constexpr int32_t kFORMAT_COMBINATION_LIMIT = 100;
439
472 virtual bool supportsFormatCombination(
473 int32_t pos, PluginTensorDesc const* inOut, int32_t nbInputs, int32_t nbOutputs) noexcept = 0;
474
512 virtual void configurePlugin(DynamicPluginTensorDesc const* in, int32_t nbInputs,
513 DynamicPluginTensorDesc const* out, int32_t nbOutputs) noexcept = 0;
514
524 virtual size_t getWorkspaceSize(PluginTensorDesc const* inputs, int32_t nbInputs, PluginTensorDesc const* outputs,
525 int32_t nbOutputs) const noexcept = 0;
526
539 virtual int32_t enqueue(PluginTensorDesc const* inputDesc, PluginTensorDesc const* outputDesc,
540 void const* const* inputs, void* const* outputs, void* workspace, cudaStream_t stream) noexcept = 0;
541
542protected:
550 int32_t getTensorRTVersion() const noexcept override
551 {
552 return (static_cast<int32_t>(PluginVersion::kV2_DYNAMICEXT) << 24 | (NV_TENSORRT_VERSION & 0xFFFFFF));
553 }
554
555 virtual ~IPluginV2DynamicExt() noexcept {}
556
557private:
558 // Following are obsolete base class methods, and must not be implemented or used.
559
563 void configurePlugin(Dims const*, int32_t, Dims const*, int32_t, DataType const*, DataType const*, bool const*,
564 bool const*, PluginFormat, int32_t) noexcept override final
565 {
566 }
567
571 bool supportsFormat(DataType, PluginFormat) const noexcept override final
572 {
573 return false;
574 }
575
579 Dims getOutputDimensions(int32_t, Dims const*, int32_t) noexcept override final
580 {
581 return Dims{-1, {}};
582 }
583
591 TRT_DEPRECATED bool isOutputBroadcastAcrossBatch(int32_t, bool const*, int32_t) const noexcept override final
592 {
593 return false;
594 }
595
603 TRT_DEPRECATED bool canBroadcastInputAcrossBatch(int32_t) const noexcept override final
604 {
605 return true;
606 }
607
611 size_t getWorkspaceSize(int32_t) const noexcept override final
612 {
613 return 0;
614 }
615
619 int32_t enqueue(int32_t, void const* const*, void* const*, void*, cudaStream_t) noexcept override final
620 {
621 return 1;
622 }
623};
624
639{
640public:
645 virtual IGpuAllocator* getGpuAllocator() const noexcept = 0;
646
651 virtual IErrorRecorder* getErrorRecorder() const noexcept = 0;
652 virtual ~IPluginResourceContext() noexcept = default;
653
654protected:
658 IPluginResourceContext& operator=(IPluginResourceContext const&) & = default;
660};
661
662namespace v_1_0
663{
665{
666};
667} // namespace v_1_0
668
683
684namespace v_1_0
685{
687{
688public:
692 InterfaceInfo getInterfaceInfo() const noexcept override
693 {
694 return InterfaceInfo{"PLUGIN", 1, 0};
695 }
696
709
720 virtual IPluginV3* clone() noexcept = 0;
721};
722
723} // namespace v_1_0
724
736using IPluginV3 = v_1_0::IPluginV3;
737
738namespace v_1_0
739{
741{
742public:
746 InterfaceInfo getInterfaceInfo() const noexcept override
747 {
748 return InterfaceInfo{"PLUGIN_V3ONE_CORE", 1, 0};
749 }
750
759 virtual AsciiChar const* getPluginName() const noexcept = 0;
760
769 virtual AsciiChar const* getPluginVersion() const noexcept = 0;
770
780 virtual AsciiChar const* getPluginNamespace() const noexcept = 0;
781};
782
784{
785public:
791 static constexpr int32_t kDEFAULT_FORMAT_COMBINATION_LIMIT = 100;
792
796 InterfaceInfo getInterfaceInfo() const noexcept override
797 {
798 return InterfaceInfo{"PLUGIN_V3ONE_BUILD", 1, 0};
799 }
800
818 virtual int32_t configurePlugin(DynamicPluginTensorDesc const* in, int32_t nbInputs,
819 DynamicPluginTensorDesc const* out, int32_t nbOutputs) noexcept = 0;
820
837 virtual int32_t getOutputDataTypes(
838 DataType* outputTypes, int32_t nbOutputs, const DataType* inputTypes, int32_t nbInputs) const noexcept = 0;
839
860 virtual int32_t getOutputShapes(DimsExprs const* inputs, int32_t nbInputs, DimsExprs const* shapeInputs,
861 int32_t nbShapeInputs, DimsExprs* outputs, int32_t nbOutputs, IExprBuilder& exprBuilder) noexcept = 0;
862
898 int32_t pos, DynamicPluginTensorDesc const* inOut, int32_t nbInputs, int32_t nbOutputs) noexcept = 0;
899
905 virtual int32_t getNbOutputs() const noexcept = 0;
906
916 virtual size_t getWorkspaceSize(DynamicPluginTensorDesc const* inputs, int32_t nbInputs,
917 DynamicPluginTensorDesc const* outputs, int32_t nbOutputs) const noexcept
918 {
919 return 0;
920 }
921
953 virtual int32_t getValidTactics(int32_t* tactics, int32_t nbTactics) noexcept
954 {
955 return 0;
956 }
957
961 virtual int32_t getNbTactics() noexcept
962 {
963 return 0;
964 }
965
977 virtual char const* getTimingCacheID() noexcept
978 {
979 return nullptr;
980 }
981
985 virtual int32_t getFormatCombinationLimit() noexcept
986 {
987 return kDEFAULT_FORMAT_COMBINATION_LIMIT;
988 }
989
996 virtual char const* getMetadataString() noexcept
997 {
998 return nullptr;
999 }
1000};
1001
1003{
1004public:
1008 InterfaceInfo getInterfaceInfo() const noexcept override
1009 {
1010 return InterfaceInfo{"PLUGIN_V3ONE_RUNTIME", 1, 0};
1011 }
1012
1020 virtual int32_t setTactic(int32_t tactic) noexcept
1021 {
1022 return 0;
1023 }
1024
1043 virtual int32_t onShapeChange(
1044 PluginTensorDesc const* in, int32_t nbInputs, PluginTensorDesc const* out, int32_t nbOutputs) noexcept = 0;
1045
1059 virtual int32_t enqueue(PluginTensorDesc const* inputDesc, PluginTensorDesc const* outputDesc,
1060 void const* const* inputs, void* const* outputs, void* workspace, cudaStream_t stream) noexcept = 0;
1061
1081 virtual IPluginV3* attachToContext(IPluginResourceContext* context) noexcept = 0;
1082
1088
1092 virtual PluginFieldCollection const* getFieldsToSerialize() noexcept = 0;
1093};
1094} // namespace v_1_0
1095
1096namespace v_2_0
1097{
1098
1100{
1101public:
1102 InterfaceInfo getInterfaceInfo() const noexcept override
1103 {
1104 return InterfaceInfo{"PLUGIN_V3ONE_BUILD", 2, 0};
1105 }
1106
1136 virtual int32_t getAliasedInput(int32_t outputIndex) noexcept
1137 {
1138 return -1;
1139 }
1140};
1141
1142} // namespace v_2_0
1143
1154
1166
1178
1187
1188namespace v_1_0
1189{
1191{
1192public:
1196 InterfaceInfo getInterfaceInfo() const noexcept override
1197 {
1198 return InterfaceInfo{"PLUGIN CREATOR_V3ONE", 1, 0};
1199 }
1200
1214 AsciiChar const* name, PluginFieldCollection const* fc, TensorRTPhase phase) noexcept = 0;
1215
1222 virtual PluginFieldCollection const* getFieldNames() noexcept = 0;
1223
1230 virtual AsciiChar const* getPluginName() const noexcept = 0;
1231
1238 virtual AsciiChar const* getPluginVersion() const noexcept = 0;
1239
1246 virtual AsciiChar const* getPluginNamespace() const noexcept = 0;
1247
1249 virtual ~IPluginCreatorV3One() = default;
1250
1251protected:
1254 IPluginCreatorV3One& operator=(IPluginCreatorV3One const&) & = default;
1255 IPluginCreatorV3One& operator=(IPluginCreatorV3One&&) & = default;
1256};
1257} // namespace v_1_0
1258
1268
namespace v_1_0
{
//! \brief Application-implemented interface for receiving per-layer timing information.
//! NOTE(review): the class declaration line was lost in extraction; restored per the
//! public TensorRT API.
class IProfiler
{
public:
    //! Called once per layer after execution, with the layer name and execution time in ms.
    virtual void reportLayerTime(char const* layerName, float ms) noexcept = 0;

    virtual ~IProfiler() noexcept {}
};
} // namespace v_1_0
1286
1299
//!
//! \enum WeightsRole
//!
//! \brief The role a set of weights plays within a layer (used by the refitter).
//!
enum class WeightsRole : int32_t
{
    kKERNEL = 0,   //!< Kernel (e.g. convolution/deconvolution/matmul) weights.
    kBIAS = 1,     //!< Bias weights.
    kSHIFT = 2,    //!< Shift part of a scale layer.
    kSCALE = 3,    //!< Scale part of a scale layer.
    kCONSTANT = 4, //!< Weights of a constant layer.
    kANY = 5,      //!< Any other weights role.
};
1316
1318template <>
1319constexpr inline int32_t EnumMax<WeightsRole>() noexcept
1320{
1321 return 6;
1322}
1323
//!
//! \enum DeviceType
//!
//! \brief The device type on which a layer or network executes.
//!
enum class DeviceType : int32_t
{
    kGPU = 0, //!< GPU device.
    kDLA = 1, //!< DLA (deep learning accelerator) core.
};
1334
1336template <>
1337constexpr inline int32_t EnumMax<DeviceType>() noexcept
1338{
1339 return 2;
1340}
1341
//!
//! \enum TempfileControlFlag
//!
//! \brief Flags controlling how TensorRT may use temporary files/memory when loading
//! runtime libraries or plugin code.
//! NOTE(review): both enumerators were lost in extraction; restored — EnumMax below
//! confirms the enum has exactly two values.
//!
enum class TempfileControlFlag : int32_t
{
    //! Allow creating and loading files in memory (or an OS equivalent).
    kALLOW_IN_MEMORY_FILES = 0,

    //! Allow creating and loading named files in a temporary directory on the filesystem.
    kALLOW_TEMPORARY_FILES = 1,
};
1362
1364template <>
1365constexpr inline int32_t EnumMax<TempfileControlFlag>() noexcept
1366{
1367 return 2;
1368}
1369
1376using TempfileControlFlags = uint32_t;
1377
1385class IRuntime : public INoCopy
1386{
1387public:
1388 virtual ~IRuntime() noexcept = default;
1389
1401 void setDLACore(int32_t dlaCore) noexcept
1402 {
1403 mImpl->setDLACore(dlaCore);
1404 }
1405
1411 int32_t getDLACore() const noexcept
1412 {
1413 return mImpl->getDLACore();
1414 }
1415
1419 int32_t getNbDLACores() const noexcept
1420 {
1421 return mImpl->getNbDLACores();
1422 }
1423
1434 void setGpuAllocator(IGpuAllocator* allocator) noexcept
1435 {
1436 mImpl->setGpuAllocator(allocator);
1437 }
1438
1450 //
1453 void setErrorRecorder(IErrorRecorder* recorder) noexcept
1454 {
1455 mImpl->setErrorRecorder(recorder);
1456 }
1457
1469 {
1470 return mImpl->getErrorRecorder();
1471 }
1472
1483 ICudaEngine* deserializeCudaEngine(void const* blob, std::size_t size) noexcept
1484 {
1485 return mImpl->deserializeCudaEngine(blob, size);
1486 }
1487
1502 {
1503 return mImpl->deserializeCudaEngine(streamReader);
1504 }
1505
1511 ILogger* getLogger() const noexcept
1512 {
1513 return mImpl->getLogger();
1514 }
1515
1526 bool setMaxThreads(int32_t maxThreads) noexcept
1527 {
1528 return mImpl->setMaxThreads(maxThreads);
1529 }
1530
1540 int32_t getMaxThreads() const noexcept
1541 {
1542 return mImpl->getMaxThreads();
1543 }
1544
1575 void setTemporaryDirectory(char const* path) noexcept
1576 {
1577 return mImpl->setTemporaryDirectory(path);
1578 }
1579
1586 char const* getTemporaryDirectory() const noexcept
1587 {
1588 return mImpl->getTemporaryDirectory();
1589 }
1590
1603 {
1604 return mImpl->setTempfileControlFlags(flags);
1605 }
1606
1615 {
1616 return mImpl->getTempfileControlFlags();
1617 }
1618
1625 {
1626 return mImpl->getPluginRegistry();
1627 }
1628
1642 IRuntime* loadRuntime(char const* path) noexcept
1643 {
1644 return mImpl->loadRuntime(path);
1645 }
1646
1654 void setEngineHostCodeAllowed(bool allowed) noexcept
1655 {
1656 return mImpl->setEngineHostCodeAllowed(allowed);
1657 }
1658
1664 bool getEngineHostCodeAllowed() const noexcept
1665 {
1666 return mImpl->getEngineHostCodeAllowed();
1667 }
1668
1669protected:
1670 apiv::VRuntime* mImpl;
1671};
1672
1680class IRefitter : public INoCopy
1681{
1682public:
1683 virtual ~IRefitter() noexcept = default;
1684
1700 bool setWeights(char const* layerName, WeightsRole role, Weights weights) noexcept
1701 {
1702 return mImpl->setWeights(layerName, role, weights);
1703 }
1704
1717 bool refitCudaEngine() noexcept
1718 {
1719 return mImpl->refitCudaEngine();
1720 }
1721
1738 int32_t getMissing(int32_t size, char const** layerNames, WeightsRole* roles) noexcept
1739 {
1740 return mImpl->getMissing(size, layerNames, roles);
1741 }
1742
1755 int32_t getAll(int32_t size, char const** layerNames, WeightsRole* roles) noexcept
1756 {
1757 return mImpl->getAll(size, layerNames, roles);
1758 }
1759
1777 TRT_DEPRECATED bool setDynamicRange(char const* tensorName, float min, float max) noexcept
1778 {
1779 return mImpl->setDynamicRange(tensorName, min, max);
1780 }
1781
1793 TRT_DEPRECATED float getDynamicRangeMin(char const* tensorName) const noexcept
1794 {
1795 return mImpl->getDynamicRangeMin(tensorName);
1796 }
1797
1809 TRT_DEPRECATED float getDynamicRangeMax(char const* tensorName) const noexcept
1810 {
1811 return mImpl->getDynamicRangeMax(tensorName);
1812 }
1813
1827 TRT_DEPRECATED int32_t getTensorsWithDynamicRange(int32_t size, char const** tensorNames) const noexcept
1828 {
1829 return mImpl->getTensorsWithDynamicRange(size, tensorNames);
1830 }
1831
1843 //
1846 void setErrorRecorder(IErrorRecorder* recorder) noexcept
1847 {
1848 mImpl->setErrorRecorder(recorder);
1849 }
1850
1862 {
1863 return mImpl->getErrorRecorder();
1864 }
1865
1886 bool setNamedWeights(char const* name, Weights weights) noexcept
1887 {
1888 return mImpl->setNamedWeights(name, weights);
1889 }
1890
1906 int32_t getMissingWeights(int32_t size, char const** weightsNames) noexcept
1907 {
1908 return mImpl->getMissingWeights(size, weightsNames);
1909 }
1910
1922 int32_t getAllWeights(int32_t size, char const** weightsNames) noexcept
1923 {
1924 return mImpl->getAllWeights(size, weightsNames);
1925 }
1926
1932 ILogger* getLogger() const noexcept
1933 {
1934 return mImpl->getLogger();
1935 }
1936
1948 bool setMaxThreads(int32_t maxThreads) noexcept
1949 {
1950 return mImpl->setMaxThreads(maxThreads);
1951 }
1952
1962 int32_t getMaxThreads() const noexcept
1963 {
1964 return mImpl->getMaxThreads();
1965 }
1966
1989 bool setNamedWeights(char const* name, Weights weights, TensorLocation location) noexcept
1990 {
1991 return mImpl->setNamedWeightsWithLocation(name, weights, location);
1992 }
1993
2005 Weights getNamedWeights(char const* weightsName) const noexcept
2006 {
2007 return mImpl->getNamedWeights(weightsName);
2008 }
2009
2021 TensorLocation getWeightsLocation(char const* weightsName) const noexcept
2022 {
2023 return mImpl->getWeightsLocation(weightsName);
2024 }
2025
2037 bool unsetNamedWeights(char const* weightsName) noexcept
2038 {
2039 return mImpl->unsetNamedWeights(weightsName);
2040 }
2041
2053 void setWeightsValidation(bool weightsValidation) noexcept
2054 {
2055 return mImpl->setWeightsValidation(weightsValidation);
2056 }
2057
2061 bool getWeightsValidation() const noexcept
2062 {
2063 return mImpl->getWeightsValidation();
2064 }
2065
2083 bool refitCudaEngineAsync(cudaStream_t stream) noexcept
2084 {
2085 return mImpl->refitCudaEngineAsync(stream);
2086 }
2087
2101 Weights getWeightsPrototype(char const* weightsName) const noexcept
2102 {
2103 return mImpl->getWeightsPrototype(weightsName);
2104 }
2105
2106protected:
2107 apiv::VRefitter* mImpl;
2108};
2109
//!
//! \enum OptProfileSelector
//!
//! \brief Selects the minimum, optimum, or maximum value of an optimization-profile entry.
//!
enum class OptProfileSelector : int32_t
{
    kMIN = 0, //!< The minimum permitted value.
    kOPT = 1, //!< The value the engine is optimized for.
    kMAX = 2  //!< The maximum permitted value.
};
2126
2132template <>
2133constexpr inline int32_t EnumMax<OptProfileSelector>() noexcept
2134{
2135 return 3;
2136}
2137
2161{
2162public:
2190 bool setDimensions(char const* inputName, OptProfileSelector select, Dims const& dims) noexcept
2191 {
2192 return mImpl->setDimensions(inputName, select, dims);
2193 }
2194
2202 Dims getDimensions(char const* inputName, OptProfileSelector select) const noexcept
2203 {
2204 return mImpl->getDimensions(inputName, select);
2205 }
2206
2250 char const* inputName, OptProfileSelector select, int32_t const* values, int32_t nbValues) noexcept
2251 {
2252 return mImpl->setShapeValues(inputName, select, values, nbValues);
2253 }
2254
2263 int32_t getNbShapeValues(char const* inputName) const noexcept
2264 {
2265 return mImpl->getNbShapeValues(inputName);
2266 }
2267
2275 int32_t const* getShapeValues(char const* inputName, OptProfileSelector select) const noexcept
2276 {
2277 return mImpl->getShapeValues(inputName, select);
2278 }
2279
2293 bool setExtraMemoryTarget(float target) noexcept
2294 {
2295 return mImpl->setExtraMemoryTarget(target);
2296 }
2297
2305 float getExtraMemoryTarget() const noexcept
2306 {
2307 return mImpl->getExtraMemoryTarget();
2308 }
2309
2322 bool isValid() const noexcept
2323 {
2324 return mImpl->isValid();
2325 }
2326
2327protected:
2328 apiv::VOptimizationProfile* mImpl;
2329 virtual ~IOptimizationProfile() noexcept = default;
2330};
2331
2339enum class TacticSource : int32_t
2340{
2345
2349
2354
2359
2363};
2364
2365template <>
2366constexpr inline int32_t EnumMax<TacticSource>() noexcept
2367{
2368 return 5;
2369}
2370
2377using TacticSources = uint32_t;
2378
//!
//! \enum ProfilingVerbosity
//!
//! \brief Verbosity of layer information in NVTX annotations and the engine inspector.
//!
enum class ProfilingVerbosity : int32_t
{
    kLAYER_NAMES_ONLY = 0, //!< Print only layer names.
    kNONE = 1,             //!< Do not print any layer information.
    kDETAILED = 2,         //!< Print detailed layer information.
};
2394
2396template <>
2397constexpr inline int32_t EnumMax<ProfilingVerbosity>() noexcept
2398{
2399 return 3;
2400}
2401
2408using SerializationFlags = uint32_t;
2409
//!
//! \enum SerializationFlag
//!
//! \brief Options controlling how an engine is serialized.
//! NOTE(review): the second enumerator was lost in extraction; restored — EnumMax below
//! confirms the enum has exactly two values.
//!
enum class SerializationFlag : int32_t
{
    kEXCLUDE_WEIGHTS = 0,      //!< Serialize without the weights needed for refit.
    kEXCLUDE_LEAN_RUNTIME = 1, //!< Serialize without the embedded lean runtime.
};
2422
2424template <>
2425constexpr inline int32_t EnumMax<SerializationFlag>() noexcept
2426{
2427 return 2;
2428}
2429
2438{
2439public:
2440 virtual ~ISerializationConfig() noexcept = default;
2441
2453 bool setFlags(SerializationFlags serializationFlags) noexcept
2454 {
2455 return mImpl->setFlags(serializationFlags);
2456 }
2457
2466 {
2467 return mImpl->getFlags();
2468 }
2469
2477 bool clearFlag(SerializationFlag serializationFlag) noexcept
2478 {
2479 return mImpl->clearFlag(serializationFlag);
2480 }
2481
2489 bool setFlag(SerializationFlag serializationFlag) noexcept
2490 {
2491 return mImpl->setFlag(serializationFlag);
2492 }
2493
2501 bool getFlag(SerializationFlag serializationFlag) const noexcept
2502 {
2503 return mImpl->getFlag(serializationFlag);
2504 }
2505
2506protected:
2507 apiv::VSerializationConfig* mImpl;
2508};
2509
//!
//! \enum ExecutionContextAllocationStrategy
//!
//! \brief How device memory for an execution context is allocated.
//! NOTE(review): the enum declaration line was lost in extraction; restored per the
//! public TensorRT API.
//!
enum class ExecutionContextAllocationStrategy : int32_t
{
    kSTATIC = 0,            //!< Default: pre-allocate the maximum required size.
    kON_PROFILE_CHANGE = 1, //!< Reallocate only when the active profile changes.
    kUSER_MANAGED = 2,      //!< The application supplies device memory itself.
};
2527
2533template <>
2534constexpr inline int32_t EnumMax<ExecutionContextAllocationStrategy>() noexcept
2535{
2536 return 3;
2537}
2538
2546class ICudaEngine : public INoCopy
2547{
2548public:
2549 virtual ~ICudaEngine() noexcept = default;
2550
2561 Dims getTensorShape(char const* tensorName) const noexcept
2562 {
2563 return mImpl->getTensorShape(tensorName);
2564 }
2565
2576 DataType getTensorDataType(char const* tensorName) const noexcept
2577 {
2578 return mImpl->getTensorDataType(tensorName);
2579 }
2580
2590 int32_t getNbLayers() const noexcept
2591 {
2592 return mImpl->getNbLayers();
2593 }
2594
2604 IHostMemory* serialize() const noexcept
2605 {
2606 return mImpl->serialize();
2607 }
2608
2623 {
2624 return mImpl->createExecutionContext(strategy);
2625 }
2626
2639 TensorLocation getTensorLocation(char const* tensorName) const noexcept
2640 {
2641 return mImpl->getTensorLocation(tensorName);
2642 }
2643
2659 bool isShapeInferenceIO(char const* tensorName) const noexcept
2660 {
2661 return mImpl->isShapeInferenceIO(tensorName);
2662 }
2663
2673 TensorIOMode getTensorIOMode(char const* tensorName) const noexcept
2674 {
2675 return mImpl->getTensorIOMode(tensorName);
2676 }
2677
2686 {
2687 return mImpl->createExecutionContextWithoutDeviceMemory();
2688 }
2689
2697 TRT_DEPRECATED size_t getDeviceMemorySize() const noexcept
2698 {
2699 return mImpl->getDeviceMemorySize();
2700 }
2701
2709 TRT_DEPRECATED size_t getDeviceMemorySizeForProfile(int32_t profileIndex) const noexcept
2710 {
2711 return mImpl->getDeviceMemorySizeForProfile(profileIndex);
2712 }
2713
2725 int64_t getDeviceMemorySizeV2() const noexcept
2726 {
2727 return mImpl->getDeviceMemorySizeV2();
2728 }
2729
2741 int64_t getDeviceMemorySizeForProfileV2(int32_t profileIndex) const noexcept
2742 {
2743 return mImpl->getDeviceMemorySizeForProfileV2(profileIndex);
2744 }
2745
2751 bool isRefittable() const noexcept
2752 {
2753 return mImpl->isRefittable();
2754 }
2755
2772 int32_t getTensorBytesPerComponent(char const* tensorName) const noexcept
2773 {
2774 return mImpl->getTensorBytesPerComponent(tensorName);
2775 }
2776
2790 int32_t getTensorBytesPerComponent(char const* tensorName, int32_t profileIndex) const noexcept
2791 {
2792 return mImpl->getTensorBytesPerComponentV2(tensorName, profileIndex);
2793 }
2794
2811 int32_t getTensorComponentsPerElement(char const* tensorName) const noexcept
2812 {
2813 return mImpl->getTensorComponentsPerElement(tensorName);
2814 }
2815
2829 int32_t getTensorComponentsPerElement(char const* tensorName, int32_t profileIndex) const noexcept
2830 {
2831 return mImpl->getTensorComponentsPerElementV2(tensorName, profileIndex);
2832 }
2833
2844 TensorFormat getTensorFormat(char const* tensorName) const noexcept
2845 {
2846 return mImpl->getTensorFormat(tensorName);
2847 }
2848
2858 TensorFormat getTensorFormat(char const* tensorName, int32_t profileIndex) const noexcept
2859 {
2860 return mImpl->getTensorFormatV2(tensorName, profileIndex);
2861 }
2862
2882 char const* getTensorFormatDesc(char const* tensorName) const noexcept
2883 {
2884 return mImpl->getTensorFormatDesc(tensorName);
2885 }
2886
2905 char const* getTensorFormatDesc(char const* tensorName, int32_t profileIndex) const noexcept
2906 {
2907 return mImpl->getTensorFormatDescV2(tensorName, profileIndex);
2908 }
2909
2922 int32_t getTensorVectorizedDim(char const* tensorName) const noexcept
2923 {
2924 return mImpl->getTensorVectorizedDim(tensorName);
2925 }
2926
2938 int32_t getTensorVectorizedDim(char const* tensorName, int32_t profileIndex) const noexcept
2939 {
2940 return mImpl->getTensorVectorizedDimV2(tensorName, profileIndex);
2941 }
2942
2953 char const* getName() const noexcept
2954 {
2955 return mImpl->getName();
2956 }
2957
2964 int32_t getNbOptimizationProfiles() const noexcept
2965 {
2966 return mImpl->getNbOptimizationProfiles();
2967 }
2968
2984 Dims getProfileShape(char const* tensorName, int32_t profileIndex, OptProfileSelector select) const noexcept
2985 {
2986 return mImpl->getProfileShape(tensorName, profileIndex, select);
2987 }
2988
3006 int32_t const* getProfileTensorValues(char const* tensorName, int32_t profileIndex, OptProfileSelector select) const
3007 noexcept
3008 {
3009 return mImpl->getProfileTensorValues(tensorName, profileIndex, select);
3010 }
3011
3023 {
3024 return mImpl->getEngineCapability();
3025 }
3026
3041 void setErrorRecorder(IErrorRecorder* recorder) noexcept
3042 {
3043 return mImpl->setErrorRecorder(recorder);
3044 }
3045
3057 {
3058 return mImpl->getErrorRecorder();
3059 }
3060
3071 {
3072 return mImpl->hasImplicitBatchDimension();
3073 }
3074
3087 {
3088 return mImpl->getTacticSources();
3089 }
3090
3099 {
3100 return mImpl->getProfilingVerbosity();
3101 }
3102
3109 {
3110 return mImpl->createEngineInspector();
3111 }
3112
3121 int32_t getNbIOTensors() const noexcept
3122 {
3123 return mImpl->getNbIOTensors();
3124 }
3125
3133 char const* getIOTensorName(int32_t index) const noexcept
3134 {
3135 return mImpl->getIOTensorName(index);
3136 }
3137
3147 {
3148 return mImpl->getHardwareCompatibilityLevel();
3149 }
3150
3161 int32_t getNbAuxStreams() const noexcept
3162 {
3163 return mImpl->getNbAuxStreams();
3164 }
3165
3172 {
3173 return mImpl->createSerializationConfig();
3174 }
3175
3188 {
3189 return mImpl->serializeWithConfig(config);
3190 }
3191
3232 TRT_DEPRECATED bool setWeightStreamingBudget(int64_t gpuMemoryBudget) noexcept
3233 {
3234 return mImpl->setWeightStreamingBudget(gpuMemoryBudget);
3235 }
3236
3253 {
3254 return mImpl->getWeightStreamingBudget();
3255 }
3256
3276 {
3277 return mImpl->getMinimumWeightStreamingBudget();
3278 }
3279
3291 int64_t getStreamableWeightsSize() const noexcept
3292 {
3293 return mImpl->getStreamableWeightsSize();
3294 }
3295
3333 bool setWeightStreamingBudgetV2(int64_t gpuMemoryBudget) noexcept
3334 {
3335 return mImpl->setWeightStreamingBudgetV2(gpuMemoryBudget);
3336 }
3337
3351 int64_t getWeightStreamingBudgetV2() const noexcept
3352 {
3353 return mImpl->getWeightStreamingBudgetV2();
3354 }
3355
3376 int64_t getWeightStreamingAutomaticBudget() const noexcept
3377 {
3378 return mImpl->getWeightStreamingAutomaticBudget();
3379 }
3380
3405 {
3406 return mImpl->getWeightStreamingScratchMemorySize();
3407 }
3408
3418 bool isDebugTensor(char const* name) const noexcept
3419 {
3420 return mImpl->isDebugTensor(name);
3421 }
3422
3423protected:
3424 apiv::VCudaEngine* mImpl;
3425};
3426
3427namespace v_1_0
3428{
3430{
3431public:
3435 InterfaceInfo getInterfaceInfo() const noexcept override
3436 {
3437 return {"IOutputAllocator", 1, 0};
3438 }
3439
3460 char const* tensorName, void* currentMemory, uint64_t size, uint64_t alignment) noexcept
3461 {
3462 return nullptr;
3463 }
3464
3488 char const* tensorName, void* currentMemory, uint64_t size, uint64_t alignment, cudaStream_t /*stream*/)
3489 {
3490 return reallocateOutput(tensorName, currentMemory, size, alignment);
3491 }
3492
3501 virtual void notifyShape(char const* tensorName, Dims const& dims) noexcept = 0;
3502};
3503} // namespace v_1_0
3504
3513
3514namespace v_1_0
3515{
3517{
3518public:
3522 InterfaceInfo getInterfaceInfo() const noexcept override
3523 {
3524 return {"IDebugListener", 1, 0};
3525 }
3526
3540 virtual bool processDebugTensor(void const* addr, TensorLocation location, DataType type, Dims const& shape,
3541 char const* name, cudaStream_t stream)
3542 = 0;
3543
3544 ~IDebugListener() override = default;
3545};
3546} // namespace v_1_0
3547
3554
3566{
3567public:
3568 virtual ~IExecutionContext() noexcept = default;
3569
3578 void setDebugSync(bool sync) noexcept
3579 {
3580 mImpl->setDebugSync(sync);
3581 }
3582
3588 bool getDebugSync() const noexcept
3589 {
3590 return mImpl->getDebugSync();
3591 }
3592
3598 void setProfiler(IProfiler* profiler) noexcept
3599 {
3600 mImpl->setProfiler(profiler);
3601 }
3602
3608 IProfiler* getProfiler() const noexcept
3609 {
3610 return mImpl->getProfiler();
3611 }
3612
3618 ICudaEngine const& getEngine() const noexcept
3619 {
3620 return mImpl->getEngine();
3621 }
3622
3632 void setName(char const* name) noexcept
3633 {
3634 mImpl->setName(name);
3635 }
3636
3642 char const* getName() const noexcept
3643 {
3644 return mImpl->getName();
3645 }
3646
3669 void setDeviceMemory(void* memory) noexcept
3670 {
3671 mImpl->setDeviceMemory(memory);
3672 }
3673
3691 void setDeviceMemoryV2(void* memory, int64_t size) noexcept
3692 {
3693 return mImpl->setDeviceMemoryV2(memory, size);
3694 }
3695
3712 Dims getTensorStrides(char const* tensorName) const noexcept
3713 {
3714 return mImpl->getTensorStrides(tensorName);
3715 }
3716
3717public:
3727 int32_t getOptimizationProfile() const noexcept
3728 {
3729 return mImpl->getOptimizationProfile();
3730 }
3731
3745 bool setInputShape(char const* tensorName, Dims const& dims) noexcept
3746 {
3747 return mImpl->setInputShape(tensorName, dims);
3748 }
3749
3782 Dims getTensorShape(char const* tensorName) const noexcept
3783 {
3784 return mImpl->getTensorShape(tensorName);
3785 }
3786
3798 bool allInputDimensionsSpecified() const noexcept
3799 {
3800 return mImpl->allInputDimensionsSpecified();
3801 }
3802
3816 {
3817 return mImpl->allInputShapesSpecified();
3818 }
3819
3834 void setErrorRecorder(IErrorRecorder* recorder) noexcept
3835 {
3836 mImpl->setErrorRecorder(recorder);
3837 }
3838
3850 {
3851 return mImpl->getErrorRecorder();
3852 }
3853
3866 bool executeV2(void* const* bindings) noexcept
3867 {
3868 return mImpl->executeV2(bindings);
3869 }
3870
3910 bool setOptimizationProfileAsync(int32_t profileIndex, cudaStream_t stream) noexcept
3911 {
3912 return mImpl->setOptimizationProfileAsync(profileIndex, stream);
3913 }
3914
3926 void setEnqueueEmitsProfile(bool enqueueEmitsProfile) noexcept
3927 {
3928 mImpl->setEnqueueEmitsProfile(enqueueEmitsProfile);
3929 }
3930
3938 bool getEnqueueEmitsProfile() const noexcept
3939 {
3940 return mImpl->getEnqueueEmitsProfile();
3941 }
3942
3968 bool reportToProfiler() const noexcept
3969 {
3970 return mImpl->reportToProfiler();
3971 }
3972
4009 bool setTensorAddress(char const* tensorName, void* data) noexcept
4010 {
4011 return mImpl->setTensorAddress(tensorName, data);
4012 }
4013
4026 void const* getTensorAddress(char const* tensorName) const noexcept
4027 {
4028 return mImpl->getTensorAddress(tensorName);
4029 }
4030
4049 bool setOutputTensorAddress(char const* tensorName, void* data) noexcept
4050 {
4051 return mImpl->setOutputTensorAddress(tensorName, data);
4052 }
4053
4071 bool setInputTensorAddress(char const* tensorName, void const* data) noexcept
4072 {
4073 return mImpl->setInputTensorAddress(tensorName, data);
4074 }
4075
4090 void* getOutputTensorAddress(char const* tensorName) const noexcept
4091 {
4092 return mImpl->getOutputTensorAddress(tensorName);
4093 }
4094
4123 int32_t inferShapes(int32_t nbMaxNames, char const** tensorNames) noexcept
4124 {
4125 return mImpl->inferShapes(nbMaxNames, tensorNames);
4126 }
4127
4141 {
4142 return mImpl->updateDeviceMemorySizeForShapes();
4143 }
4144
4156 bool setInputConsumedEvent(cudaEvent_t event) noexcept
4157 {
4158 return mImpl->setInputConsumedEvent(event);
4159 }
4160
4166 cudaEvent_t getInputConsumedEvent() const noexcept
4167 {
4168 return mImpl->getInputConsumedEvent();
4169 }
4170
4185 bool setOutputAllocator(char const* tensorName, IOutputAllocator* outputAllocator) noexcept
4186 {
4187 return mImpl->setOutputAllocator(tensorName, outputAllocator);
4188 }
4189
4198 IOutputAllocator* getOutputAllocator(char const* tensorName) const noexcept
4199 {
4200 return mImpl->getOutputAllocator(tensorName);
4201 }
4202
4216 int64_t getMaxOutputSize(char const* tensorName) const noexcept
4217 {
4218 return mImpl->getMaxOutputSize(tensorName);
4219 }
4220
4237 {
4238 return mImpl->setTemporaryStorageAllocator(allocator);
4239 }
4240
4247 {
4248 return mImpl->getTemporaryStorageAllocator();
4249 }
4250
//!
//! \brief Enqueue inference on a stream.
//!
//! \param stream The CUDA stream on which inference is enqueued.
//!
//! \return The boolean result reported by the underlying implementation.
//!
 4270 bool enqueueV3(cudaStream_t stream) noexcept
 4271 {
 4272 return mImpl->enqueueV3(stream);
 4273 }
4274
//!
//! \brief Set the maximum size for persistent cache usage.
//!
//! \param size The limit in bytes — presumably; unit not visible here, confirm
//! against the official header documentation.
//!
 4286 void setPersistentCacheLimit(size_t size) noexcept
 4287 {
 4288 mImpl->setPersistentCacheLimit(size);
 4289 }
4290
//!
//! \brief Get the maximum size for persistent cache usage.
//!
//! \return The limit previously set via setPersistentCacheLimit, as reported
//! by the underlying implementation.
//!
 4297 size_t getPersistentCacheLimit() const noexcept
 4298 {
 4299 return mImpl->getPersistentCacheLimit();
 4300 }
4301
//!
//! \brief Set the verbosity of the NVTX markers in the execution context.
//!
//! \param verbosity The ProfilingVerbosity to use for NVTX markers.
//!
//! \return The boolean result reported by the underlying implementation.
//!
 4321 bool setNvtxVerbosity(ProfilingVerbosity verbosity) noexcept
 4322 {
 4323 return mImpl->setNvtxVerbosity(verbosity);
 4324 }
4325
4334 {
4335 return mImpl->getNvtxVerbosity();
4336 }
4337
//!
//! \brief Set the auxiliary streams that TensorRT should launch kernels on in
//! the next enqueueV3() call.
//!
//! \param auxStreams Pointer to an array of CUDA streams.
//! \param nbStreams Number of streams in the array.
//!
 4364 void setAuxStreams(cudaStream_t* auxStreams, int32_t nbStreams) noexcept
 4365 {
 4366 mImpl->setAuxStreams(auxStreams, nbStreams);
 4367 }
4368
//!
//! \brief Set DebugListener for this execution context.
//!
//! \param listener The IDebugListener to install.
//!
//! \return The boolean result reported by the underlying implementation.
//!
 4376 bool setDebugListener(IDebugListener* listener) noexcept
 4377 {
 4378 return mImpl->setDebugListener(listener);
 4379 }
4380
4387 {
4388 return mImpl->getDebugListener();
4389 }
4390
//!
//! \brief Set debug state of tensor given the tensor name.
//!
//! \param name The name of the tensor.
//! \param flag The debug state to set.
//!
//! \return The boolean result reported by the underlying implementation.
//!
 4405 bool setTensorDebugState(char const* name, bool flag) noexcept
 4406 {
 4407 return mImpl->setTensorDebugState(name, flag);
 4408 }
4409
//!
//! \brief Set the debug state flag for all tensors in a single call
//! (counterpart of per-tensor setTensorDebugState).
//!
//! \param flag The debug state to set.
//!
//! \return The boolean result reported by the underlying implementation.
//!
 4418 bool setAllTensorsDebugState(bool flag) noexcept
 4419 {
 4420 return mImpl->setAllTensorsDebugState(flag);
 4421 }
4422
//!
//! \brief Get the debug state of the tensor with the given name
//! (as previously set via setTensorDebugState / setAllTensorsDebugState).
//!
//! \param name The name of the tensor.
//!
 4428 bool getDebugState(char const* name) const noexcept
 4429 {
 4430 return mImpl->getDebugState(name);
 4431 }
4432
4433protected:
4434 apiv::VExecutionContext* mImpl;
4435}; // class IExecutionContext
4436
//!
//! \enum LayerInformationFormat
//!
//! \brief The format in which the IEngineInspector emits layer information
//! (see getLayerInformation / getEngineInformation).
//!
 4444 enum class LayerInformationFormat : int32_t
 4445 {
    //! Emit each layer's information on a single line.
 4446 kONELINE = 0,
    //! Emit layer information as a JSON document.
 4447 kJSON = 1,
 4448 };
4449
//! Maximum number of elements in the LayerInformationFormat enum.
//! \see LayerInformationFormat
 4452 template <>
 4453 constexpr inline int32_t EnumMax<LayerInformationFormat>() noexcept
 4454 {
 4455 return 2;
 4456 }
4457
4474{
4475public:
4476 virtual ~IEngineInspector() noexcept = default;
4477
//!
//! \brief Set the execution context to be inspected by this engine inspector.
//!
//! \param context The execution context to inspect (see getExecutionContext).
//!
//! \return The boolean result reported by the underlying implementation.
//!
 4490 bool setExecutionContext(IExecutionContext const* context) noexcept
 4491 {
 4492 return mImpl->setExecutionContext(context);
 4493 }
4494
4503 {
4504 return mImpl->getExecutionContext();
4505 }
4506
//!
//! \brief Get a string describing the information about a specific layer in
//! the current engine or the execution context.
//!
//! \param layerIndex The index of the layer.
//! \param format The format (one-line or JSON) in which to emit the information.
//!
 4527 char const* getLayerInformation(int32_t layerIndex, LayerInformationFormat format) const noexcept
 4528 {
 4529 return mImpl->getLayerInformation(layerIndex, format);
 4530 }
4531
//!
//! \brief Get a string describing the information about all the layers in the
//! current engine or the execution context.
//!
//! \param format The format (one-line or JSON) in which to emit the information.
//!
 4550 char const* getEngineInformation(LayerInformationFormat format) const noexcept
 4551 {
 4552 return mImpl->getEngineInformation(format);
 4553 }
4554
//!
//! \brief Set the ErrorRecorder for this interface.
//!
//! \param recorder The error recorder to register with this interface.
//!
 4569 void setErrorRecorder(IErrorRecorder* recorder) noexcept
 4570 {
 4571 mImpl->setErrorRecorder(recorder);
 4572 }
4573
4585 {
4586 return mImpl->getErrorRecorder();
4587 }
4588
4589protected:
4590 apiv::VEngineInspector* mImpl;
4591}; // class IEngineInspector
4592
4593} // namespace nvinfer1
4594
4599extern "C" TENSORRTAPI void* createInferRuntime_INTERNAL(void* logger, int32_t version) noexcept;
4600
4605extern "C" TENSORRTAPI void* createInferRefitter_INTERNAL(void* engine, void* logger, int32_t version) noexcept;
4606
4611
4617extern "C" TENSORRTAPI nvinfer1::ILogger* getLogger() noexcept;
4618
4619namespace nvinfer1
4620{
4621namespace // unnamed namespace avoids linkage surprises when linking objects built with different versions of this
4622 // header.
4623{
//!
//! \brief Create an instance of an IRuntime class using the given logger.
//!
//! Thin wrapper over the C entry point createInferRuntime_INTERNAL; passes
//! NV_TENSORRT_VERSION so the library can check header/library compatibility.
//!
 4629 inline IRuntime* createInferRuntime(ILogger& logger) noexcept
 4630 {
 4631 return static_cast<IRuntime*>(createInferRuntime_INTERNAL(&logger, NV_TENSORRT_VERSION));
 4632 }
4633
//!
//! \brief Create an instance of an IRefitter class for the given engine,
//! using the given logger.
//!
//! Thin wrapper over the C entry point createInferRefitter_INTERNAL; passes
//! NV_TENSORRT_VERSION so the library can check header/library compatibility.
//!
 4639 inline IRefitter* createInferRefitter(ICudaEngine& engine, ILogger& logger) noexcept
 4640 {
 4641 return static_cast<IRefitter*>(createInferRefitter_INTERNAL(&engine, &logger, NV_TENSORRT_VERSION));
 4642 }
4643
4644} // namespace
4645
4657template <typename T>
4659{
4660public:
4662 {
4663 getPluginRegistry()->registerCreator(instance, "");
4664 }
4665
4666private:
4668 T instance{};
4669};
4670
4671} // namespace nvinfer1
4672
//! Register plugin creator `name` with the plugin registry at static
//! initialization time, by defining a file-scope PluginRegistrar<name>
//! whose constructor performs the registration.
 4673#define REGISTER_TENSORRT_PLUGIN(name) \
 4674 static nvinfer1::PluginRegistrar<name> pluginRegistrar##name {}
4675
4676namespace nvinfer1
4677{
4687{
4688public:
4696 virtual ILogger* findLogger() = 0;
4697
4698protected:
4699 virtual ~ILoggerFinder() = default;
4700};
4701
4704namespace v_1_0
4705{
4706
4708{
4709public:
4711 ~IGpuAsyncAllocator() override = default;
4712
//!
//! \brief Asynchronously allocate memory (pure virtual; implemented by the
//! application's allocator).
//!
//! \param size Requested allocation size in bytes.
//! \param alignment Requested alignment — exact contract not visible here;
//! confirm against the official IGpuAllocator documentation.
//! \param flags Allocator flags.
//! The stream parameter name is commented out in this declaration.
//!
 4742 void* allocateAsync(uint64_t const size, uint64_t const alignment, AllocatorFlags const flags,
 4743 cudaStream_t /*stream*/) noexcept override = 0;
4744
//!
//! \brief Asynchronously deallocate memory previously returned by
//! allocateAsync (pure virtual; implemented by the application's allocator).
//! The stream parameter name is commented out in this declaration.
//!
 4770 bool deallocateAsync(void* const memory, cudaStream_t /*stream*/) noexcept override = 0;
4771
4795 uint64_t const size, uint64_t const alignment, AllocatorFlags const flags) noexcept override
4796 {
4797 return allocateAsync(size, alignment, flags, nullptr);
4798 }
4799
//!
//! \brief Deprecated synchronous deallocation entry point; forwards to
//! deallocateAsync with a null stream.
//!
//! \param memory The memory block to free.
//!
//! \return The boolean result of the forwarded deallocateAsync call.
//!
 4818 TRT_DEPRECATED bool deallocate(void* const memory) noexcept override
 4819 {
 4820 return deallocateAsync(memory, nullptr);
 4821 }
4822
//!
//! \brief Return version information associated with this interface:
//! name "IGpuAllocator", version 1.0.
//!
 4826 InterfaceInfo getInterfaceInfo() const noexcept override
 4827 {
 4828 return {"IGpuAllocator", 1, 0};
 4829 }
4830};
4831} // namespace v_1_0
4832
4847
4848} // namespace nvinfer1
4849
4853extern "C" TENSORRTAPI int32_t getInferLibMajorVersion() noexcept;
4857extern "C" TENSORRTAPI int32_t getInferLibMinorVersion() noexcept;
4861extern "C" TENSORRTAPI int32_t getInferLibPatchVersion() noexcept;
4865extern "C" TENSORRTAPI int32_t getInferLibBuildVersion() noexcept;
4866
4867#endif // NV_INFER_RUNTIME_H
nvinfer1::ILogger * getLogger() noexcept
Return the logger object.
int32_t getInferLibMajorVersion() noexcept
Return the library major version number.
nvinfer1::IPluginRegistry * getPluginRegistry() noexcept
Return the plugin registry.
int32_t getInferLibPatchVersion() noexcept
Return the library patch version number.
int32_t getInferLibMinorVersion() noexcept
Return the library minor version number.
int32_t getInferLibBuildVersion() noexcept
Return the library build version number.
#define TENSORRTAPI
Definition: NvInferRuntimeBase.h:59
#define NV_TENSORRT_VERSION
Definition: NvInferRuntimeBase.h:93
#define TRT_DEPRECATED
Definition: NvInferRuntimeBase.h:45
#define TRT_DEPRECATED_ENUM
Definition: NvInferRuntimeBase.h:46
Structure to define the dimensions of a tensor.
Definition: NvInferRuntimeBase.h:202
static constexpr int32_t MAX_DIMS
The maximum rank (number of dimensions) supported for a tensor.
Definition: NvInferRuntimeBase.h:205
Analog of class Dims with expressions instead of constants for the dimensions.
Definition: NvInferRuntime.h:347
IDimensionExpr const * d[Dims::MAX_DIMS]
The extent of each dimension.
Definition: NvInferRuntime.h:350
int32_t nbDims
The number of dimensions.
Definition: NvInferRuntime.h:349
An engine for executing inference on a built network, with functionally unsafe features.
Definition: NvInferRuntime.h:2547
int32_t getTensorBytesPerComponent(char const *tensorName) const noexcept
Return the number of bytes per component of an element, or -1 if the tensor is not vectorized or prov...
Definition: NvInferRuntime.h:2772
ISerializationConfig * createSerializationConfig() noexcept
Create a serialization configuration object.
Definition: NvInferRuntime.h:3171
TRT_DEPRECATED int64_t getWeightStreamingBudget() const noexcept
Returns the current weight streaming device memory budget in bytes.
Definition: NvInferRuntime.h:3252
int32_t const * getProfileTensorValues(char const *tensorName, int32_t profileIndex, OptProfileSelector select) const noexcept
Get the minimum / optimum / maximum values (not dimensions) for an input tensor given its name under ...
Definition: NvInferRuntime.h:3006
char const * getIOTensorName(int32_t index) const noexcept
Return name of an IO tensor.
Definition: NvInferRuntime.h:3133
int64_t getWeightStreamingBudgetV2() const noexcept
Returns the current weight streaming device memory budget in bytes.
Definition: NvInferRuntime.h:3351
EngineCapability getEngineCapability() const noexcept
Determine what execution capability this engine has.
Definition: NvInferRuntime.h:3022
IErrorRecorder * getErrorRecorder() const noexcept
Get the ErrorRecorder assigned to this interface.
Definition: NvInferRuntime.h:3056
TensorFormat getTensorFormat(char const *tensorName, int32_t profileIndex) const noexcept
Return the tensor format of given profile, or TensorFormat::kLINEAR if the provided name does not map...
Definition: NvInferRuntime.h:2858
TRT_DEPRECATED bool hasImplicitBatchDimension() const noexcept
Query whether the engine was built with an implicit batch dimension.
Definition: NvInferRuntime.h:3070
apiv::VCudaEngine * mImpl
Definition: NvInferRuntime.h:3424
TRT_DEPRECATED size_t getDeviceMemorySizeForProfile(int32_t profileIndex) const noexcept
Return the maximum device memory required by the context for a profile.
Definition: NvInferRuntime.h:2709
IExecutionContext * createExecutionContext(ExecutionContextAllocationStrategy strategy=ExecutionContextAllocationStrategy::kSTATIC) noexcept
Create an execution context and specify the strategy for allocating internal activation memory.
Definition: NvInferRuntime.h:2621
char const * getTensorFormatDesc(char const *tensorName) const noexcept
Return the human readable description of the tensor format, or empty string if the provided name does...
Definition: NvInferRuntime.h:2882
Dims getProfileShape(char const *tensorName, int32_t profileIndex, OptProfileSelector select) const noexcept
Get the minimum / optimum / maximum dimensions for an input tensor given its name under an optimizati...
Definition: NvInferRuntime.h:2984
bool setWeightStreamingBudgetV2(int64_t gpuMemoryBudget) noexcept
Limit the maximum amount of GPU memory usable for network weights in bytes.
Definition: NvInferRuntime.h:3333
int32_t getNbAuxStreams() const noexcept
Return the number of auxiliary streams used by this engine.
Definition: NvInferRuntime.h:3161
int64_t getStreamableWeightsSize() const noexcept
Get the total size in bytes of all streamable weights.
Definition: NvInferRuntime.h:3291
DataType getTensorDataType(char const *tensorName) const noexcept
Determine the required data type for a buffer from its tensor name.
Definition: NvInferRuntime.h:2576
void setErrorRecorder(IErrorRecorder *recorder) noexcept
Set the ErrorRecorder for this interface.
Definition: NvInferRuntime.h:3041
TacticSources getTacticSources() const noexcept
Return the tactic sources required by this engine.
Definition: NvInferRuntime.h:3086
IHostMemory * serializeWithConfig(ISerializationConfig &config) const noexcept
Serialize the network to a stream with the provided SerializationConfig.
Definition: NvInferRuntime.h:3187
virtual ~ICudaEngine() noexcept=default
int64_t getWeightStreamingAutomaticBudget() const noexcept
TensorRT automatically determines a device memory budget for the model to run. The budget is close to...
Definition: NvInferRuntime.h:3376
bool isDebugTensor(char const *name) const noexcept
Check if a tensor is marked as a debug tensor.
Definition: NvInferRuntime.h:3418
int32_t getTensorVectorizedDim(char const *tensorName, int32_t profileIndex) const noexcept
Return the dimension index that the buffer is vectorized of given profile, or -1 if the provided name...
Definition: NvInferRuntime.h:2938
char const * getName() const noexcept
Returns the name of the network associated with the engine.
Definition: NvInferRuntime.h:2953
ProfilingVerbosity getProfilingVerbosity() const noexcept
Return the ProfilingVerbosity the builder config was set to when the engine was built.
Definition: NvInferRuntime.h:3098
bool isShapeInferenceIO(char const *tensorName) const noexcept
True if tensor is required as input for shape calculations or is output from shape calculations.
Definition: NvInferRuntime.h:2659
int64_t getWeightStreamingScratchMemorySize() const noexcept
Returns the size of the scratch memory required by the current weight streaming budget.
Definition: NvInferRuntime.h:3404
TRT_DEPRECATED bool setWeightStreamingBudget(int64_t gpuMemoryBudget) noexcept
Limit the maximum amount of GPU memory usable for network weights in bytes.
Definition: NvInferRuntime.h:3232
int64_t getDeviceMemorySizeV2() const noexcept
Return the maximum device memory required by the context over all profiles.
Definition: NvInferRuntime.h:2725
int32_t getTensorVectorizedDim(char const *tensorName) const noexcept
Return the dimension index that the buffer is vectorized, or -1 if the provided name does not map to ...
Definition: NvInferRuntime.h:2922
TRT_DEPRECATED size_t getDeviceMemorySize() const noexcept
Return the maximum device memory required by the context over all profiles.
Definition: NvInferRuntime.h:2697
int32_t getTensorComponentsPerElement(char const *tensorName, int32_t profileIndex) const noexcept
Return the number of components included in one element of given profile, or -1 if tensor is not vect...
Definition: NvInferRuntime.h:2829
int64_t getDeviceMemorySizeForProfileV2(int32_t profileIndex) const noexcept
Return the maximum device memory required by the context for a profile.
Definition: NvInferRuntime.h:2741
TensorFormat getTensorFormat(char const *tensorName) const noexcept
Return the tensor format, or TensorFormat::kLINEAR if the provided name does not map to an input or o...
Definition: NvInferRuntime.h:2844
IHostMemory * serialize() const noexcept
Serialize the network to a stream.
Definition: NvInferRuntime.h:2604
TensorLocation getTensorLocation(char const *tensorName) const noexcept
Get whether an input or output tensor must be on GPU or CPU.
Definition: NvInferRuntime.h:2639
IEngineInspector * createEngineInspector() const noexcept
Create a new engine inspector which prints the layer information in an engine or an execution context...
Definition: NvInferRuntime.h:3108
int32_t getTensorBytesPerComponent(char const *tensorName, int32_t profileIndex) const noexcept
Return the number of bytes per component of an element given of given profile, or -1 if the tensor is...
Definition: NvInferRuntime.h:2790
HardwareCompatibilityLevel getHardwareCompatibilityLevel() const noexcept
Return the hardware compatibility level of this engine.
Definition: NvInferRuntime.h:3146
int32_t getNbOptimizationProfiles() const noexcept
Get the number of optimization profiles defined for this engine.
Definition: NvInferRuntime.h:2964
TRT_DEPRECATED IExecutionContext * createExecutionContextWithoutDeviceMemory() noexcept
Create an execution context without any device memory allocated.
Definition: NvInferRuntime.h:2685
char const * getTensorFormatDesc(char const *tensorName, int32_t profileIndex) const noexcept
Return the human readable description of the tensor format of given profile, or empty string if the p...
Definition: NvInferRuntime.h:2905
TRT_DEPRECATED int64_t getMinimumWeightStreamingBudget() const noexcept
The minimum number of bytes of GPU memory required by network weights for successful weight streaming...
Definition: NvInferRuntime.h:3275
TensorIOMode getTensorIOMode(char const *tensorName) const noexcept
Determine whether a tensor is an input or output tensor.
Definition: NvInferRuntime.h:2673
int32_t getNbLayers() const noexcept
Get the number of layers in the network.
Definition: NvInferRuntime.h:2590
int32_t getNbIOTensors() const noexcept
Return number of IO tensors.
Definition: NvInferRuntime.h:3121
int32_t getTensorComponentsPerElement(char const *tensorName) const noexcept
Return the number of components included in one element, or -1 if tensor is not vectorized or if the ...
Definition: NvInferRuntime.h:2811
bool isRefittable() const noexcept
Return true if an engine can be refit.
Definition: NvInferRuntime.h:2751
An IDimensionExpr represents an integer expression constructed from constants, input dimensions,...
Definition: NvInferRuntime.h:229
bool isConstant() const noexcept
Return true if expression is a build-time constant.
Definition: NvInferRuntime.h:234
bool isSizeTensor() const noexcept
Return true if this denotes the value of a size tensor.
Definition: NvInferRuntime.h:260
virtual ~IDimensionExpr() noexcept=default
apiv::VDimensionExpr * mImpl
Definition: NvInferRuntime.h:251
int64_t getConstantValue() const noexcept
Get the value of the constant.
Definition: NvInferRuntime.h:245
An engine inspector which prints out the layer information of an engine or an execution context.
Definition: NvInferRuntime.h:4474
char const * getLayerInformation(int32_t layerIndex, LayerInformationFormat format) const noexcept
Get a string describing the information about a specific layer in the current engine or the execution...
Definition: NvInferRuntime.h:4527
IErrorRecorder * getErrorRecorder() const noexcept
Get the ErrorRecorder assigned to this interface.
Definition: NvInferRuntime.h:4584
void setErrorRecorder(IErrorRecorder *recorder) noexcept
Set the ErrorRecorder for this interface.
Definition: NvInferRuntime.h:4569
IExecutionContext const * getExecutionContext() const noexcept
Get the context currently being inspected.
Definition: NvInferRuntime.h:4502
apiv::VEngineInspector * mImpl
Definition: NvInferRuntime.h:4590
virtual ~IEngineInspector() noexcept=default
char const * getEngineInformation(LayerInformationFormat format) const noexcept
Get a string describing the information about all the layers in the current engine or the execution c...
Definition: NvInferRuntime.h:4550
Context for executing inference using an engine, with functionally unsafe features.
Definition: NvInferRuntime.h:3566
IOutputAllocator * getOutputAllocator(char const *tensorName) const noexcept
Get output allocator associated with output tensor of given name, or nullptr if the provided name doe...
Definition: NvInferRuntime.h:4198
IErrorRecorder * getErrorRecorder() const noexcept
Get the ErrorRecorder assigned to this interface.
Definition: NvInferRuntime.h:3849
bool reportToProfiler() const noexcept
Calculate layer timing info for the current optimization profile in IExecutionContext and update the ...
Definition: NvInferRuntime.h:3968
virtual ~IExecutionContext() noexcept=default
void setDeviceMemory(void *memory) noexcept
Set the device memory for use by this execution context.
Definition: NvInferRuntime.h:3669
TRT_DEPRECATED bool allInputShapesSpecified() const noexcept
Whether all input shape bindings have been specified.
Definition: NvInferRuntime.h:3815
bool setTensorDebugState(char const *name, bool flag) noexcept
Set debug state of tensor given the tensor name.
Definition: NvInferRuntime.h:4405
char const * getName() const noexcept
Return the name of the execution context.
Definition: NvInferRuntime.h:3642
IGpuAllocator * getTemporaryStorageAllocator() const noexcept
Get allocator set by setTemporaryStorageAllocator.
Definition: NvInferRuntime.h:4246
void setEnqueueEmitsProfile(bool enqueueEmitsProfile) noexcept
Set whether enqueue emits layer timing to the profiler.
Definition: NvInferRuntime.h:3926
Dims getTensorShape(char const *tensorName) const noexcept
Return the shape of the given input or output.
Definition: NvInferRuntime.h:3782
bool getDebugState(char const *name) const noexcept
Definition: NvInferRuntime.h:4428
bool setInputShape(char const *tensorName, Dims const &dims) noexcept
Set shape of given input.
Definition: NvInferRuntime.h:3745
bool executeV2(void *const *bindings) noexcept
Synchronously execute a network.
Definition: NvInferRuntime.h:3866
bool getEnqueueEmitsProfile() const noexcept
Get the enqueueEmitsProfile state.
Definition: NvInferRuntime.h:3938
void const * getTensorAddress(char const *tensorName) const noexcept
Get memory address bound to given input or output tensor, or nullptr if the provided name does not ma...
Definition: NvInferRuntime.h:4026
bool setOutputAllocator(char const *tensorName, IOutputAllocator *outputAllocator) noexcept
Set output allocator to use for output tensor of given name. Pass nullptr to outputAllocator to unset...
Definition: NvInferRuntime.h:4185
bool setOptimizationProfileAsync(int32_t profileIndex, cudaStream_t stream) noexcept
Select an optimization profile for the current context with async semantics.
Definition: NvInferRuntime.h:3910
apiv::VExecutionContext * mImpl
Definition: NvInferRuntime.h:4434
bool setOutputTensorAddress(char const *tensorName, void *data) noexcept
Set the memory address for a given output tensor.
Definition: NvInferRuntime.h:4049
void setPersistentCacheLimit(size_t size) noexcept
Set the maximum size for persistent cache usage.
Definition: NvInferRuntime.h:4286
size_t getPersistentCacheLimit() const noexcept
Get the maximum size for persistent cache usage.
Definition: NvInferRuntime.h:4297
bool setAllTensorsDebugState(bool flag) noexcept
Definition: NvInferRuntime.h:4418
ICudaEngine const & getEngine() const noexcept
Get the associated engine.
Definition: NvInferRuntime.h:3618
ProfilingVerbosity getNvtxVerbosity() const noexcept
Get the NVTX verbosity of the execution context.
Definition: NvInferRuntime.h:4333
size_t updateDeviceMemorySizeForShapes() noexcept
Recompute the internal activation buffer sizes based on the current input shapes, and return the tota...
Definition: NvInferRuntime.h:4140
void setAuxStreams(cudaStream_t *auxStreams, int32_t nbStreams) noexcept
Set the auxiliary streams that TensorRT should launch kernels on in the next enqueueV3() call.
Definition: NvInferRuntime.h:4364
int64_t getMaxOutputSize(char const *tensorName) const noexcept
Get upper bound on an output tensor's size, in bytes, based on the current optimization profile and i...
Definition: NvInferRuntime.h:4216
int32_t inferShapes(int32_t nbMaxNames, char const **tensorNames) noexcept
Run shape calculations.
Definition: NvInferRuntime.h:4123
bool setDebugListener(IDebugListener *listener) noexcept
Set DebugListener for this execution context.
Definition: NvInferRuntime.h:4376
bool setTensorAddress(char const *tensorName, void *data) noexcept
Set memory address for given input or output tensor.
Definition: NvInferRuntime.h:4009
bool setTemporaryStorageAllocator(IGpuAllocator *allocator) noexcept
Specify allocator to use for internal temporary storage.
Definition: NvInferRuntime.h:4236
void * getOutputTensorAddress(char const *tensorName) const noexcept
Get memory address for given output.
Definition: NvInferRuntime.h:4090
bool enqueueV3(cudaStream_t stream) noexcept
Enqueue inference on a stream.
Definition: NvInferRuntime.h:4270
IDebugListener * getDebugListener() noexcept
Get the DebugListener of this execution context.
Definition: NvInferRuntime.h:4386
int32_t getOptimizationProfile() const noexcept
Get the index of the currently selected optimization profile.
Definition: NvInferRuntime.h:3727
bool setInputTensorAddress(char const *tensorName, void const *data) noexcept
Set memory address for given input.
Definition: NvInferRuntime.h:4071
bool getDebugSync() const noexcept
Get the debug sync flag.
Definition: NvInferRuntime.h:3588
bool setInputConsumedEvent(cudaEvent_t event) noexcept
Mark input as consumed.
Definition: NvInferRuntime.h:4156
Dims getTensorStrides(char const *tensorName) const noexcept
Return the strides of the buffer for the given tensor name.
Definition: NvInferRuntime.h:3712
bool setNvtxVerbosity(ProfilingVerbosity verbosity) noexcept
Set the verbosity of the NVTX markers in the execution context.
Definition: NvInferRuntime.h:4321
IProfiler * getProfiler() const noexcept
Get the profiler.
Definition: NvInferRuntime.h:3608
void setErrorRecorder(IErrorRecorder *recorder) noexcept
Set the ErrorRecorder for this interface.
Definition: NvInferRuntime.h:3834
void setDeviceMemoryV2(void *memory, int64_t size) noexcept
Set the device memory and its corresponding size for use by this execution context.
Definition: NvInferRuntime.h:3691
bool allInputDimensionsSpecified() const noexcept
Whether all dynamic dimensions of input tensors have been specified.
Definition: NvInferRuntime.h:3798
void setProfiler(IProfiler *profiler) noexcept
Set the profiler.
Definition: NvInferRuntime.h:3598
void setName(char const *name) noexcept
Set the name of the execution context.
Definition: NvInferRuntime.h:3632
cudaEvent_t getInputConsumedEvent() const noexcept
The event associated with consuming the input.
Definition: NvInferRuntime.h:4166
Object for constructing IDimensionExpr.
Definition: NvInferRuntime.h:284
IDimensionExpr const * operation(DimensionOperation op, IDimensionExpr const &first, IDimensionExpr const &second) noexcept
Get the operation.
Definition: NvInferRuntime.h:300
virtual ~IExprBuilder() noexcept=default
IDimensionExpr const * constant(int64_t value) noexcept
Return pointer to IDimensionExp for given value.
Definition: NvInferRuntime.h:289
apiv::VExprBuilder * mImpl
Definition: NvInferRuntime.h:307
IDimensionExpr const * declareSizeTensor(int32_t outputIndex, IDimensionExpr const &opt, IDimensionExpr const &upper)
Declare a size tensor at the given output index, with the specified auto-tuning formula and upper bou...
Definition: NvInferRuntime.h:335
Class to handle library allocated memory that is accessible to the user.
Definition: NvInferRuntime.h:139
void * data() const noexcept
A pointer to the raw data that is owned by the library.
Definition: NvInferRuntime.h:144
DataType type() const noexcept
The type of the memory that was allocated.
Definition: NvInferRuntime.h:156
std::size_t size() const noexcept
The size in bytes of the data that was allocated.
Definition: NvInferRuntime.h:150
apiv::VHostMemory * mImpl
Definition: NvInferRuntime.h:162
virtual ~IHostMemory() noexcept=default
A virtual base class to find a logger. Allows a plugin to find an instance of a logger if it needs to...
Definition: NvInferRuntime.h:4687
virtual ILogger * findLogger()=0
Get the logger used by the engine or execution context which called the plugin method.
virtual ~ILoggerFinder()=default
Application-implemented logging interface for the builder, refitter and runtime.
Definition: NvInferRuntimeBase.h:683
Forward declaration of IEngineInspector for use by other interfaces.
Definition: NvInferRuntime.h:48
INoCopy & operator=(INoCopy &&other)=delete
INoCopy(INoCopy const &other)=delete
INoCopy(INoCopy &&other)=delete
virtual ~INoCopy()=default
INoCopy & operator=(INoCopy const &other)=delete
Optimization profile for dynamic input dimensions and shape tensors.
Definition: NvInferRuntime.h:2161
int32_t const * getShapeValues(char const *inputName, OptProfileSelector select) const noexcept
Get the minimum / optimum / maximum values for an input shape tensor.
Definition: NvInferRuntime.h:2275
apiv::VOptimizationProfile * mImpl
Definition: NvInferRuntime.h:2328
Dims getDimensions(char const *inputName, OptProfileSelector select) const noexcept
Get the minimum / optimum / maximum dimensions for a dynamic input tensor.
Definition: NvInferRuntime.h:2202
virtual ~IOptimizationProfile() noexcept=default
float getExtraMemoryTarget() const noexcept
Get the extra memory target that has been defined for this profile.
Definition: NvInferRuntime.h:2305
bool setExtraMemoryTarget(float target) noexcept
Set a target for extra GPU memory that may be used by this profile.
Definition: NvInferRuntime.h:2293
bool setDimensions(char const *inputName, OptProfileSelector select, Dims const &dims) noexcept
Set the minimum / optimum / maximum dimensions for a dynamic input tensor.
Definition: NvInferRuntime.h:2190
bool setShapeValues(char const *inputName, OptProfileSelector select, int32_t const *values, int32_t nbValues) noexcept
Set the minimum / optimum / maximum values for an input shape tensor.
Definition: NvInferRuntime.h:2249
bool isValid() const noexcept
Check whether the optimization profile can be passed to an IBuilderConfig object.
Definition: NvInferRuntime.h:2322
int32_t getNbShapeValues(char const *inputName) const noexcept
Get the number of values for an input shape tensor.
Definition: NvInferRuntime.h:2263
Single registration point for all plugins in an application. It is used to find plugin implementation...
Definition: NvInferRuntimeCommon.h:56
virtual TRT_DEPRECATED bool registerCreator(IPluginCreator &creator, AsciiChar const *const pluginNamespace) noexcept=0
Register a plugin creator implementing IPluginCreator. Returns false if any plugin creator with the s...
Interface for plugins to access per context resources provided by TensorRT.
Definition: NvInferRuntime.h:639
virtual IErrorRecorder * getErrorRecorder() const noexcept=0
Get the error recorder associated with the resource context.
virtual IGpuAllocator * getGpuAllocator() const noexcept=0
Get the GPU allocator associated with the resource context.
Similar to IPluginV2Ext, but with support for dynamic shapes.
Definition: NvInferRuntime.h:404
IPluginV2DynamicExt * clone() const noexcept override=0
Clone the plugin object. This copies over internal plugin parameters as well and returns a new plugin...
virtual ~IPluginV2DynamicExt() noexcept
Definition: NvInferRuntime.h:555
Plugin class for user-implemented layers.
Definition: NvInferRuntimePlugin.h:464
Updates weights in an engine.
Definition: NvInferRuntime.h:1681
bool refitCudaEngineAsync(cudaStream_t stream) noexcept
Enqueue weights refitting of the associated engine on the given stream.
Definition: NvInferRuntime.h:2083
int32_t getMaxThreads() const noexcept
Get the maximum number of threads that can be used by the refitter.
Definition: NvInferRuntime.h:1962
TensorLocation getWeightsLocation(char const *weightsName) const noexcept
Get location for the weights associated with the given name.
Definition: NvInferRuntime.h:2021
bool setNamedWeights(char const *name, Weights weights) noexcept
Specify new weights of given name.
Definition: NvInferRuntime.h:1886
int32_t getAllWeights(int32_t size, char const **weightsNames) noexcept
Get names of all weights that could be refit.
Definition: NvInferRuntime.h:1922
ILogger * getLogger() const noexcept
Get the logger with which the refitter was created.
Definition: NvInferRuntime.h:1932
bool refitCudaEngine() noexcept
Refits associated engine.
Definition: NvInferRuntime.h:1717
int32_t getMissingWeights(int32_t size, char const **weightsNames) noexcept
Get names of missing weights.
Definition: NvInferRuntime.h:1906
TRT_DEPRECATED float getDynamicRangeMax(char const *tensorName) const noexcept
Get maximum of dynamic range.
Definition: NvInferRuntime.h:1809
int32_t getMissing(int32_t size, char const **layerNames, WeightsRole *roles) noexcept
Get description of missing weights.
Definition: NvInferRuntime.h:1738
Weights getNamedWeights(char const *weightsName) const noexcept
Get weights associated with the given name.
Definition: NvInferRuntime.h:2005
bool unsetNamedWeights(char const *weightsName) noexcept
Unset weights associated with the given name.
Definition: NvInferRuntime.h:2037
Weights getWeightsPrototype(char const *weightsName) const noexcept
Get the Weights prototype associated with the given name.
Definition: NvInferRuntime.h:2101
bool setMaxThreads(int32_t maxThreads) noexcept
Set the maximum number of threads.
Definition: NvInferRuntime.h:1948
TRT_DEPRECATED float getDynamicRangeMin(char const *tensorName) const noexcept
Get minimum of dynamic range.
Definition: NvInferRuntime.h:1793
TRT_DEPRECATED int32_t getTensorsWithDynamicRange(int32_t size, char const **tensorNames) const noexcept
Get names of all tensors that have refittable dynamic ranges.
Definition: NvInferRuntime.h:1827
bool setNamedWeights(char const *name, Weights weights, TensorLocation location) noexcept
Specify new weights on a specified device of given name.
Definition: NvInferRuntime.h:1989
void setWeightsValidation(bool weightsValidation) noexcept
Set whether to validate weights during refitting.
Definition: NvInferRuntime.h:2053
TRT_DEPRECATED bool setDynamicRange(char const *tensorName, float min, float max) noexcept
Definition: NvInferRuntime.h:1777
apiv::VRefitter * mImpl
Definition: NvInferRuntime.h:2107
int32_t getAll(int32_t size, char const **layerNames, WeightsRole *roles) noexcept
Get description of all weights that could be refit.
Definition: NvInferRuntime.h:1755
virtual ~IRefitter() noexcept=default
bool getWeightsValidation() const noexcept
Get whether to validate weights values during refitting.
Definition: NvInferRuntime.h:2061
void setErrorRecorder(IErrorRecorder *recorder) noexcept
Set the ErrorRecorder for this interface.
Definition: NvInferRuntime.h:1846
IErrorRecorder * getErrorRecorder() const noexcept
Get the ErrorRecorder assigned to this interface.
Definition: NvInferRuntime.h:1861
Allows a serialized functionally unsafe engine to be deserialized.
Definition: NvInferRuntime.h:1386
bool setMaxThreads(int32_t maxThreads) noexcept
Set the maximum number of threads.
Definition: NvInferRuntime.h:1526
IRuntime * loadRuntime(char const *path) noexcept
Load IRuntime from the file.
Definition: NvInferRuntime.h:1642
ICudaEngine * deserializeCudaEngine(IStreamReader &streamReader)
Deserialize an engine from a stream.
Definition: NvInferRuntime.h:1501
bool getEngineHostCodeAllowed() const noexcept
Get whether the runtime is allowed to deserialize engines with host executable code.
Definition: NvInferRuntime.h:1664
TempfileControlFlags getTempfileControlFlags() const noexcept
Get the tempfile control flags for this runtime.
Definition: NvInferRuntime.h:1614
void setEngineHostCodeAllowed(bool allowed) noexcept
Set whether the runtime is allowed to deserialize engines with host executable code.
Definition: NvInferRuntime.h:1654
virtual ~IRuntime() noexcept=default
void setTemporaryDirectory(char const *path) noexcept
Set the directory that will be used by this runtime for temporary files.
Definition: NvInferRuntime.h:1575
IPluginRegistry & getPluginRegistry() noexcept
Get the local plugin registry that can be used by the runtime.
Definition: NvInferRuntime.h:1624
apiv::VRuntime * mImpl
Definition: NvInferRuntime.h:1670
int32_t getNbDLACores() const noexcept
Returns number of DLA hardware cores accessible or 0 if DLA is unavailable.
Definition: NvInferRuntime.h:1419
ICudaEngine * deserializeCudaEngine(void const *blob, std::size_t size) noexcept
Deserialize an engine from host memory.
Definition: NvInferRuntime.h:1483
void setTempfileControlFlags(TempfileControlFlags flags) noexcept
Set the tempfile control flags for this runtime.
Definition: NvInferRuntime.h:1602
int32_t getDLACore() const noexcept
Get the DLA core that the engine executes on.
Definition: NvInferRuntime.h:1411
void setGpuAllocator(IGpuAllocator *allocator) noexcept
Set the GPU allocator.
Definition: NvInferRuntime.h:1434
IErrorRecorder * getErrorRecorder() const noexcept
Get the ErrorRecorder assigned to this interface.
Definition: NvInferRuntime.h:1468
ILogger * getLogger() const noexcept
Get the logger with which the runtime was created.
Definition: NvInferRuntime.h:1511
int32_t getMaxThreads() const noexcept
Get the maximum number of threads that can be used by the runtime.
Definition: NvInferRuntime.h:1540
char const * getTemporaryDirectory() const noexcept
Get the directory that will be used by this runtime for temporary files.
Definition: NvInferRuntime.h:1586
void setErrorRecorder(IErrorRecorder *recorder) noexcept
Set the ErrorRecorder for this interface.
Definition: NvInferRuntime.h:1453
Holds properties for configuring an engine to serialize the binary.
Definition: NvInferRuntime.h:2438
virtual ~ISerializationConfig() noexcept=default
bool clearFlag(SerializationFlag serializationFlag) noexcept
Clear a serialization flag.
Definition: NvInferRuntime.h:2477
bool setFlag(SerializationFlag serializationFlag) noexcept
Set a serialization flag.
Definition: NvInferRuntime.h:2489
SerializationFlags getFlags() const noexcept
Get the serialization flags for this config.
Definition: NvInferRuntime.h:2465
bool getFlag(SerializationFlag serializationFlag) const noexcept
Returns true if the serialization flag is set.
Definition: NvInferRuntime.h:2501
apiv::VSerializationConfig * mImpl
Definition: NvInferRuntime.h:2507
An Interface class for version control.
Definition: NvInferRuntimeBase.h:400
Version information associated with a TRT interface.
Definition: NvInferRuntimeBase.h:365
Register the plugin creator to the registry The static registry object will be instantiated when the ...
Definition: NvInferRuntime.h:4659
PluginRegistrar()
Definition: NvInferRuntime.h:4661
An array of weights used as a layer parameter.
Definition: NvInferRuntime.h:121
DataType type
The type of the weights.
Definition: NvInferRuntime.h:123
int64_t count
The number of weights in the array.
Definition: NvInferRuntime.h:125
void const * values
The weight values, in a contiguous array.
Definition: NvInferRuntime.h:124
Definition: NvInferRuntime.h:3517
virtual bool processDebugTensor(void const *addr, TensorLocation location, DataType type, Dims const &shape, char const *name, cudaStream_t stream)=0
Callback function that is called when a debug tensor’s value is updated and the debug state of the te...
InterfaceInfo getInterfaceInfo() const noexcept override
Return version information associated with this interface. Applications must not override this method...
Definition: NvInferRuntime.h:3522
~IDebugListener() override=default
Definition: NvInferRuntimeBase.h:857
Definition: NvInferRuntimeBase.h:469
Definition: NvInferRuntime.h:4708
bool deallocateAsync(void *const memory, cudaStream_t) noexcept override=0
A thread-safe callback implemented by the application to handle stream-ordered asynchronous release o...
void * allocateAsync(uint64_t const size, uint64_t const alignment, AllocatorFlags const flags, cudaStream_t) noexcept override=0
A thread-safe callback implemented by the application to handle stream-ordered asynchronous acquisiti...
TRT_DEPRECATED void * allocate(uint64_t const size, uint64_t const alignment, AllocatorFlags const flags) noexcept override
A thread-safe callback implemented by the application to handle acquisition of GPU memory.
Definition: NvInferRuntime.h:4794
TRT_DEPRECATED bool deallocate(void *const memory) noexcept override
A thread-safe callback implemented by the application to handle release of GPU memory.
Definition: NvInferRuntime.h:4818
InterfaceInfo getInterfaceInfo() const noexcept override
Return version information associated with this interface. Applications must not override this method...
Definition: NvInferRuntime.h:4826
~IGpuAsyncAllocator() override=default
Definition: NvInferRuntime.h:3430
InterfaceInfo getInterfaceInfo() const noexcept override
Return version information associated with this interface. Applications must not override this method...
Definition: NvInferRuntime.h:3435
virtual void * reallocateOutputAsync(char const *tensorName, void *currentMemory, uint64_t size, uint64_t alignment, cudaStream_t)
Return a pointer to memory for an output tensor, or nullptr if memory cannot be allocated....
Definition: NvInferRuntime.h:3487
virtual void notifyShape(char const *tensorName, Dims const &dims) noexcept=0
Called by TensorRT when the shape of the output tensor is known.
virtual TRT_DEPRECATED void * reallocateOutput(char const *tensorName, void *currentMemory, uint64_t size, uint64_t alignment) noexcept
Return a pointer to memory for an output tensor, or nullptr if memory cannot be allocated....
Definition: NvInferRuntime.h:3459
Definition: NvInferRuntime.h:665
Definition: NvInferRuntimePlugin.h:935
Definition: NvInferRuntime.h:1191
virtual PluginFieldCollection const * getFieldNames() noexcept=0
Return a list of fields that need to be passed to createPlugin() when creating a plugin for use in th...
InterfaceInfo getInterfaceInfo() const noexcept override
Return version information associated with this interface. Applications must not override this method...
Definition: NvInferRuntime.h:1196
virtual IPluginV3 * createPlugin(AsciiChar const *name, PluginFieldCollection const *fc, TensorRTPhase phase) noexcept=0
Return a plugin object. Return nullptr in case of error.
Definition: NvInferRuntime.h:687
virtual IPluginV3 * clone() noexcept=0
Clone the plugin object. This copies over internal plugin parameters and returns a new plugin object ...
virtual IPluginCapability * getCapabilityInterface(PluginCapabilityType type) noexcept=0
Return a pointer to plugin object implementing the specified PluginCapabilityType.
InterfaceInfo getInterfaceInfo() const noexcept override
Return version information associated with this interface. Applications must not override this method...
Definition: NvInferRuntime.h:692
Definition: NvInferRuntime.h:784
virtual int32_t getFormatCombinationLimit() noexcept
Return the maximum number of format combinations that will be timed by TensorRT during the build phas...
Definition: NvInferRuntime.h:985
virtual int32_t getNbOutputs() const noexcept=0
Get the number of outputs from the plugin.
virtual int32_t configurePlugin(DynamicPluginTensorDesc const *in, int32_t nbInputs, DynamicPluginTensorDesc const *out, int32_t nbOutputs) noexcept=0
Configure the plugin.
virtual int32_t getNbTactics() noexcept
Query for the number of custom tactics the plugin intends to use.
Definition: NvInferRuntime.h:961
virtual char const * getMetadataString() noexcept
Query for a string representing the configuration of the plugin. May be called anytime after plugin c...
Definition: NvInferRuntime.h:996
virtual char const * getTimingCacheID() noexcept
Called to query the suffix to use for the timing cache ID. May be called anytime after plugin creatio...
Definition: NvInferRuntime.h:977
virtual bool supportsFormatCombination(int32_t pos, DynamicPluginTensorDesc const *inOut, int32_t nbInputs, int32_t nbOutputs) noexcept=0
Return true if plugin supports the format and datatype for the input/output indexed by pos.
virtual int32_t getOutputDataTypes(DataType *outputTypes, int32_t nbOutputs, const DataType *inputTypes, int32_t nbInputs) const noexcept=0
Provide the data types of the plugin outputs if the input tensors have the data types provided.
InterfaceInfo getInterfaceInfo() const noexcept override
Return version information associated with this interface. Applications must not override this method...
Definition: NvInferRuntime.h:796
virtual int32_t getOutputShapes(DimsExprs const *inputs, int32_t nbInputs, DimsExprs const *shapeInputs, int32_t nbShapeInputs, DimsExprs *outputs, int32_t nbOutputs, IExprBuilder &exprBuilder) noexcept=0
Provide expressions for computing dimensions of the output tensors from dimensions of the input tenso...
virtual int32_t getValidTactics(int32_t *tactics, int32_t nbTactics) noexcept
Query for any custom tactics that the plugin intends to use.
Definition: NvInferRuntime.h:953
Definition: NvInferRuntime.h:741
InterfaceInfo getInterfaceInfo() const noexcept override
Return version information associated with this interface. Applications must not override this method...
Definition: NvInferRuntime.h:746
virtual AsciiChar const * getPluginName() const noexcept=0
Return the plugin name. Should match the plugin name returned by the corresponding plugin creator.
Definition: NvInferRuntime.h:1003
InterfaceInfo getInterfaceInfo() const noexcept override
Return version information associated with this interface. Applications must not override this method...
Definition: NvInferRuntime.h:1008
virtual int32_t onShapeChange(PluginTensorDesc const *in, int32_t nbInputs, PluginTensorDesc const *out, int32_t nbOutputs) noexcept=0
Called when a plugin is being prepared for execution for specific dimensions. This could happen multi...
virtual PluginFieldCollection const * getFieldsToSerialize() noexcept=0
Get the plugin fields which should be serialized.
virtual int32_t setTactic(int32_t tactic) noexcept
Set the tactic to be used in the subsequent call to enqueue(). If no custom tactics were advertised,...
Definition: NvInferRuntime.h:1020
virtual int32_t enqueue(PluginTensorDesc const *inputDesc, PluginTensorDesc const *outputDesc, void const *const *inputs, void *const *outputs, void *workspace, cudaStream_t stream) noexcept=0
Execute the layer.
virtual IPluginV3 * attachToContext(IPluginResourceContext *context) noexcept=0
Clone the plugin, attach the cloned plugin object to an execution context and grant the cloned plugin ...
Definition: NvInferRuntime.h:1272
virtual ~IProfiler() noexcept
Definition: NvInferRuntime.h:1283
virtual void reportLayerTime(char const *layerName, float ms) noexcept=0
Layer time reporting callback.
Definition: NvInferRuntimeBase.h:1114
Definition: NvInferRuntime.h:1100
virtual int32_t getAliasedInput(int32_t outputIndex) noexcept
Communicates to TensorRT that the output at the specified output index is aliased to the input at the...
Definition: NvInferRuntime.h:1136
InterfaceInfo getInterfaceInfo() const noexcept override
Return version information associated with this interface. Applications must not override this method...
Definition: NvInferRuntime.h:1102
IRefitter * createInferRefitter(ICudaEngine &engine, ILogger &logger) noexcept
Create an instance of an IRefitter class.
Definition: NvInferRuntime.h:4639
IRuntime * createInferRuntime(ILogger &logger) noexcept
Create an instance of an IRuntime class.
Definition: NvInferRuntime.h:4629
The TensorRT API version 1 namespace.
uint32_t TacticSources
Represents a collection of one or more TacticSource values combined using bitwise-OR operations.
Definition: NvInferRuntime.h:2377
v_1_0::IPluginCapability IPluginCapability
Definition: NvInferRuntime.h:682
v_1_0::IOutputAllocator IOutputAllocator
Definition: NvInferRuntime.h:3512
EngineCapability
List of supported engine capability flows.
Definition: NvInferRuntime.h:73
DimensionOperation
An operation on two IDimensionExpr, which represent integer expressions used in dimension computation...
Definition: NvInferRuntime.h:176
@ kSUB
Subtract the second element from the first.
@ kSUM
Sum of the two operands.
@ kPROD
Product of the two operands.
@ kFLOOR_DIV
Floor division of the first element by the second.
@ kEQUAL
1 if operands are equal, 0 otherwise.
@ kMIN
Minimum of the two operands.
@ kLESS
1 if first operand is less than second operand, 0 otherwise.
@ kMAX
Maximum of the two operands.
@ kCEIL_DIV
Division rounding up.
v_1_0::IPluginV3OneCore IPluginV3OneCore
Definition: NvInferRuntime.h:1153
TensorIOMode
Definition of tensor IO Mode.
Definition: NvInferRuntimeBase.h:1100
HardwareCompatibilityLevel
Describes requirements of compatibility with GPU architectures other than that of the GPU on which th...
Definition: NvInfer.h:8642
SerializationFlag
List of valid flags that the engine can enable when serializing the bytes.
Definition: NvInferRuntime.h:2418
@ kEXCLUDE_WEIGHTS
Exclude the weights that can be refitted.
v_1_0::IProfiler IProfiler
Definition: NvInferRuntime.h:1298
uint32_t TempfileControlFlags
Represents a collection of one or more TempfileControlFlag values combined using bitwise-OR operation...
Definition: NvInferRuntime.h:1376
PluginCapabilityType
Enumerates the different capability types a IPluginV3 object may have.
Definition: NvInferRuntimePlugin.h:910
constexpr int32_t EnumMax< WeightsRole >() noexcept
Maximum number of elements in WeightsRole enum.
Definition: NvInferRuntime.h:1319
char_t AsciiChar
Definition: NvInferRuntimeBase.h:107
TensorRTPhase
Indicates a phase of operation of TensorRT.
Definition: NvInferRuntimePlugin.h:925
@ kV2_DYNAMICEXT
IPluginV2DynamicExt.
constexpr int32_t EnumMax< LayerInformationFormat >() noexcept
Definition: NvInferRuntime.h:4453
DataType
The type of weights and tensors.
Definition: NvInferRuntimeBase.h:135
DeviceType
The device that this layer/network will execute on.
Definition: NvInferRuntime.h:1330
@ kSCALE
Scale layer.
@ kCONSTANT
Constant layer.
v_1_0::IDebugListener IDebugListener
Definition: NvInferRuntime.h:3553
TempfileControlFlag
Flags used to control TensorRT's behavior when creating executable temporary files.
Definition: NvInferRuntime.h:1353
@ kALLOW_IN_MEMORY_FILES
Allow creating and loading files in-memory (or unnamed files).
constexpr int32_t EnumMax< OptProfileSelector >() noexcept
Number of different values of OptProfileSelector enum.
Definition: NvInferRuntime.h:2133
WeightsRole
How a layer uses particular Weights.
Definition: NvInferRuntime.h:1308
@ kSHIFT
shift part of IScaleLayer
@ kANY
Any other weights role.
@ kBIAS
bias for IConvolutionLayer or IDeconvolutionLayer
@ kKERNEL
kernel for IConvolutionLayer or IDeconvolutionLayer
constexpr int32_t EnumMax< ProfilingVerbosity >() noexcept
Maximum number of profile verbosity levels in ProfilingVerbosity enum.
Definition: NvInferRuntime.h:2397
ProfilingVerbosity
List of verbosity levels of layer information exposed in NVTX annotations and in IEngineInspector.
Definition: NvInferRuntime.h:2389
@ kLAYER_NAMES_ONLY
Print only the layer names. This is the default setting.
@ kDETAILED
Print detailed layer information including layer names and layer parameters.
TacticSource
List of tactic sources for TensorRT.
Definition: NvInferRuntime.h:2340
TensorFormat PluginFormat
PluginFormat is reserved for backward compatibility.
Definition: NvInferRuntimePlugin.h:48
v_1_0::IPluginV3OneRuntime IPluginV3OneRuntime
Definition: NvInferRuntime.h:1177
@ kMIN
Minimum of the two elements.
constexpr int32_t EnumMax< TempfileControlFlag >() noexcept
Maximum number of elements in TempfileControlFlag enum.
Definition: NvInferRuntime.h:1365
uint32_t SerializationFlags
Represents one or more SerializationFlag values using binary OR operations, e.g., 1U << Serialization...
Definition: NvInferRuntime.h:2408
v_1_0::IPluginV3OneBuild IPluginV3OneBuild
Definition: NvInferRuntime.h:1165
TensorFormat
Format of the input/output tensors.
Definition: NvInferRuntimeBase.h:250
ExecutionContextAllocationStrategy
Different memory allocation behaviors for IExecutionContext.
Definition: NvInferRuntime.h:2522
@ kSTATIC
Default static allocation with the maximum size across all profiles.
@ kUSER_MANAGED
The user supplies custom allocation to the execution context.
@ kON_PROFILE_CHANGE
Reallocate for a profile when it's selected.
constexpr int32_t EnumMax< TacticSource >() noexcept
Maximum number of tactic sources in TacticSource enum.
Definition: NvInferRuntime.h:2366
LayerInformationFormat
The format in which the IEngineInspector prints the layer information.
Definition: NvInferRuntime.h:4445
@ kJSON
Print layer information in JSON format.
@ kONELINE
Print layer information in one line per layer.
v_1_0::IGpuAsyncAllocator IGpuAsyncAllocator
Definition: NvInferRuntime.h:4846
@ kMAX
Maximum over elements.
constexpr int32_t EnumMax< DeviceType >() noexcept
Maximum number of elements in DeviceType enum.
Definition: NvInferRuntime.h:1337
constexpr int32_t EnumMax< DimensionOperation >() noexcept
Maximum number of elements in DimensionOperation enum.
Definition: NvInferRuntime.h:190
constexpr int32_t EnumMax< ExecutionContextAllocationStrategy >() noexcept
Maximum number of memory allocation strategies in ExecutionContextAllocationStrategy enum.
Definition: NvInferRuntime.h:2534
constexpr int32_t EnumMax< SerializationFlag >() noexcept
Maximum number of serialization flags in SerializationFlag enum.
Definition: NvInferRuntime.h:2425
TensorLocation
The location for tensor data storage, device or host.
Definition: NvInferRuntime.h:201
@ kHOST
Data stored on host.
@ kDEVICE
Data stored on device.
OptProfileSelector
When setting or querying optimization profile parameters (such as shape tensor inputs or dynamic dime...
Definition: NvInferRuntime.h:2121
@ kOPT
This is used to set or get the value that is used in the optimization (kernel selection).
uint32_t AllocatorFlags
Definition: NvInferRuntimeBase.h:461
Summarizes tensors that a plugin might see for an input or output.
Definition: NvInferRuntime.h:359
Dims min
Lower bounds on tensor’s dimensions.
Definition: NvInferRuntime.h:364
Dims max
Upper bounds on tensor’s dimensions.
Definition: NvInferRuntime.h:367
Dims opt
Optimum value of tensor’s dimensions specified for auto-tuning.
Definition: NvInferRuntime.h:370
PluginTensorDesc desc
Information required to interpret a pointer to tensor data, except that desc.dims has -1 in place of ...
Definition: NvInferRuntime.h:361
Plugin field collection struct.
Definition: NvInferRuntimePlugin.h:897
Fields that a plugin might see for an input or output.
Definition: NvInferRuntimePlugin.h:67
Declaration of EnumMaxImpl struct to store maximum number of elements in an enumeration type.
Definition: NvInferRuntimeBase.h:120

  Copyright © 2024 NVIDIA Corporation
  Privacy Policy | Manage My Privacy | Do Not Sell or Share My Data | Terms of Service | Accessibility | Corporate Policies | Product Security | Contact