TensorRT 10.0.0 — NvInferRuntime.h
API listing extracted from the generated (Doxygen) documentation of this header.
/*
 * SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 * SPDX-License-Identifier: Apache-2.0
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
17
18#ifndef NV_INFER_RUNTIME_H
19#define NV_INFER_RUNTIME_H
20
26
27#include "NvInferImpl.h"
29
30namespace nvinfer1
31{
32
33class IExecutionContext;
34class ICudaEngine;
35class IPluginFactory;
36class IEngineInspector;
37
46
48{
49protected:
50 INoCopy() = default;
51 virtual ~INoCopy() = default;
52 INoCopy(INoCopy const& other) = delete;
53 INoCopy& operator=(INoCopy const& other) = delete;
54 INoCopy(INoCopy&& other) = delete;
55 INoCopy& operator=(INoCopy&& other) = delete;
56};
57
//!
//! \enum EngineCapability
//!
//! \brief Forms of restriction that an engine can have on the network and runtime it supports.
//!
enum class EngineCapability : int32_t
{
    //! Standard: full TensorRT flow; does not target the safety runtime.
    kSTANDARD = 0,

    //! Safety: restricted flow intended for deployment with the safety runtime.
    kSAFETY = 1,

    //! DLA standalone: restricted flow targeting a DLA loadable.
    //! NOTE(review): this enumerator line was lost in extraction and is restored
    //! from the TensorRT 10.0 API reference; the count of 3 is corroborated by
    //! EnumMaxImpl<EngineCapability>::kVALUE == 3 immediately below in this file.
    kDLA_STANDALONE = 2,
};
95
96namespace impl
97{
99template <>
101{
102 static constexpr int32_t kVALUE = 3;
103};
104} // namespace impl
105
121{
122public:
124 void const* values;
125 int64_t count;
126};
127
138class IHostMemory : public INoCopy
139{
140public:
141 virtual ~IHostMemory() noexcept = default;
142
144 void* data() const noexcept
145 {
146 return mImpl->data();
147 }
148
150 std::size_t size() const noexcept
151 {
152 return mImpl->size();
153 }
154
156 DataType type() const noexcept
157 {
158 return mImpl->type();
159 }
160
161protected:
162 apiv::VHostMemory* mImpl;
163};
164
//!
//! \enum DimensionOperation
//!
//! \brief A binary operation on two IDimensionExpr operands, used to build
//! integer expressions for dynamic dimension computations (via IExprBuilder).
//!
enum class DimensionOperation : int32_t
{
    kSUM = 0,       //!< Sum of the two operands.
    kPROD = 1,      //!< Product of the two operands.
    kMAX = 2,       //!< Maximum of the two operands.
    kMIN = 3,       //!< Minimum of the two operands.
    kSUB = 4,       //!< Subtract the second operand from the first.
    kEQUAL = 5,     //!< 1 if operands are equal, 0 otherwise.
    kLESS = 6,      //!< 1 if first operand is less than second, 0 otherwise.
    kFLOOR_DIV = 7, //!< Floor division of the first operand by the second.
    kCEIL_DIV = 8   //!< Division rounding up.
};
187
189template <>
190constexpr inline int32_t EnumMax<DimensionOperation>() noexcept
191{
192 return 9;
193}
194
//!
//! \enum TensorLocation
//!
//! \brief The location of tensor data storage: device or host.
//!
enum class TensorLocation : int32_t
{
    kDEVICE = 0, //!< Data is stored on the device.
    kHOST = 1,   //!< Data is stored on the host.
};
205
206namespace impl
207{
209template <>
211{
212 static constexpr int32_t kVALUE = 2;
213};
214} // namespace impl
215
229{
230public:
234 bool isConstant() const noexcept
235 {
236 return mImpl->isConstant();
237 }
238
245 int64_t getConstantValue() const noexcept
246 {
247 return mImpl->getConstantValue();
248 }
249
250protected:
251 apiv::VDimensionExpr* mImpl;
252 virtual ~IDimensionExpr() noexcept = default;
253
254public:
260 bool isSizeTensor() const noexcept
261 {
262 return mImpl->isSizeTensor();
263 }
264};
265
283class IExprBuilder : public INoCopy
284{
285public:
289 IDimensionExpr const* constant(int64_t value) noexcept
290 {
291 return mImpl->constant(value);
292 }
293
301 DimensionOperation op, IDimensionExpr const& first, IDimensionExpr const& second) noexcept
302 {
303 return mImpl->operation(op, first, second);
304 }
305
306protected:
307 apiv::VExprBuilder* mImpl;
308 virtual ~IExprBuilder() noexcept = default;
309
310public:
335 IDimensionExpr const* declareSizeTensor(int32_t outputIndex, IDimensionExpr const& opt, IDimensionExpr const& upper)
336 {
337 return mImpl->declareSizeTensor(outputIndex, opt, upper);
338 }
339};
340
347{
348public:
349 int32_t nbDims;
351};
352
359{
362
365
368
371};
372
404{
405public:
406 IPluginV2DynamicExt* clone() const noexcept override = 0;
407
432 virtual DimsExprs getOutputDimensions(
433 int32_t outputIndex, DimsExprs const* inputs, int32_t nbInputs, IExprBuilder& exprBuilder) noexcept = 0;
434
438 static constexpr int32_t kFORMAT_COMBINATION_LIMIT = 100;
439
472 virtual bool supportsFormatCombination(
473 int32_t pos, PluginTensorDesc const* inOut, int32_t nbInputs, int32_t nbOutputs) noexcept = 0;
474
512 virtual void configurePlugin(DynamicPluginTensorDesc const* in, int32_t nbInputs,
513 DynamicPluginTensorDesc const* out, int32_t nbOutputs) noexcept = 0;
514
524 virtual size_t getWorkspaceSize(PluginTensorDesc const* inputs, int32_t nbInputs, PluginTensorDesc const* outputs,
525 int32_t nbOutputs) const noexcept = 0;
526
539 virtual int32_t enqueue(PluginTensorDesc const* inputDesc, PluginTensorDesc const* outputDesc,
540 void const* const* inputs, void* const* outputs, void* workspace, cudaStream_t stream) noexcept = 0;
541
542protected:
550 int32_t getTensorRTVersion() const noexcept override
551 {
552 return (static_cast<int32_t>(PluginVersion::kV2_DYNAMICEXT) << 24 | (NV_TENSORRT_VERSION & 0xFFFFFF));
553 }
554
555 virtual ~IPluginV2DynamicExt() noexcept {}
556
557private:
558 // Following are obsolete base class methods, and must not be implemented or used.
559
563 void configurePlugin(Dims const*, int32_t, Dims const*, int32_t, DataType const*, DataType const*, bool const*,
564 bool const*, PluginFormat, int32_t) noexcept override final
565 {
566 }
567
571 bool supportsFormat(DataType, PluginFormat) const noexcept override final
572 {
573 return false;
574 }
575
579 Dims getOutputDimensions(int32_t, Dims const*, int32_t) noexcept override final
580 {
581 return Dims{-1, {}};
582 }
583
591 TRT_DEPRECATED bool isOutputBroadcastAcrossBatch(int32_t, bool const*, int32_t) const noexcept override final
592 {
593 return false;
594 }
595
603 TRT_DEPRECATED bool canBroadcastInputAcrossBatch(int32_t) const noexcept override final
604 {
605 return true;
606 }
607
611 size_t getWorkspaceSize(int32_t) const noexcept override final
612 {
613 return 0;
614 }
615
619 int32_t enqueue(int32_t, void const* const*, void* const*, void*, cudaStream_t) noexcept override final
620 {
621 return 1;
622 }
623};
624
639{
640public:
645 virtual IGpuAllocator* getGpuAllocator() const noexcept = 0;
646
651 virtual IErrorRecorder* getErrorRecorder() const noexcept = 0;
652 virtual ~IPluginResourceContext() noexcept = default;
653
654protected:
658 IPluginResourceContext& operator=(IPluginResourceContext const&) & = default;
660};
661
662namespace v_1_0
663{
665{
666};
667} // namespace v_1_0
668
683
684namespace v_1_0
685{
687{
688public:
692 InterfaceInfo getInterfaceInfo() const noexcept override
693 {
694 return InterfaceInfo{"PLUGIN", 1, 0};
695 }
696
709
720 virtual IPluginV3* clone() noexcept = 0;
721};
722
723} // namespace v_1_0
724
736using IPluginV3 = v_1_0::IPluginV3;
737
738namespace v_1_0
739{
741{
742public:
746 InterfaceInfo getInterfaceInfo() const noexcept override
747 {
748 return InterfaceInfo{"PLUGIN_V3ONE_CORE", 1, 0};
749 }
750
759 virtual AsciiChar const* getPluginName() const noexcept = 0;
760
769 virtual AsciiChar const* getPluginVersion() const noexcept = 0;
770
780 virtual AsciiChar const* getPluginNamespace() const noexcept = 0;
781};
782
784{
785public:
791 static constexpr int32_t kDEFAULT_FORMAT_COMBINATION_LIMIT = 100;
792
796 InterfaceInfo getInterfaceInfo() const noexcept override
797 {
798 return InterfaceInfo{"PLUGIN_V3ONE_BUILD", 1, 0};
799 }
800
818 virtual int32_t configurePlugin(DynamicPluginTensorDesc const* in, int32_t nbInputs,
819 DynamicPluginTensorDesc const* out, int32_t nbOutputs) noexcept = 0;
820
837 virtual int32_t getOutputDataTypes(
838 DataType* outputTypes, int32_t nbOutputs, const DataType* inputTypes, int32_t nbInputs) const noexcept = 0;
839
855 virtual int32_t getOutputShapes(DimsExprs const* inputs, int32_t nbInputs, DimsExprs const* shapeInputs,
856 int32_t nbShapeInputs, DimsExprs* outputs, int32_t nbOutputs, IExprBuilder& exprBuilder) noexcept = 0;
857
893 int32_t pos, DynamicPluginTensorDesc const* inOut, int32_t nbInputs, int32_t nbOutputs) noexcept = 0;
894
900 virtual int32_t getNbOutputs() const noexcept = 0;
901
911 virtual size_t getWorkspaceSize(DynamicPluginTensorDesc const* inputs, int32_t nbInputs,
912 DynamicPluginTensorDesc const* outputs, int32_t nbOutputs) const noexcept
913 {
914 return 0;
915 }
916
932 virtual int32_t getValidTactics(int32_t* tactics, int32_t nbTactics) noexcept
933 {
934 return 0;
935 }
936
940 virtual int32_t getNbTactics() noexcept
941 {
942 return 0;
943 }
944
956 virtual char const* getTimingCacheID() noexcept
957 {
958 return nullptr;
959 }
960
964 virtual int32_t getFormatCombinationLimit() noexcept
965 {
966 return kDEFAULT_FORMAT_COMBINATION_LIMIT;
967 }
968
975 virtual char const* getMetadataString() noexcept
976 {
977 return nullptr;
978 }
979};
980
982{
983public:
987 InterfaceInfo getInterfaceInfo() const noexcept override
988 {
989 return InterfaceInfo{"PLUGIN_V3ONE_RUNTIME", 1, 0};
990 }
991
999 virtual int32_t setTactic(int32_t tactic) noexcept
1000 {
1001 return 0;
1002 }
1003
1022 virtual int32_t onShapeChange(
1023 PluginTensorDesc const* in, int32_t nbInputs, PluginTensorDesc const* out, int32_t nbOutputs) noexcept = 0;
1024
1038 virtual int32_t enqueue(PluginTensorDesc const* inputDesc, PluginTensorDesc const* outputDesc,
1039 void const* const* inputs, void* const* outputs, void* workspace, cudaStream_t stream) noexcept = 0;
1040
1060 virtual IPluginV3* attachToContext(IPluginResourceContext* context) noexcept = 0;
1061
1067
1071 virtual PluginFieldCollection const* getFieldsToSerialize() noexcept = 0;
1072};
1073} // namespace v_1_0
1074
1085
1097
1109
1110namespace v_1_0
1111{
1113{
1114public:
1118 InterfaceInfo getInterfaceInfo() const noexcept override
1119 {
1120 return InterfaceInfo{"PLUGIN CREATOR_V3ONE", 1, 0};
1121 }
1122
1136 AsciiChar const* name, PluginFieldCollection const* fc, TensorRTPhase phase) noexcept = 0;
1137
1144 virtual PluginFieldCollection const* getFieldNames() noexcept = 0;
1145
1152 virtual AsciiChar const* getPluginName() const noexcept = 0;
1153
1160 virtual AsciiChar const* getPluginVersion() const noexcept = 0;
1161
1168 virtual AsciiChar const* getPluginNamespace() const noexcept = 0;
1169
1171 virtual ~IPluginCreatorV3One() = default;
1172
1173protected:
1176 IPluginCreatorV3One& operator=(IPluginCreatorV3One const&) & = default;
1177 IPluginCreatorV3One& operator=(IPluginCreatorV3One&&) & = default;
1178};
1179} // namespace v_1_0
1180
1190
namespace v_1_0
{
//!
//! \brief Application-implemented interface for per-layer profiling callbacks.
//! NOTE(review): the `class IProfiler` declaration line was lost in extraction
//! and is restored from the TensorRT 10.0 API reference.
//!
class IProfiler
{
public:
    //!
    //! \brief Layer-time reporting callback.
    //!
    //! \param layerName The name of the layer, set when constructing the network definition.
    //! \param ms The time in milliseconds to execute the layer.
    //!
    virtual void reportLayerTime(char const* layerName, float ms) noexcept = 0;

    virtual ~IProfiler() noexcept {}
};
} // namespace v_1_0
1208
1221
//!
//! \enum WeightsRole
//!
//! \brief How a layer uses a particular Weights object (for refitting).
//!
enum class WeightsRole : int32_t
{
    kKERNEL = 0,   //!< Kernel weights (e.g. convolution/deconvolution kernel).
    kBIAS = 1,     //!< Bias weights.
    kSHIFT = 2,    //!< Shift part of a scale layer.
    kSCALE = 3,    //!< Scale part of a scale layer.
    kCONSTANT = 4, //!< Weights of a constant layer.
    kANY = 5,      //!< Any other weights role.
};
1238
1240template <>
1241constexpr inline int32_t EnumMax<WeightsRole>() noexcept
1242{
1243 return 6;
1244}
1245
//!
//! \enum DeviceType
//!
//! \brief The device that this layer/network will execute on.
//!
enum class DeviceType : int32_t
{
    kGPU = 0, //!< Execute on the GPU.
    kDLA = 1, //!< Execute on a DLA core.
};
1256
1258template <>
1259constexpr inline int32_t EnumMax<DeviceType>() noexcept
1260{
1261 return 2;
1262}
1263
//!
//! \enum TempfileControlFlag
//!
//! \brief Flags used to control TensorRT's behavior when creating executable
//! temporary files (e.g. for loading runtime code on some platforms).
//!
//! NOTE(review): both enumerator lines were lost in extraction and are restored
//! from the TensorRT 10.0 API reference; the count of 2 is corroborated by
//! EnumMax<TempfileControlFlag>() == 2 immediately below in this file.
//!
enum class TempfileControlFlag : int32_t
{
    //! Allow creating and loading files in-memory (or the platform equivalent).
    kALLOW_IN_MEMORY_FILES = 0,

    //! Allow creating and loading named files in a temporary directory on the filesystem.
    kALLOW_TEMPORARY_FILES = 1,
};
1284
1286template <>
1287constexpr inline int32_t EnumMax<TempfileControlFlag>() noexcept
1288{
1289 return 2;
1290}
1291
//!
//! \brief A collection of one or more TempfileControlFlag values combined with
//! bitwise OR; each bit position corresponds to one TempfileControlFlag value.
//!
//! \see TempfileControlFlag
//!
using TempfileControlFlags = uint32_t;
1299
1307class IRuntime : public INoCopy
1308{
1309public:
1310 virtual ~IRuntime() noexcept = default;
1311
1323 void setDLACore(int32_t dlaCore) noexcept
1324 {
1325 mImpl->setDLACore(dlaCore);
1326 }
1327
1333 int32_t getDLACore() const noexcept
1334 {
1335 return mImpl->getDLACore();
1336 }
1337
1341 int32_t getNbDLACores() const noexcept
1342 {
1343 return mImpl->getNbDLACores();
1344 }
1345
1356 void setGpuAllocator(IGpuAllocator* allocator) noexcept
1357 {
1358 mImpl->setGpuAllocator(allocator);
1359 }
1360
1372 //
1375 void setErrorRecorder(IErrorRecorder* recorder) noexcept
1376 {
1377 mImpl->setErrorRecorder(recorder);
1378 }
1379
1391 {
1392 return mImpl->getErrorRecorder();
1393 }
1394
1405 ICudaEngine* deserializeCudaEngine(void const* blob, std::size_t size) noexcept
1406 {
1407 return mImpl->deserializeCudaEngine(blob, size);
1408 }
1409
1424 {
1425 return mImpl->deserializeCudaEngine(streamReader);
1426 }
1427
1433 ILogger* getLogger() const noexcept
1434 {
1435 return mImpl->getLogger();
1436 }
1437
1448 bool setMaxThreads(int32_t maxThreads) noexcept
1449 {
1450 return mImpl->setMaxThreads(maxThreads);
1451 }
1452
1462 int32_t getMaxThreads() const noexcept
1463 {
1464 return mImpl->getMaxThreads();
1465 }
1466
1497 void setTemporaryDirectory(char const* path) noexcept
1498 {
1499 return mImpl->setTemporaryDirectory(path);
1500 }
1501
1508 char const* getTemporaryDirectory() const noexcept
1509 {
1510 return mImpl->getTemporaryDirectory();
1511 }
1512
1525 {
1526 return mImpl->setTempfileControlFlags(flags);
1527 }
1528
1537 {
1538 return mImpl->getTempfileControlFlags();
1539 }
1540
1547 {
1548 return mImpl->getPluginRegistry();
1549 }
1550
1564 IRuntime* loadRuntime(char const* path) noexcept
1565 {
1566 return mImpl->loadRuntime(path);
1567 }
1568
1576 void setEngineHostCodeAllowed(bool allowed) noexcept
1577 {
1578 return mImpl->setEngineHostCodeAllowed(allowed);
1579 }
1580
1586 bool getEngineHostCodeAllowed() const noexcept
1587 {
1588 return mImpl->getEngineHostCodeAllowed();
1589 }
1590
1591protected:
1592 apiv::VRuntime* mImpl;
1593};
1594
1602class IRefitter : public INoCopy
1603{
1604public:
1605 virtual ~IRefitter() noexcept = default;
1606
1622 bool setWeights(char const* layerName, WeightsRole role, Weights weights) noexcept
1623 {
1624 return mImpl->setWeights(layerName, role, weights);
1625 }
1626
1639 bool refitCudaEngine() noexcept
1640 {
1641 return mImpl->refitCudaEngine();
1642 }
1643
1660 int32_t getMissing(int32_t size, char const** layerNames, WeightsRole* roles) noexcept
1661 {
1662 return mImpl->getMissing(size, layerNames, roles);
1663 }
1664
1677 int32_t getAll(int32_t size, char const** layerNames, WeightsRole* roles) noexcept
1678 {
1679 return mImpl->getAll(size, layerNames, roles);
1680 }
1681
1697 bool setDynamicRange(char const* tensorName, float min, float max) noexcept
1698 {
1699 return mImpl->setDynamicRange(tensorName, min, max);
1700 }
1701
1711 float getDynamicRangeMin(char const* tensorName) const noexcept
1712 {
1713 return mImpl->getDynamicRangeMin(tensorName);
1714 }
1715
1725 float getDynamicRangeMax(char const* tensorName) const noexcept
1726 {
1727 return mImpl->getDynamicRangeMax(tensorName);
1728 }
1729
1741 int32_t getTensorsWithDynamicRange(int32_t size, char const** tensorNames) const noexcept
1742 {
1743 return mImpl->getTensorsWithDynamicRange(size, tensorNames);
1744 }
1745
1757 //
1760 void setErrorRecorder(IErrorRecorder* recorder) noexcept
1761 {
1762 mImpl->setErrorRecorder(recorder);
1763 }
1764
1776 {
1777 return mImpl->getErrorRecorder();
1778 }
1779
1800 bool setNamedWeights(char const* name, Weights weights) noexcept
1801 {
1802 return mImpl->setNamedWeights(name, weights);
1803 }
1804
1820 int32_t getMissingWeights(int32_t size, char const** weightsNames) noexcept
1821 {
1822 return mImpl->getMissingWeights(size, weightsNames);
1823 }
1824
1836 int32_t getAllWeights(int32_t size, char const** weightsNames) noexcept
1837 {
1838 return mImpl->getAllWeights(size, weightsNames);
1839 }
1840
1846 ILogger* getLogger() const noexcept
1847 {
1848 return mImpl->getLogger();
1849 }
1850
1862 bool setMaxThreads(int32_t maxThreads) noexcept
1863 {
1864 return mImpl->setMaxThreads(maxThreads);
1865 }
1866
1876 int32_t getMaxThreads() const noexcept
1877 {
1878 return mImpl->getMaxThreads();
1879 }
1880
1903 bool setNamedWeights(char const* name, Weights weights, TensorLocation location) noexcept
1904 {
1905 return mImpl->setNamedWeightsWithLocation(name, weights, location);
1906 }
1907
1919 Weights getNamedWeights(char const* weightsName) const noexcept
1920 {
1921 return mImpl->getNamedWeights(weightsName);
1922 }
1923
1935 TensorLocation getWeightsLocation(char const* weightsName) const noexcept
1936 {
1937 return mImpl->getWeightsLocation(weightsName);
1938 }
1939
1951 bool unsetNamedWeights(char const* weightsName) noexcept
1952 {
1953 return mImpl->unsetNamedWeights(weightsName);
1954 }
1955
1967 void setWeightsValidation(bool weightsValidation) noexcept
1968 {
1969 return mImpl->setWeightsValidation(weightsValidation);
1970 }
1971
1975 bool getWeightsValidation() const noexcept
1976 {
1977 return mImpl->getWeightsValidation();
1978 }
1979
1997 bool refitCudaEngineAsync(cudaStream_t stream) noexcept
1998 {
1999 return mImpl->refitCudaEngineAsync(stream);
2000 }
2001
2015 Weights getWeightsPrototype(char const* weightsName) const noexcept
2016 {
2017 return mImpl->getWeightsPrototype(weightsName);
2018 }
2019
2020protected:
2021 apiv::VRefitter* mImpl;
2022};
2023
//!
//! \enum OptProfileSelector
//!
//! \brief When setting or querying optimization-profile parameters (dynamic
//! dimensions, shape-tensor values), selects whether the minimum, optimum, or
//! maximum value is meant.
//!
enum class OptProfileSelector : int32_t
{
    kMIN = 0, //!< The minimum permitted value.
    kOPT = 1, //!< The value used for kernel selection / auto-tuning.
    kMAX = 2  //!< The maximum permitted value.
};
2040
2046template <>
2047constexpr inline int32_t EnumMax<OptProfileSelector>() noexcept
2048{
2049 return 3;
2050}
2051
2075{
2076public:
2104 bool setDimensions(char const* inputName, OptProfileSelector select, Dims const& dims) noexcept
2105 {
2106 return mImpl->setDimensions(inputName, select, dims);
2107 }
2108
2116 Dims getDimensions(char const* inputName, OptProfileSelector select) const noexcept
2117 {
2118 return mImpl->getDimensions(inputName, select);
2119 }
2120
2165 char const* inputName, OptProfileSelector select, int32_t const* values, int32_t nbValues) noexcept
2166 {
2167 return mImpl->setShapeValues(inputName, select, values, nbValues);
2168 }
2169
2178 int32_t getNbShapeValues(char const* inputName) const noexcept
2179 {
2180 return mImpl->getNbShapeValues(inputName);
2181 }
2182
2190 int32_t const* getShapeValues(char const* inputName, OptProfileSelector select) const noexcept
2191 {
2192 return mImpl->getShapeValues(inputName, select);
2193 }
2194
2208 bool setExtraMemoryTarget(float target) noexcept
2209 {
2210 return mImpl->setExtraMemoryTarget(target);
2211 }
2212
2220 float getExtraMemoryTarget() const noexcept
2221 {
2222 return mImpl->getExtraMemoryTarget();
2223 }
2224
2237 bool isValid() const noexcept
2238 {
2239 return mImpl->isValid();
2240 }
2241
2242protected:
2243 apiv::VOptimizationProfile* mImpl;
2244 virtual ~IOptimizationProfile() noexcept = default;
2245};
2246
2254enum class TacticSource : int32_t
2255{
2260
2264
2269
2274
2278};
2279
2280template <>
2281constexpr inline int32_t EnumMax<TacticSource>() noexcept
2282{
2283 return 5;
2284}
2285
//!
//! \brief A collection of one or more TacticSource values combined with bitwise
//! OR; each bit position corresponds to one TacticSource value.
//!
//! \see TacticSource
//!
using TacticSources = uint32_t;
2293
//!
//! \enum ProfilingVerbosity
//!
//! \brief Verbosity levels of layer information exposed in NVTX annotations and
//! by IEngineInspector.
//!
enum class ProfilingVerbosity : int32_t
{
    kLAYER_NAMES_ONLY = 0, //!< Print only the layer names.
    kNONE = 1,             //!< Do not print any layer information.
    kDETAILED = 2,         //!< Print detailed layer information.
};
2309
2311template <>
2312constexpr inline int32_t EnumMax<ProfilingVerbosity>() noexcept
2313{
2314 return 3;
2315}
2316
//!
//! \brief A collection of one or more SerializationFlag values combined with
//! bitwise OR; each bit position corresponds to one SerializationFlag value.
//!
//! \see SerializationFlag
//!
using SerializationFlags = uint32_t;
2324
//!
//! \enum SerializationFlag
//!
//! \brief Options affecting how an engine is serialized (used via ISerializationConfig).
//!
enum class SerializationFlag : int32_t
{
    kEXCLUDE_WEIGHTS = 0,      //!< Exclude refittable weights from the serialized engine.
    //! Exclude the lean runtime from the serialized engine.
    //! NOTE(review): this enumerator line was lost in extraction and is restored
    //! from the TensorRT 10.0 API reference; the count of 2 is corroborated by
    //! EnumMax<SerializationFlag>() == 2 immediately below in this file.
    kEXCLUDE_LEAN_RUNTIME = 1,
};
2337
2339template <>
2340constexpr inline int32_t EnumMax<SerializationFlag>() noexcept
2341{
2342 return 2;
2343}
2344
2353{
2354public:
2355 virtual ~ISerializationConfig() noexcept = default;
2356
2368 bool setFlags(SerializationFlags serializationFlags) noexcept
2369 {
2370 return mImpl->setFlags(serializationFlags);
2371 }
2372
2381 {
2382 return mImpl->getFlags();
2383 }
2384
2392 bool clearFlag(SerializationFlag serializationFlag) noexcept
2393 {
2394 return mImpl->clearFlag(serializationFlag);
2395 }
2396
2404 bool setFlag(SerializationFlag serializationFlag) noexcept
2405 {
2406 return mImpl->setFlag(serializationFlag);
2407 }
2408
2416 bool getFlag(SerializationFlag serializationFlag) const noexcept
2417 {
2418 return mImpl->getFlag(serializationFlag);
2419 }
2420
2421protected:
2422 apiv::VSerializationConfig* mImpl;
2423};
2424
//!
//! \enum ExecutionContextAllocationStrategy
//!
//! \brief Memory allocation strategies for an IExecutionContext's activation memory.
//!
//! NOTE(review): the enum-class declaration line was lost in extraction; the
//! name is restored from the EnumMax<ExecutionContextAllocationStrategy>
//! specialization immediately below in this file.
//!
enum class ExecutionContextAllocationStrategy : int32_t
{
    kSTATIC = 0,            //!< Static allocation sized for the whole engine.
    kON_PROFILE_CHANGE = 1, //!< Reallocate when the selected optimization profile changes.
    kUSER_MANAGED = 2,      //!< The application supplies the memory itself.
};
2442
2448template <>
2449constexpr inline int32_t EnumMax<ExecutionContextAllocationStrategy>() noexcept
2450{
2451 return 3;
2452}
2453
2461class ICudaEngine : public INoCopy
2462{
2463public:
2464 virtual ~ICudaEngine() noexcept = default;
2465
2476 Dims getTensorShape(char const* tensorName) const noexcept
2477 {
2478 return mImpl->getTensorShape(tensorName);
2479 }
2480
2491 DataType getTensorDataType(char const* tensorName) const noexcept
2492 {
2493 return mImpl->getTensorDataType(tensorName);
2494 }
2495
2505 int32_t getNbLayers() const noexcept
2506 {
2507 return mImpl->getNbLayers();
2508 }
2509
2519 IHostMemory* serialize() const noexcept
2520 {
2521 return mImpl->serialize();
2522 }
2523
2538 {
2539 return mImpl->createExecutionContext(strategy);
2540 }
2541
2554 TensorLocation getTensorLocation(char const* tensorName) const noexcept
2555 {
2556 return mImpl->getTensorLocation(tensorName);
2557 }
2558
2574 bool isShapeInferenceIO(char const* tensorName) const noexcept
2575 {
2576 return mImpl->isShapeInferenceIO(tensorName);
2577 }
2578
2588 TensorIOMode getTensorIOMode(char const* tensorName) const noexcept
2589 {
2590 return mImpl->getTensorIOMode(tensorName);
2591 }
2592
2601 {
2602 return mImpl->createExecutionContextWithoutDeviceMemory();
2603 }
2604
2610 size_t getDeviceMemorySize() const noexcept
2611 {
2612 return mImpl->getDeviceMemorySize();
2613 }
2614
2620 size_t getDeviceMemorySizeForProfile(int32_t profileIndex) const noexcept
2621 {
2622 return mImpl->getDeviceMemorySizeForProfile(profileIndex);
2623 }
2624
2630 bool isRefittable() const noexcept
2631 {
2632 return mImpl->isRefittable();
2633 }
2634
2651 int32_t getTensorBytesPerComponent(char const* tensorName) const noexcept
2652 {
2653 return mImpl->getTensorBytesPerComponent(tensorName);
2654 }
2655
2669 int32_t getTensorBytesPerComponent(char const* tensorName, int32_t profileIndex) const noexcept
2670 {
2671 return mImpl->getTensorBytesPerComponentV2(tensorName, profileIndex);
2672 }
2673
2690 int32_t getTensorComponentsPerElement(char const* tensorName) const noexcept
2691 {
2692 return mImpl->getTensorComponentsPerElement(tensorName);
2693 }
2694
2708 int32_t getTensorComponentsPerElement(char const* tensorName, int32_t profileIndex) const noexcept
2709 {
2710 return mImpl->getTensorComponentsPerElementV2(tensorName, profileIndex);
2711 }
2712
2723 TensorFormat getTensorFormat(char const* tensorName) const noexcept
2724 {
2725 return mImpl->getTensorFormat(tensorName);
2726 }
2727
2737 TensorFormat getTensorFormat(char const* tensorName, int32_t profileIndex) const noexcept
2738 {
2739 return mImpl->getTensorFormatV2(tensorName, profileIndex);
2740 }
2741
2761 char const* getTensorFormatDesc(char const* tensorName) const noexcept
2762 {
2763 return mImpl->getTensorFormatDesc(tensorName);
2764 }
2765
2784 char const* getTensorFormatDesc(char const* tensorName, int32_t profileIndex) const noexcept
2785 {
2786 return mImpl->getTensorFormatDescV2(tensorName, profileIndex);
2787 }
2788
2801 int32_t getTensorVectorizedDim(char const* tensorName) const noexcept
2802 {
2803 return mImpl->getTensorVectorizedDim(tensorName);
2804 }
2805
2817 int32_t getTensorVectorizedDim(char const* tensorName, int32_t profileIndex) const noexcept
2818 {
2819 return mImpl->getTensorVectorizedDimV2(tensorName, profileIndex);
2820 }
2821
2832 char const* getName() const noexcept
2833 {
2834 return mImpl->getName();
2835 }
2836
2843 int32_t getNbOptimizationProfiles() const noexcept
2844 {
2845 return mImpl->getNbOptimizationProfiles();
2846 }
2847
2863 Dims getProfileShape(char const* tensorName, int32_t profileIndex, OptProfileSelector select) const noexcept
2864 {
2865 return mImpl->getProfileShape(tensorName, profileIndex, select);
2866 }
2867
2884 int32_t const* getProfileTensorValues(char const* tensorName, int32_t profileIndex, OptProfileSelector select) const
2885 noexcept
2886 {
2887 return mImpl->getProfileTensorValues(tensorName, profileIndex, select);
2888 }
2889
2901 {
2902 return mImpl->getEngineCapability();
2903 }
2904
2919 void setErrorRecorder(IErrorRecorder* recorder) noexcept
2920 {
2921 return mImpl->setErrorRecorder(recorder);
2922 }
2923
2935 {
2936 return mImpl->getErrorRecorder();
2937 }
2938
2949 {
2950 return mImpl->hasImplicitBatchDimension();
2951 }
2952
2965 {
2966 return mImpl->getTacticSources();
2967 }
2968
2977 {
2978 return mImpl->getProfilingVerbosity();
2979 }
2980
2987 {
2988 return mImpl->createEngineInspector();
2989 }
2990
2999 int32_t getNbIOTensors() const noexcept
3000 {
3001 return mImpl->getNbIOTensors();
3002 }
3003
3011 char const* getIOTensorName(int32_t index) const noexcept
3012 {
3013 return mImpl->getIOTensorName(index);
3014 }
3015
3025 {
3026 return mImpl->getHardwareCompatibilityLevel();
3027 }
3028
3039 int32_t getNbAuxStreams() const noexcept
3040 {
3041 return mImpl->getNbAuxStreams();
3042 }
3043
3050 {
3051 return mImpl->createSerializationConfig();
3052 }
3053
3064 {
3065 return mImpl->serializeWithConfig(config);
3066 }
3067
3106 bool setWeightStreamingBudget(int64_t gpuMemoryBudget) noexcept
3107 {
3108 return mImpl->setWeightStreamingBudget(gpuMemoryBudget);
3109 }
3110
3124 int64_t getWeightStreamingBudget() const noexcept
3125 {
3126 return mImpl->getWeightStreamingBudget();
3127 }
3128
3146 int64_t getMinimumWeightStreamingBudget() const noexcept
3147 {
3148 return mImpl->getMinimumWeightStreamingBudget();
3149 }
3150
3163 int64_t getStreamableWeightsSize() const noexcept
3164 {
3165 return mImpl->getStreamableWeightsSize();
3166 }
3167
3177 bool isDebugTensor(char const* name) const noexcept
3178 {
3179 return mImpl->isDebugTensor(name);
3180 }
3181
3182protected:
3183 apiv::VCudaEngine* mImpl;
3184};
3185
3186namespace v_1_0
3187{
3189{
3190public:
3194 InterfaceInfo getInterfaceInfo() const noexcept override
3195 {
3196 return {"IOutputAllocator", 1, 0};
3197 }
3198
3218 char const* tensorName, void* currentMemory, uint64_t size, uint64_t alignment) noexcept
3219 {
3220 return nullptr;
3221 }
3222
3246 char const* tensorName, void* currentMemory, uint64_t size, uint64_t alignment, cudaStream_t /*stream*/)
3247 {
3248 return reallocateOutput(tensorName, currentMemory, size, alignment);
3249 }
3250
3259 virtual void notifyShape(char const* tensorName, Dims const& dims) noexcept = 0;
3260};
3261} // namespace v_1_0
3262
3271
3272namespace v_1_0
3273{
3275{
3276public:
3280 InterfaceInfo getInterfaceInfo() const noexcept override
3281 {
3282 return {"IDebugListener", 1, 0};
3283 }
3284
3298 virtual bool processDebugTensor(void const* addr, TensorLocation location, DataType type, Dims const& shape,
3299 char const* name, cudaStream_t stream)
3300 = 0;
3301
3302 ~IDebugListener() override = default;
3303};
3304} // namespace v_1_0
3305
3312
3324{
3325public:
3326 virtual ~IExecutionContext() noexcept = default;
3327
3336 void setDebugSync(bool sync) noexcept
3337 {
3338 mImpl->setDebugSync(sync);
3339 }
3340
3346 bool getDebugSync() const noexcept
3347 {
3348 return mImpl->getDebugSync();
3349 }
3350
3356 void setProfiler(IProfiler* profiler) noexcept
3357 {
3358 mImpl->setProfiler(profiler);
3359 }
3360
3366 IProfiler* getProfiler() const noexcept
3367 {
3368 return mImpl->getProfiler();
3369 }
3370
3376 ICudaEngine const& getEngine() const noexcept
3377 {
3378 return mImpl->getEngine();
3379 }
3380
3390 void setName(char const* name) noexcept
3391 {
3392 mImpl->setName(name);
3393 }
3394
3400 char const* getName() const noexcept
3401 {
3402 return mImpl->getName();
3403 }
3404
3421 void setDeviceMemory(void* memory) noexcept
3422 {
3423 mImpl->setDeviceMemory(memory);
3424 }
3425
3442 Dims getTensorStrides(char const* tensorName) const noexcept
3443 {
3444 return mImpl->getTensorStrides(tensorName);
3445 }
3446
3447public:
3457 int32_t getOptimizationProfile() const noexcept
3458 {
3459 return mImpl->getOptimizationProfile();
3460 }
3461
3475 bool setInputShape(char const* tensorName, Dims const& dims) noexcept
3476 {
3477 return mImpl->setInputShape(tensorName, dims);
3478 }
3479
3512 Dims getTensorShape(char const* tensorName) const noexcept
3513 {
3514 return mImpl->getTensorShape(tensorName);
3515 }
3516
3528 bool allInputDimensionsSpecified() const noexcept
3529 {
3530 return mImpl->allInputDimensionsSpecified();
3531 }
3532
3546 {
3547 return mImpl->allInputShapesSpecified();
3548 }
3549
3564 void setErrorRecorder(IErrorRecorder* recorder) noexcept
3565 {
3566 mImpl->setErrorRecorder(recorder);
3567 }
3568
3580 {
3581 return mImpl->getErrorRecorder();
3582 }
3583
3596 bool executeV2(void* const* bindings) noexcept
3597 {
3598 return mImpl->executeV2(bindings);
3599 }
3600
3640 bool setOptimizationProfileAsync(int32_t profileIndex, cudaStream_t stream) noexcept
3641 {
3642 return mImpl->setOptimizationProfileAsync(profileIndex, stream);
3643 }
3644
3656 void setEnqueueEmitsProfile(bool enqueueEmitsProfile) noexcept
3657 {
3658 mImpl->setEnqueueEmitsProfile(enqueueEmitsProfile);
3659 }
3660
3668 bool getEnqueueEmitsProfile() const noexcept
3669 {
3670 return mImpl->getEnqueueEmitsProfile();
3671 }
3672
3698 bool reportToProfiler() const noexcept
3699 {
3700 return mImpl->reportToProfiler();
3701 }
3702
3739 bool setTensorAddress(char const* tensorName, void* data) noexcept
3740 {
3741 return mImpl->setTensorAddress(tensorName, data);
3742 }
3743
3756 void const* getTensorAddress(char const* tensorName) const noexcept
3757 {
3758 return mImpl->getTensorAddress(tensorName);
3759 }
3760
3779 bool setOutputTensorAddress(char const* tensorName, void* data) noexcept
3780 {
3781 return mImpl->setOutputTensorAddress(tensorName, data);
3782 }
3783
3801 bool setInputTensorAddress(char const* tensorName, void const* data) noexcept
3802 {
3803 return mImpl->setInputTensorAddress(tensorName, data);
3804 }
3805
3820 void* getOutputTensorAddress(char const* tensorName) const noexcept
3821 {
3822 return mImpl->getOutputTensorAddress(tensorName);
3823 }
3824
3853 int32_t inferShapes(int32_t nbMaxNames, char const** tensorNames) noexcept
3854 {
3855 return mImpl->inferShapes(nbMaxNames, tensorNames);
3856 }
3857
3871 {
3872 return mImpl->updateDeviceMemorySizeForShapes();
3873 }
3874
3886 bool setInputConsumedEvent(cudaEvent_t event) noexcept
3887 {
3888 return mImpl->setInputConsumedEvent(event);
3889 }
3890
3896 cudaEvent_t getInputConsumedEvent() const noexcept
3897 {
3898 return mImpl->getInputConsumedEvent();
3899 }
3900
3915 bool setOutputAllocator(char const* tensorName, IOutputAllocator* outputAllocator) noexcept
3916 {
3917 return mImpl->setOutputAllocator(tensorName, outputAllocator);
3918 }
3919
3928 IOutputAllocator* getOutputAllocator(char const* tensorName) const noexcept
3929 {
3930 return mImpl->getOutputAllocator(tensorName);
3931 }
3932
3946 int64_t getMaxOutputSize(char const* tensorName) const noexcept
3947 {
3948 return mImpl->getMaxOutputSize(tensorName);
3949 }
3950
3967 {
3968 return mImpl->setTemporaryStorageAllocator(allocator);
3969 }
3970
3977 {
3978 return mImpl->getTemporaryStorageAllocator();
3979 }
3980
4000 bool enqueueV3(cudaStream_t stream) noexcept
4001 {
4002 return mImpl->enqueueV3(stream);
4003 }
4004
4016 void setPersistentCacheLimit(size_t size) noexcept
4017 {
4018 mImpl->setPersistentCacheLimit(size);
4019 }
4020
4027 size_t getPersistentCacheLimit() const noexcept
4028 {
4029 return mImpl->getPersistentCacheLimit();
4030 }
4031
4051 bool setNvtxVerbosity(ProfilingVerbosity verbosity) noexcept
4052 {
4053 return mImpl->setNvtxVerbosity(verbosity);
4054 }
4055
4064 {
4065 return mImpl->getNvtxVerbosity();
4066 }
4067
4094 void setAuxStreams(cudaStream_t* auxStreams, int32_t nbStreams) noexcept
4095 {
4096 mImpl->setAuxStreams(auxStreams, nbStreams);
4097 }
4098
4106 bool setDebugListener(IDebugListener* listener) noexcept
4107 {
4108 return mImpl->setDebugListener(listener);
4109 }
4110
4117 {
4118 return mImpl->getDebugListener();
4119 }
4120
4135 bool setTensorDebugState(char const* name, bool flag) noexcept
4136 {
4137 return mImpl->setTensorDebugState(name, flag);
4138 }
4139
4148 bool setAllTensorsDebugState(bool flag) noexcept
4149 {
4150 return mImpl->setAllTensorsDebugState(flag);
4151 }
4152
//! \brief Get the current debug state of the named tensor.
//!
//! Counterpart of setTensorDebugState(); forwards to mImpl.
4158 bool getDebugState(char const* name) const noexcept
4159 {
4160 return mImpl->getDebugState(name);
4161 }
4162
4163protected:
4164 apiv::VExecutionContext* mImpl;
4165}; // class IExecutionContext
4166
//! \enum LayerInformationFormat
//!
//! \brief The output format used by IEngineInspector when emitting layer
//! information (see IEngineInspector::getLayerInformation /
//! getEngineInformation).
4174enum class LayerInformationFormat : int32_t
4175{
4176 kONELINE = 0, //!< Print each layer's information on a single line.
4177 kJSON = 1,    //!< Print layer information as JSON.
4178};
4179
//! Maximum number of elements in LayerInformationFormat enum.
//! \see LayerInformationFormat
//! (Specialization of the EnumMax pattern used throughout this header; the
//! value 2 must track the number of enumerators above.)
4182template <>
4183constexpr inline int32_t EnumMax<LayerInformationFormat>() noexcept
4184{
4185 return 2;
4186}
4187
4204{
4205public:
4206 virtual ~IEngineInspector() noexcept = default;
4207
//! \brief Set the execution context to be inspected (the context returned by
//! getExecutionContext()).
//!
//! Returns the success/failure result reported by the implementation.
4220 bool setExecutionContext(IExecutionContext const* context) noexcept
4221 {
4222 return mImpl->setExecutionContext(context);
4223 }
4224
4233 {
4234 return mImpl->getExecutionContext();
4235 }
4236
//! \brief Get a string describing a specific layer in the current engine or
//! execution context.
//!
//! \param layerIndex index of the layer to describe.
//! \param format whether the string is one-line or JSON (LayerInformationFormat).
//! The returned pointer is owned by the implementation — lifetime presumably
//! tied to this inspector; confirm against the TensorRT API reference.
4257 char const* getLayerInformation(int32_t layerIndex, LayerInformationFormat format) const noexcept
4258 {
4259 return mImpl->getLayerInformation(layerIndex, format);
4260 }
4261
//! \brief Get a string describing all the layers in the current engine or
//! execution context, in the requested LayerInformationFormat.
//!
//! Thin forwarder to the versioned implementation object (mImpl).
4280 char const* getEngineInformation(LayerInformationFormat format) const noexcept
4281 {
4282 return mImpl->getEngineInformation(format);
4283 }
4284
//! \brief Set the ErrorRecorder for this interface.
//!
//! \param recorder error recorder to receive errors raised by this inspector;
//! ownership/lifetime rules are defined by the IErrorRecorder contract.
4299 void setErrorRecorder(IErrorRecorder* recorder) noexcept
4300 {
4301 mImpl->setErrorRecorder(recorder);
4302 }
4303
4315 {
4316 return mImpl->getErrorRecorder();
4317 }
4318
4319protected:
4320 apiv::VEngineInspector* mImpl;
4321}; // class IEngineInspector
4322
4323} // namespace nvinfer1
4324
4329extern "C" TENSORRTAPI void* createInferRuntime_INTERNAL(void* logger, int32_t version) noexcept;
4330
4335extern "C" TENSORRTAPI void* createInferRefitter_INTERNAL(void* engine, void* logger, int32_t version) noexcept;
4336
4341
4347extern "C" TENSORRTAPI nvinfer1::ILogger* getLogger() noexcept;
4348
4349namespace nvinfer1
4350{
4351namespace // unnamed namespace avoids linkage surprises when linking objects built with different versions of this
4352 // header.
4353{
//! \brief Create an instance of an IRuntime class using the given logger.
//!
//! Wraps the C entry point createInferRuntime_INTERNAL, passing
//! NV_TENSORRT_VERSION (presumably so the library can validate header/library
//! compatibility). Lives in the unnamed namespace above to avoid linkage
//! surprises when objects are built against different header versions.
4359inline IRuntime* createInferRuntime(ILogger& logger) noexcept
4360{
4361 return static_cast<IRuntime*>(createInferRuntime_INTERNAL(&logger, NV_TENSORRT_VERSION));
4362}
4363
//! \brief Create an instance of an IRefitter class for the given engine, using
//! the given logger.
//!
//! Wraps the C entry point createInferRefitter_INTERNAL, passing
//! NV_TENSORRT_VERSION (same version-tagging scheme as createInferRuntime).
4369inline IRefitter* createInferRefitter(ICudaEngine& engine, ILogger& logger) noexcept
4370{
4371 return static_cast<IRefitter*>(createInferRefitter_INTERNAL(&engine, &logger, NV_TENSORRT_VERSION));
4372}
4373
4374} // namespace
4375
4387template <typename T>
4389{
4390public:
4392 {
4393 getPluginRegistry()->registerCreator(instance, "");
4394 }
4395
4396private:
4398 T instance{};
4399};
4400
4401} // namespace nvinfer1
4402
//! \brief Register a plugin creator type with the global plugin registry.
//!
//! Expands to a file-scope static PluginRegistrar<name>; its constructor runs
//! at static-initialization time and calls
//! getPluginRegistry()->registerCreator() for an instance of \p name.
4403#define REGISTER_TENSORRT_PLUGIN(name) \
4404 static nvinfer1::PluginRegistrar<name> pluginRegistrar##name {}
4405
4406namespace nvinfer1
4407{
4417{
4418public:
4426 virtual ILogger* findLogger() = 0;
4427
4428protected:
4429 virtual ~ILoggerFinder() = default;
4430};
4431
4434namespace v_1_0
4435{
4436
4438{
4439public:
4441 ~IGpuAsyncAllocator() override = default;
4442
//! \brief Asynchronously allocate GPU memory (pure virtual — must be
//! implemented by the application).
//!
//! The stream parameter's name is commented out here, but the cudaStream_t
//! argument remains part of the signature; the allocation is expected to be
//! ordered with respect to that stream.
4472 void* allocateAsync(uint64_t const size, uint64_t const alignment, AllocatorFlags const flags,
4473 cudaStream_t /*stream*/) noexcept override = 0;
4474
//! \brief Asynchronously free memory obtained from allocateAsync (pure
//! virtual — must be implemented by the application). Returns success/failure.
4500 bool deallocateAsync(void* const memory, cudaStream_t /*stream*/) noexcept override = 0;
4501
4525 uint64_t const size, uint64_t const alignment, AllocatorFlags const flags) noexcept override
4526 {
4527 return allocateAsync(size, alignment, flags, nullptr);
4528 }
4529
//! \brief Deprecated synchronous deallocate, implemented by delegating to
//! deallocateAsync() with a null (default) stream.
//!
//! Kept for backward compatibility with the non-async IGpuAllocator API.
4548 TRT_DEPRECATED bool deallocate(void* const memory) noexcept override
4549 {
4550 return deallocateAsync(memory, nullptr);
4551 }
4552
//! \brief Return version information associated with this interface:
//! interface name "IGpuAllocator", version 1.0.
//!
//! Note the reported name is the base "IGpuAllocator" interface, not
//! "IGpuAsyncAllocator" — the async class shares the base's version identity.
4556 InterfaceInfo getInterfaceInfo() const noexcept override
4557 {
4558 return {"IGpuAllocator", 1, 0};
4559 }
4560};
4561} // namespace v_1_0
4562
4577
4578} // namespace nvinfer1
4579
4583extern "C" TENSORRTAPI int32_t getInferLibMajorVersion() noexcept;
4587extern "C" TENSORRTAPI int32_t getInferLibMinorVersion() noexcept;
4591extern "C" TENSORRTAPI int32_t getInferLibPatchVersion() noexcept;
4595extern "C" TENSORRTAPI int32_t getInferLibBuildVersion() noexcept;
4596
4597#endif // NV_INFER_RUNTIME_H
nvinfer1::ILogger * getLogger() noexcept
Return the logger object.
int32_t getInferLibMajorVersion() noexcept
Return the library major version number.
nvinfer1::IPluginRegistry * getPluginRegistry() noexcept
Return the plugin registry.
int32_t getInferLibPatchVersion() noexcept
Return the library patch version number.
int32_t getInferLibMinorVersion() noexcept
Return the library minor version number.
int32_t getInferLibBuildVersion() noexcept
Return the library build version number.
#define TENSORRTAPI
Definition: NvInferRuntimeBase.h:59
#define NV_TENSORRT_VERSION
Definition: NvInferRuntimeBase.h:87
#define TRT_DEPRECATED
Definition: NvInferRuntimeBase.h:45
#define TRT_DEPRECATED_ENUM
Definition: NvInferRuntimeBase.h:46
Structure to define the dimensions of a tensor.
Definition: NvInferRuntimeBase.h:195
static constexpr int32_t MAX_DIMS
The maximum rank (number of dimensions) supported for a tensor.
Definition: NvInferRuntimeBase.h:198
Analog of class Dims with expressions instead of constants for the dimensions.
Definition: NvInferRuntime.h:347
IDimensionExpr const * d[Dims::MAX_DIMS]
The extent of each dimension.
Definition: NvInferRuntime.h:350
int32_t nbDims
The number of dimensions.
Definition: NvInferRuntime.h:349
An engine for executing inference on a built network, with functionally unsafe features.
Definition: NvInferRuntime.h:2462
int32_t getTensorBytesPerComponent(char const *tensorName) const noexcept
Return the number of bytes per component of an element, or -1 if the provided name does not map to an...
Definition: NvInferRuntime.h:2651
ISerializationConfig * createSerializationConfig() noexcept
Create a serialization configuration object.
Definition: NvInferRuntime.h:3049
int32_t const * getProfileTensorValues(char const *tensorName, int32_t profileIndex, OptProfileSelector select) const noexcept
Get the minimum / optimum / maximum values (not dimensions) for an input tensor given its name under ...
Definition: NvInferRuntime.h:2884
int64_t getMinimumWeightStreamingBudget() const noexcept
The minimum number of bytes of GPU memory required by network weights for successful weight streaming...
Definition: NvInferRuntime.h:3146
char const * getIOTensorName(int32_t index) const noexcept
Return name of an IO tensor.
Definition: NvInferRuntime.h:3011
int64_t getWeightStreamingBudget() const noexcept
Returns the current weight streaming device memory budget in bytes.
Definition: NvInferRuntime.h:3124
EngineCapability getEngineCapability() const noexcept
Determine what execution capability this engine has.
Definition: NvInferRuntime.h:2900
IErrorRecorder * getErrorRecorder() const noexcept
Get the ErrorRecorder assigned to this interface.
Definition: NvInferRuntime.h:2934
TensorFormat getTensorFormat(char const *tensorName, int32_t profileIndex) const noexcept
Return the tensor format of given profile, or TensorFormat::kLINEAR if the provided name does not map...
Definition: NvInferRuntime.h:2737
TRT_DEPRECATED bool hasImplicitBatchDimension() const noexcept
Query whether the engine was built with an implicit batch dimension.
Definition: NvInferRuntime.h:2948
apiv::VCudaEngine * mImpl
Definition: NvInferRuntime.h:3183
IExecutionContext * createExecutionContext(ExecutionContextAllocationStrategy strategy=ExecutionContextAllocationStrategy::kSTATIC) noexcept
Create an execution context and specify the strategy for allocating internal activation memory.
Definition: NvInferRuntime.h:2536
char const * getTensorFormatDesc(char const *tensorName) const noexcept
Return the human readable description of the tensor format, or empty string if the provided name does...
Definition: NvInferRuntime.h:2761
Dims getProfileShape(char const *tensorName, int32_t profileIndex, OptProfileSelector select) const noexcept
Get the minimum / optimum / maximum dimensions for an input tensor given its name under an optimizati...
Definition: NvInferRuntime.h:2863
int32_t getNbAuxStreams() const noexcept
Return the number of auxiliary streams used by this engine.
Definition: NvInferRuntime.h:3039
int64_t getStreamableWeightsSize() const noexcept
Get the total size in bytes of all streamable weights.
Definition: NvInferRuntime.h:3163
DataType getTensorDataType(char const *tensorName) const noexcept
Determine the required data type for a buffer from its tensor name.
Definition: NvInferRuntime.h:2491
void setErrorRecorder(IErrorRecorder *recorder) noexcept
Set the ErrorRecorder for this interface.
Definition: NvInferRuntime.h:2919
size_t getDeviceMemorySize() const noexcept
Return the maximum device memory required by the context over all profiles.
Definition: NvInferRuntime.h:2610
TacticSources getTacticSources() const noexcept
Return the tactic sources required by this engine.
Definition: NvInferRuntime.h:2964
IHostMemory * serializeWithConfig(ISerializationConfig &config) const noexcept
Serialize the network to a stream with the provided SerializationConfig.
Definition: NvInferRuntime.h:3063
virtual ~ICudaEngine() noexcept=default
bool setWeightStreamingBudget(int64_t gpuMemoryBudget) noexcept
Limit the maximum amount of GPU memory usable for network weights in bytes.
Definition: NvInferRuntime.h:3106
size_t getDeviceMemorySizeForProfile(int32_t profileIndex) const noexcept
Return the maximum device memory required by the context for a profile.
Definition: NvInferRuntime.h:2620
bool isDebugTensor(char const *name) const noexcept
Check if a tensor is marked as a debug tensor.
Definition: NvInferRuntime.h:3177
int32_t getTensorVectorizedDim(char const *tensorName, int32_t profileIndex) const noexcept
Return the dimension index that the buffer is vectorized of given profile, or -1 if the provided name...
Definition: NvInferRuntime.h:2817
char const * getName() const noexcept
Returns the name of the network associated with the engine.
Definition: NvInferRuntime.h:2832
ProfilingVerbosity getProfilingVerbosity() const noexcept
Return the ProfilingVerbosity the builder config was set to when the engine was built.
Definition: NvInferRuntime.h:2976
bool isShapeInferenceIO(char const *tensorName) const noexcept
True if tensor is required as input for shape calculations or is output from shape calculations.
Definition: NvInferRuntime.h:2574
int32_t getTensorVectorizedDim(char const *tensorName) const noexcept
Return the dimension index that the buffer is vectorized, or -1 if the provided name does not map to ...
Definition: NvInferRuntime.h:2801
int32_t getTensorComponentsPerElement(char const *tensorName, int32_t profileIndex) const noexcept
Return the number of components included in one element of given profile, or -1 if the provided name ...
Definition: NvInferRuntime.h:2708
TensorFormat getTensorFormat(char const *tensorName) const noexcept
Return the tensor format, or TensorFormat::kLINEAR if the provided name does not map to an input or o...
Definition: NvInferRuntime.h:2723
IHostMemory * serialize() const noexcept
Serialize the network to a stream.
Definition: NvInferRuntime.h:2519
TensorLocation getTensorLocation(char const *tensorName) const noexcept
Get whether an input or output tensor must be on GPU or CPU.
Definition: NvInferRuntime.h:2554
IEngineInspector * createEngineInspector() const noexcept
Create a new engine inspector which prints the layer information in an engine or an execution context...
Definition: NvInferRuntime.h:2986
int32_t getTensorBytesPerComponent(char const *tensorName, int32_t profileIndex) const noexcept
Return the number of bytes per component of an element of given profile, or -1 if the provided name d...
Definition: NvInferRuntime.h:2669
HardwareCompatibilityLevel getHardwareCompatibilityLevel() const noexcept
Return the hardware compatibility level of this engine.
Definition: NvInferRuntime.h:3024
int32_t getNbOptimizationProfiles() const noexcept
Get the number of optimization profiles defined for this engine.
Definition: NvInferRuntime.h:2843
TRT_DEPRECATED IExecutionContext * createExecutionContextWithoutDeviceMemory() noexcept
Create an execution context without any device memory allocated.
Definition: NvInferRuntime.h:2600
char const * getTensorFormatDesc(char const *tensorName, int32_t profileIndex) const noexcept
Return the human readable description of the tensor format of given profile, or empty string if the p...
Definition: NvInferRuntime.h:2784
TensorIOMode getTensorIOMode(char const *tensorName) const noexcept
Determine whether a tensor is an input or output tensor.
Definition: NvInferRuntime.h:2588
int32_t getNbLayers() const noexcept
Get the number of layers in the network.
Definition: NvInferRuntime.h:2505
int32_t getNbIOTensors() const noexcept
Return number of IO tensors.
Definition: NvInferRuntime.h:2999
int32_t getTensorComponentsPerElement(char const *tensorName) const noexcept
Return the number of components included in one element, or -1 if the provided name does not map to a...
Definition: NvInferRuntime.h:2690
bool isRefittable() const noexcept
Return true if an engine can be refit.
Definition: NvInferRuntime.h:2630
An IDimensionExpr represents an integer expression constructed from constants, input dimensions,...
Definition: NvInferRuntime.h:229
bool isConstant() const noexcept
Return true if expression is a build-time constant.
Definition: NvInferRuntime.h:234
bool isSizeTensor() const noexcept
Return true if this denotes the value of a size tensor.
Definition: NvInferRuntime.h:260
virtual ~IDimensionExpr() noexcept=default
apiv::VDimensionExpr * mImpl
Definition: NvInferRuntime.h:251
int64_t getConstantValue() const noexcept
Get the value of the constant.
Definition: NvInferRuntime.h:245
An engine inspector which prints out the layer information of an engine or an execution context.
Definition: NvInferRuntime.h:4204
char const * getLayerInformation(int32_t layerIndex, LayerInformationFormat format) const noexcept
Get a string describing the information about a specific layer in the current engine or the execution...
Definition: NvInferRuntime.h:4257
IErrorRecorder * getErrorRecorder() const noexcept
Get the ErrorRecorder assigned to this interface.
Definition: NvInferRuntime.h:4314
void setErrorRecorder(IErrorRecorder *recorder) noexcept
Set the ErrorRecorder for this interface.
Definition: NvInferRuntime.h:4299
IExecutionContext const * getExecutionContext() const noexcept
Get the context currently being inspected.
Definition: NvInferRuntime.h:4232
apiv::VEngineInspector * mImpl
Definition: NvInferRuntime.h:4320
virtual ~IEngineInspector() noexcept=default
char const * getEngineInformation(LayerInformationFormat format) const noexcept
Get a string describing the information about all the layers in the current engine or the execution c...
Definition: NvInferRuntime.h:4280
Context for executing inference using an engine, with functionally unsafe features.
Definition: NvInferRuntime.h:3324
IOutputAllocator * getOutputAllocator(char const *tensorName) const noexcept
Get output allocator associated with output tensor of given name, or nullptr if the provided name doe...
Definition: NvInferRuntime.h:3928
IErrorRecorder * getErrorRecorder() const noexcept
Get the ErrorRecorder assigned to this interface.
Definition: NvInferRuntime.h:3579
bool reportToProfiler() const noexcept
Calculate layer timing info for the current optimization profile in IExecutionContext and update the ...
Definition: NvInferRuntime.h:3698
virtual ~IExecutionContext() noexcept=default
void setDeviceMemory(void *memory) noexcept
Set the device memory for use by this execution context.
Definition: NvInferRuntime.h:3421
TRT_DEPRECATED bool allInputShapesSpecified() const noexcept
Whether all input shape bindings have been specified.
Definition: NvInferRuntime.h:3545
bool setTensorDebugState(char const *name, bool flag) noexcept
Set debug state of tensor given the tensor name.
Definition: NvInferRuntime.h:4135
char const * getName() const noexcept
Return the name of the execution context.
Definition: NvInferRuntime.h:3400
IGpuAllocator * getTemporaryStorageAllocator() const noexcept
Get allocator set by setTemporaryStorageAllocator.
Definition: NvInferRuntime.h:3976
void setEnqueueEmitsProfile(bool enqueueEmitsProfile) noexcept
Set whether enqueue emits layer timing to the profiler.
Definition: NvInferRuntime.h:3656
Dims getTensorShape(char const *tensorName) const noexcept
Return the shape of the given input or output.
Definition: NvInferRuntime.h:3512
bool getDebugState(char const *name) const noexcept
Definition: NvInferRuntime.h:4158
bool setInputShape(char const *tensorName, Dims const &dims) noexcept
Set shape of given input.
Definition: NvInferRuntime.h:3475
bool executeV2(void *const *bindings) noexcept
Synchronously execute a network.
Definition: NvInferRuntime.h:3596
bool getEnqueueEmitsProfile() const noexcept
Get the enqueueEmitsProfile state.
Definition: NvInferRuntime.h:3668
void const * getTensorAddress(char const *tensorName) const noexcept
Get memory address bound to given input or output tensor, or nullptr if the provided name does not ma...
Definition: NvInferRuntime.h:3756
bool setOutputAllocator(char const *tensorName, IOutputAllocator *outputAllocator) noexcept
Set output allocator to use for output tensor of given name. Pass nullptr to outputAllocator to unset...
Definition: NvInferRuntime.h:3915
bool setOptimizationProfileAsync(int32_t profileIndex, cudaStream_t stream) noexcept
Select an optimization profile for the current context with async semantics.
Definition: NvInferRuntime.h:3640
apiv::VExecutionContext * mImpl
Definition: NvInferRuntime.h:4164
bool setOutputTensorAddress(char const *tensorName, void *data) noexcept
Set the memory address for a given output tensor.
Definition: NvInferRuntime.h:3779
void setPersistentCacheLimit(size_t size) noexcept
Set the maximum size for persistent cache usage.
Definition: NvInferRuntime.h:4016
size_t getPersistentCacheLimit() const noexcept
Get the maximum size for persistent cache usage.
Definition: NvInferRuntime.h:4027
bool setAllTensorsDebugState(bool flag) noexcept
Definition: NvInferRuntime.h:4148
ICudaEngine const & getEngine() const noexcept
Get the associated engine.
Definition: NvInferRuntime.h:3376
ProfilingVerbosity getNvtxVerbosity() const noexcept
Get the NVTX verbosity of the execution context.
Definition: NvInferRuntime.h:4063
size_t updateDeviceMemorySizeForShapes() noexcept
Recompute the internal activation buffer sizes based on the current input shapes, and return the tota...
Definition: NvInferRuntime.h:3870
void setAuxStreams(cudaStream_t *auxStreams, int32_t nbStreams) noexcept
Set the auxiliary streams that TensorRT should launch kernels on in the next enqueueV3() call.
Definition: NvInferRuntime.h:4094
int64_t getMaxOutputSize(char const *tensorName) const noexcept
Get upper bound on an output tensor's size, in bytes, based on the current optimization profile and i...
Definition: NvInferRuntime.h:3946
int32_t inferShapes(int32_t nbMaxNames, char const **tensorNames) noexcept
Run shape calculations.
Definition: NvInferRuntime.h:3853
bool setDebugListener(IDebugListener *listener) noexcept
Set DebugListener for this execution context.
Definition: NvInferRuntime.h:4106
bool setTensorAddress(char const *tensorName, void *data) noexcept
Set memory address for given input or output tensor.
Definition: NvInferRuntime.h:3739
bool setTemporaryStorageAllocator(IGpuAllocator *allocator) noexcept
Specify allocator to use for internal temporary storage.
Definition: NvInferRuntime.h:3966
void * getOutputTensorAddress(char const *tensorName) const noexcept
Get memory address for given output.
Definition: NvInferRuntime.h:3820
bool enqueueV3(cudaStream_t stream) noexcept
Enqueue inference on a stream.
Definition: NvInferRuntime.h:4000
IDebugListener * getDebugListener() noexcept
Get the DebugListener of this execution context.
Definition: NvInferRuntime.h:4116
int32_t getOptimizationProfile() const noexcept
Get the index of the currently selected optimization profile.
Definition: NvInferRuntime.h:3457
bool setInputTensorAddress(char const *tensorName, void const *data) noexcept
Set memory address for given input.
Definition: NvInferRuntime.h:3801
bool getDebugSync() const noexcept
Get the debug sync flag.
Definition: NvInferRuntime.h:3346
bool setInputConsumedEvent(cudaEvent_t event) noexcept
Mark input as consumed.
Definition: NvInferRuntime.h:3886
Dims getTensorStrides(char const *tensorName) const noexcept
Return the strides of the buffer for the given tensor name.
Definition: NvInferRuntime.h:3442
bool setNvtxVerbosity(ProfilingVerbosity verbosity) noexcept
Set the verbosity of the NVTX markers in the execution context.
Definition: NvInferRuntime.h:4051
IProfiler * getProfiler() const noexcept
Get the profiler.
Definition: NvInferRuntime.h:3366
void setErrorRecorder(IErrorRecorder *recorder) noexcept
Set the ErrorRecorder for this interface.
Definition: NvInferRuntime.h:3564
bool allInputDimensionsSpecified() const noexcept
Whether all dynamic dimensions of input tensors have been specified.
Definition: NvInferRuntime.h:3528
void setProfiler(IProfiler *profiler) noexcept
Set the profiler.
Definition: NvInferRuntime.h:3356
void setName(char const *name) noexcept
Set the name of the execution context.
Definition: NvInferRuntime.h:3390
cudaEvent_t getInputConsumedEvent() const noexcept
The event associated with consuming the input.
Definition: NvInferRuntime.h:3896
Object for constructing IDimensionExpr.
Definition: NvInferRuntime.h:284
IDimensionExpr const * operation(DimensionOperation op, IDimensionExpr const &first, IDimensionExpr const &second) noexcept
Get the operation.
Definition: NvInferRuntime.h:300
virtual ~IExprBuilder() noexcept=default
IDimensionExpr const * constant(int64_t value) noexcept
Return pointer to IDimensionExp for given value.
Definition: NvInferRuntime.h:289
apiv::VExprBuilder * mImpl
Definition: NvInferRuntime.h:307
IDimensionExpr const * declareSizeTensor(int32_t outputIndex, IDimensionExpr const &opt, IDimensionExpr const &upper)
Declare a size tensor at the given output index, with the specified auto-tuning formula and upper bou...
Definition: NvInferRuntime.h:335
Class to handle library allocated memory that is accessible to the user.
Definition: NvInferRuntime.h:139
void * data() const noexcept
A pointer to the raw data that is owned by the library.
Definition: NvInferRuntime.h:144
DataType type() const noexcept
The type of the memory that was allocated.
Definition: NvInferRuntime.h:156
std::size_t size() const noexcept
The size in bytes of the data that was allocated.
Definition: NvInferRuntime.h:150
apiv::VHostMemory * mImpl
Definition: NvInferRuntime.h:162
virtual ~IHostMemory() noexcept=default
A virtual base class to find a logger. Allows a plugin to find an instance of a logger if it needs to...
Definition: NvInferRuntime.h:4417
virtual ILogger * findLogger()=0
Get the logger used by the engine or execution context which called the plugin method.
virtual ~ILoggerFinder()=default
Application-implemented logging interface for the builder, refitter and runtime.
Definition: NvInferRuntimeBase.h:676
Forward declaration of IEngineInspector for use by other interfaces.
Definition: NvInferRuntime.h:48
INoCopy & operator=(INoCopy &&other)=delete
INoCopy(INoCopy const &other)=delete
INoCopy(INoCopy &&other)=delete
virtual ~INoCopy()=default
INoCopy & operator=(INoCopy const &other)=delete
Optimization profile for dynamic input dimensions and shape tensors.
Definition: NvInferRuntime.h:2075
int32_t const * getShapeValues(char const *inputName, OptProfileSelector select) const noexcept
Get the minimum / optimum / maximum values for an input shape tensor.
Definition: NvInferRuntime.h:2190
apiv::VOptimizationProfile * mImpl
Definition: NvInferRuntime.h:2243
Dims getDimensions(char const *inputName, OptProfileSelector select) const noexcept
Get the minimum / optimum / maximum dimensions for a dynamic input tensor.
Definition: NvInferRuntime.h:2116
virtual ~IOptimizationProfile() noexcept=default
float getExtraMemoryTarget() const noexcept
Get the extra memory target that has been defined for this profile.
Definition: NvInferRuntime.h:2220
bool setExtraMemoryTarget(float target) noexcept
Set a target for extra GPU memory that may be used by this profile.
Definition: NvInferRuntime.h:2208
bool setDimensions(char const *inputName, OptProfileSelector select, Dims const &dims) noexcept
Set the minimum / optimum / maximum dimensions for a dynamic input tensor.
Definition: NvInferRuntime.h:2104
bool setShapeValues(char const *inputName, OptProfileSelector select, int32_t const *values, int32_t nbValues) noexcept
Set the minimum / optimum / maximum values for an input shape tensor.
Definition: NvInferRuntime.h:2164
bool isValid() const noexcept
Check whether the optimization profile can be passed to an IBuilderConfig object.
Definition: NvInferRuntime.h:2237
int32_t getNbShapeValues(char const *inputName) const noexcept
Get the number of values for an input shape tensor.
Definition: NvInferRuntime.h:2178
Single registration point for all plugins in an application. It is used to find plugin implementation...
Definition: NvInferRuntimeCommon.h:54
virtual bool registerCreator(IPluginCreator &creator, AsciiChar const *const pluginNamespace) noexcept=0
Register a plugin creator implementing IPluginCreator. Returns false if any plugin creator with the s...
Interface for plugins to access per context resources provided by TensorRT.
Definition: NvInferRuntime.h:639
virtual IErrorRecorder * getErrorRecorder() const noexcept=0
Get the error recorder associated with the resource context.
virtual IGpuAllocator * getGpuAllocator() const noexcept=0
Get the GPU allocator associated with the resource context.
Similar to IPluginV2Ext, but with support for dynamic shapes.
Definition: NvInferRuntime.h:404
IPluginV2DynamicExt * clone() const noexcept override=0
Clone the plugin object. This copies over internal plugin parameters as well and returns a new plugin...
virtual ~IPluginV2DynamicExt() noexcept
Definition: NvInferRuntime.h:555
Plugin class for user-implemented layers.
Definition: NvInferRuntimePlugin.h:462
Updates weights in an engine.
Definition: NvInferRuntime.h:1603
bool refitCudaEngineAsync(cudaStream_t stream) noexcept
Enqueue weights refitting of the associated engine on the given stream.
Definition: NvInferRuntime.h:1997
int32_t getMaxThreads() const noexcept
Get the maximum number of threads that can be used by the refitter.
Definition: NvInferRuntime.h:1876
TensorLocation getWeightsLocation(char const *weightsName) const noexcept
Get location for the weights associated with the given name.
Definition: NvInferRuntime.h:1935
bool setNamedWeights(char const *name, Weights weights) noexcept
Specify new weights of given name.
Definition: NvInferRuntime.h:1800
int32_t getAllWeights(int32_t size, char const **weightsNames) noexcept
Get names of all weights that could be refit.
Definition: NvInferRuntime.h:1836
bool setDynamicRange(char const *tensorName, float min, float max) noexcept
Definition: NvInferRuntime.h:1697
ILogger * getLogger() const noexcept
Get the logger with which the refitter was created.
Definition: NvInferRuntime.h:1846
int32_t getTensorsWithDynamicRange(int32_t size, char const **tensorNames) const noexcept
Get names of all tensors that have refittable dynamic ranges.
Definition: NvInferRuntime.h:1741
float getDynamicRangeMin(char const *tensorName) const noexcept
Get minimum of dynamic range.
Definition: NvInferRuntime.h:1711
bool refitCudaEngine() noexcept
Refits associated engine.
Definition: NvInferRuntime.h:1639
int32_t getMissingWeights(int32_t size, char const **weightsNames) noexcept
Get names of missing weights.
Definition: NvInferRuntime.h:1820
int32_t getMissing(int32_t size, char const **layerNames, WeightsRole *roles) noexcept
Get description of missing weights.
Definition: NvInferRuntime.h:1660
Weights getNamedWeights(char const *weightsName) const noexcept
Get weights associated with the given name.
Definition: NvInferRuntime.h:1919
bool unsetNamedWeights(char const *weightsName) noexcept
Unset weights associated with the given name.
Definition: NvInferRuntime.h:1951
Weights getWeightsPrototype(char const *weightsName) const noexcept
Get the Weights prototype associated with the given name.
Definition: NvInferRuntime.h:2015
bool setMaxThreads(int32_t maxThreads) noexcept
Set the maximum number of threads.
Definition: NvInferRuntime.h:1862
bool setNamedWeights(char const *name, Weights weights, TensorLocation location) noexcept
Specify new weights on a specified device of given name.
Definition: NvInferRuntime.h:1903
void setWeightsValidation(bool weightsValidation) noexcept
Set whether to validate weights during refitting.
Definition: NvInferRuntime.h:1967
float getDynamicRangeMax(char const *tensorName) const noexcept
Get maximum of dynamic range.
Definition: NvInferRuntime.h:1725
apiv::VRefitter * mImpl
Definition: NvInferRuntime.h:2021
int32_t getAll(int32_t size, char const **layerNames, WeightsRole *roles) noexcept
Get description of all weights that could be refit.
Definition: NvInferRuntime.h:1677
virtual ~IRefitter() noexcept=default
bool getWeightsValidation() const noexcept
Get whether to validate weights values during refitting.
Definition: NvInferRuntime.h:1975
void setErrorRecorder(IErrorRecorder *recorder) noexcept
Set the ErrorRecorder for this interface.
Definition: NvInferRuntime.h:1760
IErrorRecorder * getErrorRecorder() const noexcept
Get the ErrorRecorder assigned to this interface.
Definition: NvInferRuntime.h:1775
Allows a serialized functionally unsafe engine to be deserialized.
Definition: NvInferRuntime.h:1308
bool setMaxThreads(int32_t maxThreads) noexcept
Set the maximum number of threads.
Definition: NvInferRuntime.h:1448
IRuntime * loadRuntime(char const *path) noexcept
Load IRuntime from the file.
Definition: NvInferRuntime.h:1564
ICudaEngine * deserializeCudaEngine(IStreamReader &streamReader)
Deserialize an engine from a stream.
Definition: NvInferRuntime.h:1423
bool getEngineHostCodeAllowed() const noexcept
Get whether the runtime is allowed to deserialize engines with host executable code.
Definition: NvInferRuntime.h:1586
TempfileControlFlags getTempfileControlFlags() const noexcept
Get the tempfile control flags for this runtime.
Definition: NvInferRuntime.h:1536
void setEngineHostCodeAllowed(bool allowed) noexcept
Set whether the runtime is allowed to deserialize engines with host executable code.
Definition: NvInferRuntime.h:1576
virtual ~IRuntime() noexcept=default
void setTemporaryDirectory(char const *path) noexcept
Set the directory that will be used by this runtime for temporary files.
Definition: NvInferRuntime.h:1497
IPluginRegistry & getPluginRegistry() noexcept
Get the local plugin registry that can be used by the runtime.
Definition: NvInferRuntime.h:1546
apiv::VRuntime * mImpl
Definition: NvInferRuntime.h:1592
int32_t getNbDLACores() const noexcept
Returns number of DLA hardware cores accessible or 0 if DLA is unavailable.
Definition: NvInferRuntime.h:1341
ICudaEngine * deserializeCudaEngine(void const *blob, std::size_t size) noexcept
Deserialize an engine from host memory.
Definition: NvInferRuntime.h:1405
void setTempfileControlFlags(TempfileControlFlags flags) noexcept
Set the tempfile control flags for this runtime.
Definition: NvInferRuntime.h:1524
int32_t getDLACore() const noexcept
Get the DLA core that the engine executes on.
Definition: NvInferRuntime.h:1333
void setGpuAllocator(IGpuAllocator *allocator) noexcept
Set the GPU allocator.
Definition: NvInferRuntime.h:1356
IErrorRecorder * getErrorRecorder() const noexcept
Get the ErrorRecorder assigned to this interface.
Definition: NvInferRuntime.h:1390
ILogger * getLogger() const noexcept
Get the logger with which the runtime was created.
Definition: NvInferRuntime.h:1433
int32_t getMaxThreads() const noexcept
Get the maximum number of threads that can be used by the runtime.
Definition: NvInferRuntime.h:1462
char const * getTemporaryDirectory() const noexcept
Get the directory that will be used by this runtime for temporary files.
Definition: NvInferRuntime.h:1508
void setErrorRecorder(IErrorRecorder *recorder) noexcept
Set the ErrorRecorder for this interface.
Definition: NvInferRuntime.h:1375
Holds properties for configuring an engine to serialize the binary.
Definition: NvInferRuntime.h:2353
virtual ~ISerializationConfig() noexcept=default
bool clearFlag(SerializationFlag serializationFlag) noexcept
Clear a serialization flag.
Definition: NvInferRuntime.h:2392
bool setFlag(SerializationFlag serializationFlag) noexcept
Set a serialization flag.
Definition: NvInferRuntime.h:2404
SerializationFlags getFlags() const noexcept
Get the serialization flags for this config.
Definition: NvInferRuntime.h:2380
bool getFlag(SerializationFlag serializationFlag) const noexcept
Returns true if the serialization flag is set.
Definition: NvInferRuntime.h:2416
apiv::VSerializationConfig * mImpl
Definition: NvInferRuntime.h:2422
An Interface class for version control.
Definition: NvInferRuntimeBase.h:393
Version information associated with a TRT interface.
Definition: NvInferRuntimeBase.h:358
Register the plugin creator to the registry The static registry object will be instantiated when the ...
Definition: NvInferRuntime.h:4389
PluginRegistrar()
Definition: NvInferRuntime.h:4391
An array of weights used as a layer parameter.
Definition: NvInferRuntime.h:121
DataType type
The type of the weights.
Definition: NvInferRuntime.h:123
int64_t count
The number of weights in the array.
Definition: NvInferRuntime.h:125
void const * values
The weight values, in a contiguous array.
Definition: NvInferRuntime.h:124
Definition: NvInferRuntime.h:3275
virtual bool processDebugTensor(void const *addr, TensorLocation location, DataType type, Dims const &shape, char const *name, cudaStream_t stream)=0
Callback function that is called when a debug tensor’s value is updated and the debug state of the te...
InterfaceInfo getInterfaceInfo() const noexcept override
Return version information associated with this interface. Applications must not override this method...
Definition: NvInferRuntime.h:3280
~IDebugListener() override=default
Definition: NvInferRuntimeBase.h:850
Definition: NvInferRuntimeBase.h:462
Definition: NvInferRuntime.h:4438
bool deallocateAsync(void *const memory, cudaStream_t) noexcept override=0
A thread-safe callback implemented by the application to handle stream-ordered asynchronous release o...
void * allocateAsync(uint64_t const size, uint64_t const alignment, AllocatorFlags const flags, cudaStream_t) noexcept override=0
A thread-safe callback implemented by the application to handle stream-ordered asynchronous acquisiti...
TRT_DEPRECATED void * allocate(uint64_t const size, uint64_t const alignment, AllocatorFlags const flags) noexcept override
A thread-safe callback implemented by the application to handle acquisition of GPU memory.
Definition: NvInferRuntime.h:4524
TRT_DEPRECATED bool deallocate(void *const memory) noexcept override
A thread-safe callback implemented by the application to handle release of GPU memory.
Definition: NvInferRuntime.h:4548
InterfaceInfo getInterfaceInfo() const noexcept override
Return version information associated with this interface. Applications must not override this method...
Definition: NvInferRuntime.h:4556
~IGpuAsyncAllocator() override=default
Definition: NvInferRuntime.h:3189
InterfaceInfo getInterfaceInfo() const noexcept override
Return version information associated with this interface. Applications must not override this method...
Definition: NvInferRuntime.h:3194
virtual void * reallocateOutputAsync(char const *tensorName, void *currentMemory, uint64_t size, uint64_t alignment, cudaStream_t)
Return a pointer to memory for an output tensor, or nullptr if memory cannot be allocated....
Definition: NvInferRuntime.h:3245
virtual void notifyShape(char const *tensorName, Dims const &dims) noexcept=0
Called by TensorRT when the shape of the output tensor is known.
virtual TRT_DEPRECATED void * reallocateOutput(char const *tensorName, void *currentMemory, uint64_t size, uint64_t alignment) noexcept
Return a pointer to memory for an output tensor, or nullptr if memory cannot be allocated....
Definition: NvInferRuntime.h:3217
Definition: NvInferRuntime.h:665
Definition: NvInferRuntimePlugin.h:929
Definition: NvInferRuntime.h:1113
virtual PluginFieldCollection const * getFieldNames() noexcept=0
Return a list of fields that need to be passed to createPlugin() when creating a plugin for use in th...
InterfaceInfo getInterfaceInfo() const noexcept override
Return version information associated with this interface. Applications must not override this method...
Definition: NvInferRuntime.h:1118
virtual IPluginV3 * createPlugin(AsciiChar const *name, PluginFieldCollection const *fc, TensorRTPhase phase) noexcept=0
Return a plugin object. Return nullptr in case of error.
Definition: NvInferRuntime.h:687
virtual IPluginV3 * clone() noexcept=0
Clone the plugin object. This copies over internal plugin parameters and returns a new plugin object ...
virtual IPluginCapability * getCapabilityInterface(PluginCapabilityType type) noexcept=0
Return a pointer to plugin object implementing the specified PluginCapabilityType.
InterfaceInfo getInterfaceInfo() const noexcept override
Return version information associated with this interface. Applications must not override this method...
Definition: NvInferRuntime.h:692
Definition: NvInferRuntime.h:784
virtual int32_t getFormatCombinationLimit() noexcept
Return the maximum number of format combinations that will be timed by TensorRT during the build phas...
Definition: NvInferRuntime.h:964
virtual int32_t getNbOutputs() const noexcept=0
Get the number of outputs from the plugin.
virtual int32_t configurePlugin(DynamicPluginTensorDesc const *in, int32_t nbInputs, DynamicPluginTensorDesc const *out, int32_t nbOutputs) noexcept=0
Configure the plugin.
virtual int32_t getNbTactics() noexcept
Query for the number of custom tactics the plugin intends to use.
Definition: NvInferRuntime.h:940
virtual char const * getMetadataString() noexcept
Query for a string representing the configuration of the plugin. May be called anytime after plugin c...
Definition: NvInferRuntime.h:975
virtual char const * getTimingCacheID() noexcept
Called to query the suffix to use for the timing cache ID. May be called anytime after plugin creatio...
Definition: NvInferRuntime.h:956
virtual bool supportsFormatCombination(int32_t pos, DynamicPluginTensorDesc const *inOut, int32_t nbInputs, int32_t nbOutputs) noexcept=0
Return true if plugin supports the format and datatype for the input/output indexed by pos.
virtual int32_t getOutputDataTypes(DataType *outputTypes, int32_t nbOutputs, const DataType *inputTypes, int32_t nbInputs) const noexcept=0
Provide the data types of the plugin outputs if the input tensors have the data types provided.
InterfaceInfo getInterfaceInfo() const noexcept override
Return version information associated with this interface. Applications must not override this method...
Definition: NvInferRuntime.h:796
virtual int32_t getOutputShapes(DimsExprs const *inputs, int32_t nbInputs, DimsExprs const *shapeInputs, int32_t nbShapeInputs, DimsExprs *outputs, int32_t nbOutputs, IExprBuilder &exprBuilder) noexcept=0
Provide expressions for computing dimensions of the output tensors from dimensions of the input tenso...
virtual int32_t getValidTactics(int32_t *tactics, int32_t nbTactics) noexcept
Query for any custom tactics that the plugin intends to use.
Definition: NvInferRuntime.h:932
Definition: NvInferRuntime.h:741
InterfaceInfo getInterfaceInfo() const noexcept override
Return version information associated with this interface. Applications must not override this method...
Definition: NvInferRuntime.h:746
virtual AsciiChar const * getPluginName() const noexcept=0
Return the plugin name. Should match the plugin name returned by the corresponding plugin creator.
Definition: NvInferRuntime.h:982
InterfaceInfo getInterfaceInfo() const noexcept override
Return version information associated with this interface. Applications must not override this method...
Definition: NvInferRuntime.h:987
virtual int32_t onShapeChange(PluginTensorDesc const *in, int32_t nbInputs, PluginTensorDesc const *out, int32_t nbOutputs) noexcept=0
Called when a plugin is being prepared for execution for specific dimensions. This could happen multi...
virtual PluginFieldCollection const * getFieldsToSerialize() noexcept=0
Get the plugin fields which should be serialized.
virtual int32_t setTactic(int32_t tactic) noexcept
Set the tactic to be used in the subsequent call to enqueue(). If no custom tactics were advertised,...
Definition: NvInferRuntime.h:999
virtual int32_t enqueue(PluginTensorDesc const *inputDesc, PluginTensorDesc const *outputDesc, void const *const *inputs, void *const *outputs, void *workspace, cudaStream_t stream) noexcept=0
Execute the layer.
virtual IPluginV3 * attachToContext(IPluginResourceContext *context) noexcept=0
Clone the plugin, attach the cloned plugin object to an execution context and grant the cloned plugin ...
Definition: NvInferRuntime.h:1194
virtual ~IProfiler() noexcept
Definition: NvInferRuntime.h:1205
virtual void reportLayerTime(char const *layerName, float ms) noexcept=0
Layer time reporting callback.
Definition: NvInferRuntimeBase.h:1105
IRefitter * createInferRefitter(ICudaEngine &engine, ILogger &logger) noexcept
Create an instance of an IRefitter class.
Definition: NvInferRuntime.h:4369
IRuntime * createInferRuntime(ILogger &logger) noexcept
Create an instance of an IRuntime class.
Definition: NvInferRuntime.h:4359
The TensorRT API version 1 namespace.
uint32_t TacticSources
Represents a collection of one or more TacticSource values combined using bitwise-OR operations.
Definition: NvInferRuntime.h:2292
v_1_0::IPluginCapability IPluginCapability
Definition: NvInferRuntime.h:682
v_1_0::IOutputAllocator IOutputAllocator
Definition: NvInferRuntime.h:3270
EngineCapability
List of supported engine capability flows.
Definition: NvInferRuntime.h:73
DimensionOperation
An operation on two IDimensionExpr, which represent integer expressions used in dimension computation...
Definition: NvInferRuntime.h:176
@ kSUB
Subtract the second element from the first.
@ kSUM
Sum of the two operands.
@ kPROD
Product of the two operands.
@ kFLOOR_DIV
Floor division of the first element by the second.
@ kEQUAL
1 if operands are equal, 0 otherwise.
@ kMIN
Minimum of the two operands.
@ kLESS
1 if first operand is less than second operand, 0 otherwise.
@ kMAX
Maximum of the two operands.
@ kCEIL_DIV
Division rounding up.
TensorIOMode
Definition of tensor IO Mode.
Definition: NvInferRuntimeBase.h:1091
HardwareCompatibilityLevel
Describes requirements of compatibility with GPU architectures other than that of the GPU on which th...
Definition: NvInfer.h:8428
SerializationFlag
List of valid flags that the engine can enable when serializing the bytes.
Definition: NvInferRuntime.h:2333
@ kEXCLUDE_WEIGHTS
Exclude the weights that can be refitted.
v_1_0::IProfiler IProfiler
Definition: NvInferRuntime.h:1220
uint32_t TempfileControlFlags
Represents a collection of one or more TempfileControlFlag values combined using bitwise-OR operation...
Definition: NvInferRuntime.h:1298
PluginCapabilityType
Enumerates the different capability types a IPluginV3 object may have.
Definition: NvInferRuntimePlugin.h:904
constexpr int32_t EnumMax< WeightsRole >() noexcept
Maximum number of elements in WeightsRole enum.
Definition: NvInferRuntime.h:1241
char_t AsciiChar
Definition: NvInferRuntimeBase.h:101
TensorRTPhase
Indicates a phase of operation of TensorRT.
Definition: NvInferRuntimePlugin.h:919
@ kV2_DYNAMICEXT
IPluginV2DynamicExt.
constexpr int32_t EnumMax< LayerInformationFormat >() noexcept
Definition: NvInferRuntime.h:4183
DataType
The type of weights and tensors.
Definition: NvInferRuntimeBase.h:129
DeviceType
The device that this layer/network will execute on.
Definition: NvInferRuntime.h:1252
@ kSCALE
Scale layer.
@ kCONSTANT
Constant layer.
v_1_0::IDebugListener IDebugListener
Definition: NvInferRuntime.h:3311
TempfileControlFlag
Flags used to control TensorRT's behavior when creating executable temporary files.
Definition: NvInferRuntime.h:1275
@ kALLOW_IN_MEMORY_FILES
Allow creating and loading files in-memory (or unnamed files).
constexpr int32_t EnumMax< OptProfileSelector >() noexcept
Number of different values of OptProfileSelector enum.
Definition: NvInferRuntime.h:2047
WeightsRole
How a layer uses particular Weights.
Definition: NvInferRuntime.h:1230
@ kSHIFT
shift part of IScaleLayer
@ kANY
Any other weights role.
@ kBIAS
bias for IConvolutionLayer or IDeconvolutionLayer
@ kKERNEL
kernel for IConvolutionLayer or IDeconvolutionLayer
constexpr int32_t EnumMax< ProfilingVerbosity >() noexcept
Maximum number of profile verbosity levels in ProfilingVerbosity enum.
Definition: NvInferRuntime.h:2312
ProfilingVerbosity
List of verbosity levels of layer information exposed in NVTX annotations and in IEngineInspector.
Definition: NvInferRuntime.h:2304
@ kLAYER_NAMES_ONLY
Print only the layer names. This is the default setting.
@ kDETAILED
Print detailed layer information including layer names and layer parameters.
TacticSource
List of tactic sources for TensorRT.
Definition: NvInferRuntime.h:2255
TensorFormat PluginFormat
PluginFormat is reserved for backward compatibility.
Definition: NvInferRuntimePlugin.h:46
@ kMIN
Minimum of the two elements.
constexpr int32_t EnumMax< TempfileControlFlag >() noexcept
Maximum number of elements in TempfileControlFlag enum.
Definition: NvInferRuntime.h:1287
uint32_t SerializationFlags
Represents one or more SerializationFlag values using binary OR operations, e.g., 1U << Serialization...
Definition: NvInferRuntime.h:2323
TensorFormat
Format of the input/output tensors.
Definition: NvInferRuntimeBase.h:243
ExecutionContextAllocationStrategy
Different memory allocation behaviors for IExecutionContext.
Definition: NvInferRuntime.h:2437
@ kSTATIC
Default static allocation with the maximum size across all profiles.
@ kUSER_MANAGED
The user supplies custom allocation to the execution context.
@ kON_PROFILE_CHANGE
Reallocate for a profile when it's selected.
constexpr int32_t EnumMax< TacticSource >() noexcept
Maximum number of tactic sources in TacticSource enum.
Definition: NvInferRuntime.h:2281
LayerInformationFormat
The format in which the IEngineInspector prints the layer information.
Definition: NvInferRuntime.h:4175
@ kJSON
Print layer information in JSON format.
@ kONELINE
Print layer information in one line per layer.
v_1_0::IGpuAsyncAllocator IGpuAsyncAllocator
Definition: NvInferRuntime.h:4576
@ kMAX
Maximum over elements.
constexpr int32_t EnumMax< DeviceType >() noexcept
Maximum number of elements in DeviceType enum.
Definition: NvInferRuntime.h:1259
constexpr int32_t EnumMax< DimensionOperation >() noexcept
Maximum number of elements in DimensionOperation enum.
Definition: NvInferRuntime.h:190
constexpr int32_t EnumMax< ExecutionContextAllocationStrategy >() noexcept
Maximum number of memory allocation strategies in ExecutionContextAllocationStrategy enum.
Definition: NvInferRuntime.h:2449
constexpr int32_t EnumMax< SerializationFlag >() noexcept
Maximum number of serialization flags in SerializationFlag enum.
Definition: NvInferRuntime.h:2340
TensorLocation
The location for tensor data storage, device or host.
Definition: NvInferRuntime.h:201
@ kHOST
Data stored on host.
@ kDEVICE
Data stored on device.
OptProfileSelector
When setting or querying optimization profile parameters (such as shape tensor inputs or dynamic dime...
Definition: NvInferRuntime.h:2035
@ kOPT
This is used to set or get the value that is used in the optimization (kernel selection).
uint32_t AllocatorFlags
Definition: NvInferRuntimeBase.h:454
Summarizes tensors that a plugin might see for an input or output.
Definition: NvInferRuntime.h:359
Dims min
Lower bounds on tensor’s dimensions.
Definition: NvInferRuntime.h:364
Dims max
Upper bounds on tensor’s dimensions.
Definition: NvInferRuntime.h:367
Dims opt
Optimum value of tensor’s dimensions specified for auto-tuning.
Definition: NvInferRuntime.h:370
PluginTensorDesc desc
Information required to interpret a pointer to tensor data, except that desc.dims has -1 in place of ...
Definition: NvInferRuntime.h:361
Plugin field collection struct.
Definition: NvInferRuntimePlugin.h:891
Fields that a plugin might see for an input or output.
Definition: NvInferRuntimePlugin.h:65
Declaration of EnumMaxImpl struct to store maximum number of elements in an enumeration type.
Definition: NvInferRuntimeBase.h:114

  Copyright © 2024 NVIDIA Corporation
  Privacy Policy | Manage My Privacy | Do Not Sell or Share My Data | Terms of Service | Accessibility | Corporate Policies | Product Security | Contact