TensorRT 8.5.3
NvInferRuntime.h
Go to the documentation of this file.
1/*
2 * SPDX-FileCopyrightText: Copyright (c) 1993-2022 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
3 * SPDX-License-Identifier: LicenseRef-NvidiaProprietary
4 *
5 * NVIDIA CORPORATION, its affiliates and licensors retain all intellectual
6 * property and proprietary rights in and to this material, related
7 * documentation and any modifications thereto. Any use, reproduction,
8 * disclosure or distribution of this material and related documentation
9 * without an express license agreement from NVIDIA CORPORATION or
10 * its affiliates is strictly prohibited.
11 */
12
13#ifndef NV_INFER_RUNTIME_H
14#define NV_INFER_RUNTIME_H
15
#include "NvInferImpl.h"
#include "NvInferRuntimeCommon.h"
25namespace nvinfer1
26{
27
28class IExecutionContext;
29class ICudaEngine;
30class IPluginFactory;
31class IEngineInspector;
32
41
43{
44protected:
45 INoCopy() = default;
46 virtual ~INoCopy() = default;
47 INoCopy(INoCopy const& other) = delete;
48 INoCopy& operator=(INoCopy const& other) = delete;
49 INoCopy(INoCopy&& other) = delete;
50 INoCopy& operator=(INoCopy&& other) = delete;
51};
52
67
68enum class EngineCapability : int32_t
69{
74 kSTANDARD = 0,
75
78
85 kSAFETY = 1,
86
89
96
99};
100
101namespace impl
102{
104template <>
106{
107 static constexpr int32_t kVALUE = 3;
108};
109} // namespace impl
110
126{
127public:
129 void const* values;
130 int64_t count;
131};
132
143class IHostMemory : public INoCopy
144{
145public:
146 virtual ~IHostMemory() noexcept = default;
147
149 void* data() const noexcept
150 {
151 return mImpl->data();
152 }
153
155 std::size_t size() const noexcept
156 {
157 return mImpl->size();
158 }
159
161 DataType type() const noexcept
162 {
163 return mImpl->type();
164 }
172 TRT_DEPRECATED void destroy() noexcept
173 {
174 delete this;
175 }
176
177protected:
178 apiv::VHostMemory* mImpl;
179};
180
191enum class DimensionOperation : int32_t
192{
193 kSUM = 0,
194 kPROD = 1,
195 kMAX = 2,
196 kMIN = 3,
197 kSUB = 4,
198 kEQUAL = 5,
199 kLESS = 6,
200 kFLOOR_DIV = 7,
201 kCEIL_DIV = 8
202};
203
205template <>
206constexpr inline int32_t EnumMax<DimensionOperation>() noexcept
207{
208 return 9;
209}
210
215enum class TensorLocation : int32_t
216{
217 kDEVICE = 0,
218 kHOST = 1,
219};
220
221namespace impl
222{
224template <>
226{
227 static constexpr int32_t kVALUE = 2;
228};
229} // namespace impl
230
244{
245public:
247 bool isConstant() const noexcept
248 {
249 return mImpl->isConstant();
250 }
251
254 int32_t getConstantValue() const noexcept
255 {
256 return mImpl->getConstantValue();
257 }
258
259protected:
260 apiv::VDimensionExpr* mImpl;
261 virtual ~IDimensionExpr() noexcept = default;
262};
263
281class IExprBuilder : public INoCopy
282{
283public:
285 IDimensionExpr const* constant(int32_t value) noexcept
286 {
287 return mImpl->constant(value);
288 }
289
293 DimensionOperation op, IDimensionExpr const& first, IDimensionExpr const& second) noexcept
294 {
295 return mImpl->operation(op, first, second);
296 }
297
298protected:
299 apiv::VExprBuilder* mImpl;
300 virtual ~IExprBuilder() noexcept = default;
301};
302
309{
310public:
311 int32_t nbDims;
313};
314
321{
324
327
330};
331
351{
352public:
353 IPluginV2DynamicExt* clone() const noexcept override = 0;
354
379 virtual DimsExprs getOutputDimensions(
380 int32_t outputIndex, DimsExprs const* inputs, int32_t nbInputs, IExprBuilder& exprBuilder) noexcept = 0;
381
385 static constexpr int32_t kFORMAT_COMBINATION_LIMIT = 100;
386
419 virtual bool supportsFormatCombination(
420 int32_t pos, PluginTensorDesc const* inOut, int32_t nbInputs, int32_t nbOutputs) noexcept = 0;
421
460 virtual void configurePlugin(DynamicPluginTensorDesc const* in, int32_t nbInputs,
461 DynamicPluginTensorDesc const* out, int32_t nbOutputs) noexcept = 0;
462
472 virtual size_t getWorkspaceSize(PluginTensorDesc const* inputs, int32_t nbInputs, PluginTensorDesc const* outputs,
473 int32_t nbOutputs) const noexcept = 0;
474
487 virtual int32_t enqueue(PluginTensorDesc const* inputDesc, PluginTensorDesc const* outputDesc,
488 void const* const* inputs, void* const* outputs, void* workspace, cudaStream_t stream) noexcept = 0;
489
490protected:
498 int32_t getTensorRTVersion() const noexcept override
499 {
500 return (static_cast<int32_t>(PluginVersion::kV2_DYNAMICEXT) << 24 | (NV_TENSORRT_VERSION & 0xFFFFFF));
501 }
502
503 virtual ~IPluginV2DynamicExt() noexcept {}
504
505private:
506 // Following are obsolete base class methods, and must not be implemented or used.
507
508 void configurePlugin(Dims const*, int32_t, Dims const*, int32_t, DataType const*, DataType const*, bool const*,
509 bool const*, PluginFormat, int32_t) noexcept override final
510 {
511 }
512
513 bool supportsFormat(DataType, PluginFormat) const noexcept override final
514 {
515 return false;
516 }
517
518 Dims getOutputDimensions(int32_t, Dims const*, int32_t) noexcept override final
519 {
520 return Dims{-1, {}};
521 }
522
523 bool isOutputBroadcastAcrossBatch(int32_t, bool const*, int32_t) const noexcept override final
524 {
525 return false;
526 }
527
528 bool canBroadcastInputAcrossBatch(int32_t) const noexcept override final
529 {
530 return true;
531 }
532
533 size_t getWorkspaceSize(int32_t) const noexcept override final
534 {
535 return 0;
536 }
537
538 int32_t enqueue(int32_t, void const* const*, void* const*, void*, cudaStream_t) noexcept override final
539 {
540 return 1;
541 }
542};
543
556{
557public:
564 virtual void reportLayerTime(char const* layerName, float ms) noexcept = 0;
565
566 virtual ~IProfiler() noexcept {}
567};
568
575enum class WeightsRole : int32_t
576{
577 kKERNEL = 0,
578 kBIAS = 1,
579 kSHIFT = 2,
580 kSCALE = 3,
581 kCONSTANT = 4,
582 kANY = 5,
583};
584
586template <>
587constexpr inline int32_t EnumMax<WeightsRole>() noexcept
588{
589 return 6;
590}
591
597enum class DeviceType : int32_t
598{
599 kGPU,
600 kDLA,
601};
602
604template <>
605constexpr inline int32_t EnumMax<DeviceType>() noexcept
606{
607 return 2;
608}
609
617class IRuntime : public INoCopy
618{
619public:
620 virtual ~IRuntime() noexcept = default;
621
637 TRT_DEPRECATED nvinfer1::ICudaEngine* deserializeCudaEngine(
638 void const* blob, std::size_t size, IPluginFactory* pluginFactory) noexcept
639 {
640 return mImpl->deserializeCudaEngine(blob, size, nullptr);
641 }
642
653 void setDLACore(int32_t dlaCore) noexcept
654 {
655 mImpl->setDLACore(dlaCore);
656 }
657
662 int32_t getDLACore() const noexcept
663 {
664 return mImpl->getDLACore();
665 }
666
670 int32_t getNbDLACores() const noexcept
671 {
672 return mImpl->getNbDLACores();
673 }
674
682 TRT_DEPRECATED void destroy() noexcept
683 {
684 delete this;
685 }
686
696 void setGpuAllocator(IGpuAllocator* allocator) noexcept
697 {
698 mImpl->setGpuAllocator(allocator);
699 }
700
712 //
715 void setErrorRecorder(IErrorRecorder* recorder) noexcept
716 {
717 mImpl->setErrorRecorder(recorder);
718 }
719
731 {
732 return mImpl->getErrorRecorder();
733 }
734
745 ICudaEngine* deserializeCudaEngine(void const* blob, std::size_t size) noexcept
746 {
747 return mImpl->deserializeCudaEngine(blob, size, nullptr);
748 }
749
755 ILogger* getLogger() const noexcept
756 {
757 return mImpl->getLogger();
758 }
759
769 bool setMaxThreads(int32_t maxThreads) noexcept
770 {
771 return mImpl->setMaxThreads(maxThreads);
772 }
773
783 int32_t getMaxThreads() const noexcept
784 {
785 return mImpl->getMaxThreads();
786 }
787
788protected:
789 apiv::VRuntime* mImpl;
790};
791
799class IRefitter : public INoCopy
800{
801public:
802 virtual ~IRefitter() noexcept = default;
803
817 bool setWeights(char const* layerName, WeightsRole role, Weights weights) noexcept
818 {
819 return mImpl->setWeights(layerName, role, weights);
820 }
821
832 bool refitCudaEngine() noexcept
833 {
834 return mImpl->refitCudaEngine();
835 }
836
853 int32_t getMissing(int32_t size, char const** layerNames, WeightsRole* roles) noexcept
854 {
855 return mImpl->getMissing(size, layerNames, roles);
856 }
857
870 int32_t getAll(int32_t size, char const** layerNames, WeightsRole* roles) noexcept
871 {
872 return mImpl->getAll(size, layerNames, roles);
873 }
874
880 TRT_DEPRECATED void destroy() noexcept
881 {
882 delete this;
883 }
884
900 bool setDynamicRange(char const* tensorName, float min, float max) noexcept
901 {
902 return mImpl->setDynamicRange(tensorName, min, max);
903 }
904
914 float getDynamicRangeMin(char const* tensorName) const noexcept
915 {
916 return mImpl->getDynamicRangeMin(tensorName);
917 }
918
928 float getDynamicRangeMax(char const* tensorName) const noexcept
929 {
930 return mImpl->getDynamicRangeMax(tensorName);
931 }
932
944 int32_t getTensorsWithDynamicRange(int32_t size, char const** tensorNames) const noexcept
945 {
946 return mImpl->getTensorsWithDynamicRange(size, tensorNames);
947 }
948
960 //
963 void setErrorRecorder(IErrorRecorder* recorder) noexcept
964 {
965 mImpl->setErrorRecorder(recorder);
966 }
967
979 {
980 return mImpl->getErrorRecorder();
981 }
982
999 bool setNamedWeights(char const* name, Weights weights) noexcept
1000 {
1001 return mImpl->setNamedWeights(name, weights);
1002 }
1003
1019 int32_t getMissingWeights(int32_t size, char const** weightsNames) noexcept
1020 {
1021 return mImpl->getMissingWeights(size, weightsNames);
1022 }
1023
1035 int32_t getAllWeights(int32_t size, char const** weightsNames) noexcept
1036 {
1037 return mImpl->getAllWeights(size, weightsNames);
1038 }
1039
1045 ILogger* getLogger() const noexcept
1046 {
1047 return mImpl->getLogger();
1048 }
1049
1059 bool setMaxThreads(int32_t maxThreads) noexcept
1060 {
1061 return mImpl->setMaxThreads(maxThreads);
1062 }
1063
1073 int32_t getMaxThreads() const noexcept
1074 {
1075 return mImpl->getMaxThreads();
1076 }
1077
1078protected:
1079 apiv::VRefitter* mImpl;
1080};
1081
1092enum class OptProfileSelector : int32_t
1093{
1094 kMIN = 0,
1095 kOPT = 1,
1096 kMAX = 2
1097};
1098
1104template <>
1105constexpr inline int32_t EnumMax<OptProfileSelector>() noexcept
1106{
1107 return 3;
1108}
1109
1133{
1134public:
1162 bool setDimensions(char const* inputName, OptProfileSelector select, Dims dims) noexcept
1163 {
1164 return mImpl->setDimensions(inputName, select, dims);
1165 }
1166
1174 Dims getDimensions(char const* inputName, OptProfileSelector select) const noexcept
1175 {
1176 return mImpl->getDimensions(inputName, select);
1177 }
1178
1221 char const* inputName, OptProfileSelector select, int32_t const* values, int32_t nbValues) noexcept
1222 {
1223 return mImpl->setShapeValues(inputName, select, values, nbValues);
1224 }
1225
1234 int32_t getNbShapeValues(char const* inputName) const noexcept
1235 {
1236 return mImpl->getNbShapeValues(inputName);
1237 }
1238
1246 int32_t const* getShapeValues(char const* inputName, OptProfileSelector select) const noexcept
1247 {
1248 return mImpl->getShapeValues(inputName, select);
1249 }
1250
1264 bool setExtraMemoryTarget(float target) noexcept
1265 {
1266 return mImpl->setExtraMemoryTarget(target);
1267 }
1268
1276 float getExtraMemoryTarget() const noexcept
1277 {
1278 return mImpl->getExtraMemoryTarget();
1279 }
1280
1293 bool isValid() const noexcept
1294 {
1295 return mImpl->isValid();
1296 }
1297
1298protected:
1299 apiv::VOptimizationProfile* mImpl;
1300 virtual ~IOptimizationProfile() noexcept = default;
1301};
1302
1311enum class TacticSource : int32_t
1312{
1315 kCUBLAS = 0,
1318 kCUBLAS_LT = 1,
1321 kCUDNN = 2,
1322
1327
1331};
1332
1333template <>
1334constexpr inline int32_t EnumMax<TacticSource>() noexcept
1335{
1336 return 5;
1337}
1338
1345using TacticSources = uint32_t;
1346
1356enum class ProfilingVerbosity : int32_t
1357{
1358 kLAYER_NAMES_ONLY = 0,
1359 kNONE = 1,
1360 kDETAILED = 2,
1361
1366};
1367
1369template <>
1370constexpr inline int32_t EnumMax<ProfilingVerbosity>() noexcept
1371{
1372 return 3;
1373}
1374
1382class ICudaEngine : public INoCopy
1383{
1384public:
1385 virtual ~ICudaEngine() noexcept = default;
1386
1399 TRT_DEPRECATED int32_t getNbBindings() const noexcept
1400 {
1401 return mImpl->getNbBindings();
1402 }
1403
1427 TRT_DEPRECATED int32_t getBindingIndex(char const* name) const noexcept
1428 {
1429 return mImpl->getBindingIndex(name);
1430 }
1431
1450 TRT_DEPRECATED char const* getBindingName(int32_t bindingIndex) const noexcept
1451 {
1452 return mImpl->getBindingName(bindingIndex);
1453 }
1454
1465 TRT_DEPRECATED bool bindingIsInput(int32_t bindingIndex) const noexcept
1466 {
1467 return mImpl->bindingIsInput(bindingIndex);
1468 }
1469
1492 TRT_DEPRECATED Dims getBindingDimensions(int32_t bindingIndex) const noexcept
1493 {
1494 return mImpl->getBindingDimensions(bindingIndex);
1495 }
1496
1507 Dims getTensorShape(char const* tensorName) const noexcept
1508 {
1509 return mImpl->getTensorShape(tensorName);
1510 }
1511
1522 TRT_DEPRECATED DataType getBindingDataType(int32_t bindingIndex) const noexcept
1523 {
1524 return mImpl->getBindingDataType(bindingIndex);
1525 }
1526
1537 DataType getTensorDataType(char const* tensorName) const noexcept
1538 {
1539 return mImpl->getTensorDataType(tensorName);
1540 }
1541
1553 TRT_DEPRECATED int32_t getMaxBatchSize() const noexcept
1554 {
1555 return mImpl->getMaxBatchSize();
1556 }
1557
1567 int32_t getNbLayers() const noexcept
1568 {
1569 return mImpl->getNbLayers();
1570 }
1571
1581 IHostMemory* serialize() const noexcept
1582 {
1583 return mImpl->serialize();
1584 }
1585
1598 {
1599 return mImpl->createExecutionContext();
1600 }
1601
1609 TRT_DEPRECATED void destroy() noexcept
1610 {
1611 delete this;
1612 }
1613
1627 TRT_DEPRECATED TensorLocation getLocation(int32_t bindingIndex) const noexcept
1628 {
1629 return mImpl->getLocation(bindingIndex);
1630 }
1631
1644 TensorLocation getTensorLocation(char const* tensorName) const noexcept
1645 {
1646 return mImpl->getTensorLocation(tensorName);
1647 }
1648
1664 bool isShapeInferenceIO(char const* tensorName) const noexcept
1665 {
1666 return mImpl->isShapeInferenceIO(tensorName);
1667 }
1668
1678 TensorIOMode getTensorIOMode(char const* tensorName) const noexcept
1679 {
1680 return mImpl->getTensorIOMode(tensorName);
1681 }
1682
1688 {
1689 return mImpl->createExecutionContextWithoutDeviceMemory();
1690 }
1691
1697 size_t getDeviceMemorySize() const noexcept
1698 {
1699 return mImpl->getDeviceMemorySize();
1700 }
1701
1707 bool isRefittable() const noexcept
1708 {
1709 return mImpl->isRefittable();
1710 }
1711
1724 TRT_DEPRECATED int32_t getBindingBytesPerComponent(int32_t bindingIndex) const noexcept
1725 {
1726 return mImpl->getBindingBytesPerComponent(bindingIndex);
1727 }
1728
1741 int32_t getTensorBytesPerComponent(char const* tensorName) const noexcept
1742 {
1743 return mImpl->getTensorBytesPerComponent(tensorName);
1744 }
1745
1757 TRT_DEPRECATED int32_t getBindingComponentsPerElement(int32_t bindingIndex) const noexcept
1758 {
1759 return mImpl->getBindingComponentsPerElement(bindingIndex);
1760 }
1761
1774 int32_t getTensorComponentsPerElement(char const* tensorName) const noexcept
1775 {
1776 return mImpl->getTensorComponentsPerElement(tensorName);
1777 }
1778
1788 TRT_DEPRECATED TensorFormat getBindingFormat(int32_t bindingIndex) const noexcept
1789 {
1790 return mImpl->getBindingFormat(bindingIndex);
1791 }
1792
1801 TensorFormat getTensorFormat(char const* tensorName) const noexcept
1802 {
1803 return mImpl->getTensorFormat(tensorName);
1804 }
1805
1825 TRT_DEPRECATED char const* getBindingFormatDesc(int32_t bindingIndex) const noexcept
1826 {
1827 return mImpl->getBindingFormatDesc(bindingIndex);
1828 }
1829
1847 char const* getTensorFormatDesc(char const* tensorName) const noexcept
1848 {
1849 return mImpl->getTensorFormatDesc(tensorName);
1850 }
1851
1863 TRT_DEPRECATED int32_t getBindingVectorizedDim(int32_t bindingIndex) const noexcept
1864 {
1865 return mImpl->getBindingVectorizedDim(bindingIndex);
1866 }
1867
1878 int32_t getTensorVectorizedDim(char const* tensorName) const noexcept
1879 {
1880 return mImpl->getTensorVectorizedDim(tensorName);
1881 }
1882
1893 char const* getName() const noexcept
1894 {
1895 return mImpl->getName();
1896 }
1897
1904 int32_t getNbOptimizationProfiles() const noexcept
1905 {
1906 return mImpl->getNbOptimizationProfiles();
1907 }
1908
1937 int32_t bindingIndex, int32_t profileIndex, OptProfileSelector select) const noexcept
1938 {
1939 return mImpl->getProfileDimensions(bindingIndex, profileIndex, select);
1940 }
1941
1957 Dims getProfileShape(char const* tensorName, int32_t profileIndex, OptProfileSelector select) const noexcept
1958 {
1959 return mImpl->getProfileShape(tensorName, profileIndex, select);
1960 }
1961
1987 int32_t profileIndex, int32_t inputIndex, OptProfileSelector select) const noexcept
1988 {
1989 return mImpl->getProfileShapeValues(profileIndex, inputIndex, select);
1990 }
1991
2025 TRT_DEPRECATED bool isShapeBinding(int32_t bindingIndex) const noexcept
2026 {
2027 return mImpl->isShapeBinding(bindingIndex);
2028 }
2029
2042 TRT_DEPRECATED bool isExecutionBinding(int32_t bindingIndex) const noexcept
2043 {
2044 return mImpl->isExecutionBinding(bindingIndex);
2045 }
2046
2058 {
2059 return mImpl->getEngineCapability();
2060 }
2061
2072 //
2075 void setErrorRecorder(IErrorRecorder* recorder) noexcept
2076 {
2077 return mImpl->setErrorRecorder(recorder);
2078 }
2079
2091 {
2092 return mImpl->getErrorRecorder();
2093 }
2094
2109 bool hasImplicitBatchDimension() const noexcept
2110 {
2111 return mImpl->hasImplicitBatchDimension();
2112 }
2113
2125 {
2126 return mImpl->getTacticSources();
2127 }
2128
2136 {
2137 return mImpl->getProfilingVerbosity();
2138 }
2139
2146 {
2147 return mImpl->createEngineInspector();
2148 }
2149
2158 int32_t getNbIOTensors() const noexcept
2159 {
2160 return mImpl->getNbIOTensors();
2161 }
2162
2170 char const* getIOTensorName(int32_t index) const noexcept
2171 {
2172 return mImpl->getIOTensorName(index);
2173 }
2174
2175protected:
2176 apiv::VCudaEngine* mImpl;
2177};
2178
2189{
2190public:
2198 virtual int32_t getInterfaceVersion() const noexcept
2199 {
2200 return 1;
2201 }
2202
2219 virtual void* reallocateOutput(char const* tensorName, void* currentMemory, uint64_t size, uint64_t alignment) noexcept = 0;
2220
2229 virtual void notifyShape(char const* tensorName, Dims const& dims) noexcept = 0;
2230
2231 virtual ~IOutputAllocator() = default;
2232};
2233
2245{
2246public:
2247 virtual ~IExecutionContext() noexcept = default;
2248
2271 TRT_DEPRECATED bool execute(int32_t batchSize, void* const* bindings) noexcept
2272 {
2273 return mImpl->execute(batchSize, bindings);
2274 }
2275
2306 int32_t batchSize, void* const* bindings, cudaStream_t stream, cudaEvent_t* inputConsumed) noexcept
2307 {
2308 return mImpl->enqueue(batchSize, bindings, stream, inputConsumed);
2309 }
2310
2319 void setDebugSync(bool sync) noexcept
2320 {
2321 mImpl->setDebugSync(sync);
2322 }
2323
2329 bool getDebugSync() const noexcept
2330 {
2331 return mImpl->getDebugSync();
2332 }
2333
2339 void setProfiler(IProfiler* profiler) noexcept
2340 {
2341 mImpl->setProfiler(profiler);
2342 }
2343
2349 IProfiler* getProfiler() const noexcept
2350 {
2351 return mImpl->getProfiler();
2352 }
2353
2359 ICudaEngine const& getEngine() const noexcept
2360 {
2361 return mImpl->getEngine();
2362 }
2363
2371 TRT_DEPRECATED void destroy() noexcept
2372 {
2373 delete this;
2374 }
2375
2385 void setName(char const* name) noexcept
2386 {
2387 mImpl->setName(name);
2388 }
2389
2395 char const* getName() const noexcept
2396 {
2397 return mImpl->getName();
2398 }
2399
2412 void setDeviceMemory(void* memory) noexcept
2413 {
2414 mImpl->setDeviceMemory(memory);
2415 }
2416
2435 TRT_DEPRECATED Dims getStrides(int32_t bindingIndex) const noexcept
2436 {
2437 return mImpl->getStrides(bindingIndex);
2438 }
2439
2456 Dims getTensorStrides(char const* tensorName) const noexcept
2457 {
2458 return mImpl->getTensorStrides(tensorName);
2459 }
2460
2461public:
2500 bool setOptimizationProfile(int32_t profileIndex) noexcept
2501 {
2502 return mImpl->setOptimizationProfile(profileIndex);
2503 }
2504
2512 int32_t getOptimizationProfile() const noexcept
2513 {
2514 return mImpl->getOptimizationProfile();
2515 }
2516
2551 TRT_DEPRECATED bool setBindingDimensions(int32_t bindingIndex, Dims dimensions) noexcept
2552 {
2553 return mImpl->setBindingDimensions(bindingIndex, dimensions);
2554 }
2555
2569 bool setInputShape(char const* tensorName, Dims const& dims) noexcept
2570 {
2571 return mImpl->setInputShape(tensorName, dims);
2572 }
2573
2602 TRT_DEPRECATED Dims getBindingDimensions(int32_t bindingIndex) const noexcept
2603 {
2604 return mImpl->getBindingDimensions(bindingIndex);
2605 }
2606
2639 Dims getTensorShape(char const* tensorName) const noexcept
2640 {
2641 return mImpl->getTensorShape(tensorName);
2642 }
2643
2674 TRT_DEPRECATED bool setInputShapeBinding(int32_t bindingIndex, int32_t const* data) noexcept
2675 {
2676 return mImpl->setInputShapeBinding(bindingIndex, data);
2677 }
2678
2698 TRT_DEPRECATED bool getShapeBinding(int32_t bindingIndex, int32_t* data) const noexcept
2699 {
2700 return mImpl->getShapeBinding(bindingIndex, data);
2701 }
2702
2716 bool allInputDimensionsSpecified() const noexcept
2717 {
2718 return mImpl->allInputDimensionsSpecified();
2719 }
2720
2733 bool allInputShapesSpecified() const noexcept
2734 {
2735 return mImpl->allInputShapesSpecified();
2736 }
2737
2749 //
2752 void setErrorRecorder(IErrorRecorder* recorder) noexcept
2753 {
2754 mImpl->setErrorRecorder(recorder);
2755 }
2756
2768 {
2769 return mImpl->getErrorRecorder();
2770 }
2771
2784 bool executeV2(void* const* bindings) noexcept
2785 {
2786 return mImpl->executeV2(bindings);
2787 }
2788
2814 TRT_DEPRECATED bool enqueueV2(void* const* bindings, cudaStream_t stream, cudaEvent_t* inputConsumed) noexcept
2815 {
2816 return mImpl->enqueueV2(bindings, stream, inputConsumed);
2817 }
2818
2861 bool setOptimizationProfileAsync(int32_t profileIndex, cudaStream_t stream) noexcept
2862 {
2863 return mImpl->setOptimizationProfileAsync(profileIndex, stream);
2864 }
2865
2876 void setEnqueueEmitsProfile(bool enqueueEmitsProfile) noexcept
2877 {
2878 mImpl->setEnqueueEmitsProfile(enqueueEmitsProfile);
2879 }
2880
2887 bool getEnqueueEmitsProfile() const noexcept
2888 {
2889 return mImpl->getEnqueueEmitsProfile();
2890 }
2891
2916 bool reportToProfiler() const noexcept
2917 {
2918 return mImpl->reportToProfiler();
2919 }
2920
2958 bool setTensorAddress(char const* tensorName, void* data) noexcept
2959 {
2960 return mImpl->setTensorAddress(tensorName, data);
2961 }
2962
2975 void const* getTensorAddress(char const* tensorName) const noexcept
2976 {
2977 return mImpl->getTensorAddress(tensorName);
2978 }
2979
2997 bool setInputTensorAddress(char const* tensorName, void const* data) noexcept
2998 {
2999 return mImpl->setInputTensorAddress(tensorName, data);
3000 }
3001
3016 void* getOutputTensorAddress(char const* tensorName) const noexcept
3017 {
3018 return mImpl->getOutputTensorAddress(tensorName);
3019 }
3020
3049 int32_t inferShapes(int32_t nbMaxNames, char const** tensorNames) noexcept
3050 {
3051 return mImpl->inferShapes(nbMaxNames, tensorNames);
3052 }
3053
3065 bool setInputConsumedEvent(cudaEvent_t event) noexcept
3066 {
3067 return mImpl->setInputConsumedEvent(event);
3068 }
3069
3075 cudaEvent_t getInputConsumedEvent() const noexcept
3076 {
3077 return mImpl->getInputConsumedEvent();
3078 }
3079
3094 bool setOutputAllocator(char const* tensorName, IOutputAllocator* outputAllocator) noexcept
3095 {
3096 return mImpl->setOutputAllocator(tensorName, outputAllocator);
3097 }
3098
3107 IOutputAllocator* getOutputAllocator(char const* tensorName) const noexcept
3108 {
3109 return mImpl->getOutputAllocator(tensorName);
3110 }
3111
3125 int64_t getMaxOutputSize(char const* tensorName) const noexcept
3126 {
3127 return mImpl->getMaxOutputSize(tensorName);
3128 }
3129
3146 {
3147 return mImpl->setTemporaryStorageAllocator(allocator);
3148 }
3149
3156 {
3157 return mImpl->getTemporaryStorageAllocator();
3158 }
3159
3173 bool enqueueV3(cudaStream_t stream) noexcept
3174 {
3175 return mImpl->enqueueV3(stream);
3176 }
3177
3188 void setPersistentCacheLimit(size_t size) noexcept
3189 {
3190 mImpl->setPersistentCacheLimit(size);
3191 }
3192
3199 size_t getPersistentCacheLimit() const noexcept
3200 {
3201 return mImpl->getPersistentCacheLimit();
3202 }
3203
3223 bool setNvtxVerbosity(ProfilingVerbosity verbosity) noexcept
3224 {
3225 return mImpl->setNvtxVerbosity(verbosity);
3226 }
3227
3236 {
3237 return mImpl->getNvtxVerbosity();
3238 }
3239
3240protected:
3241 apiv::VExecutionContext* mImpl;
3242}; // class IExecutionContext
3243
3251enum class LayerInformationFormat : int32_t
3252{
3253 kONELINE = 0,
3254 kJSON = 1,
3255};
3256
3259template <>
3260constexpr inline int32_t EnumMax<LayerInformationFormat>() noexcept
3261{
3262 return 2;
3263}
3264
3281{
3282public:
3283 virtual ~IEngineInspector() noexcept = default;
3284
3297 bool setExecutionContext(IExecutionContext const* context) noexcept
3298 {
3299 return mImpl->setExecutionContext(context);
3300 }
3301
3310 {
3311 return mImpl->getExecutionContext();
3312 }
3313
3334 char const* getLayerInformation(int32_t layerIndex, LayerInformationFormat format) const noexcept
3335 {
3336 return mImpl->getLayerInformation(layerIndex, format);
3337 }
3338
3359 char const* getEngineInformation(LayerInformationFormat format) const noexcept
3360 {
3361 return mImpl->getEngineInformation(format);
3362 }
3363
3375 //
3378 void setErrorRecorder(IErrorRecorder* recorder) noexcept
3379 {
3380 mImpl->setErrorRecorder(recorder);
3381 }
3382
3394 {
3395 return mImpl->getErrorRecorder();
3396 }
3397
3398protected:
3399 apiv::VEngineInspector* mImpl;
3400}; // class IEngineInspector
3401
3402} // namespace nvinfer1
3403
3408extern "C" TENSORRTAPI void* createInferRuntime_INTERNAL(void* logger, int32_t version) noexcept;
3409
3414extern "C" TENSORRTAPI void* createInferRefitter_INTERNAL(void* engine, void* logger, int32_t version) noexcept;
3415
3420
3426extern "C" TENSORRTAPI nvinfer1::ILogger* getLogger() noexcept;
3427
3428namespace nvinfer1
3429{
3430namespace // unnamed namespace avoids linkage surprises when linking objects built with different versions of this
3431 // header.
3432{
3438inline IRuntime* createInferRuntime(ILogger& logger) noexcept
3439{
3440 return static_cast<IRuntime*>(createInferRuntime_INTERNAL(&logger, NV_TENSORRT_VERSION));
3441}
3442
3448inline IRefitter* createInferRefitter(ICudaEngine& engine, ILogger& logger) noexcept
3449{
3450 return static_cast<IRefitter*>(createInferRefitter_INTERNAL(&engine, &logger, NV_TENSORRT_VERSION));
3451}
3452
3453} // namespace
3454
3466template <typename T>
3468{
3469public:
3471 {
3472 getPluginRegistry()->registerCreator(instance, "");
3473 }
3474
3475private:
3477 T instance{};
3478};
3479
3480} // namespace nvinfer1
3481
3482#define REGISTER_TENSORRT_PLUGIN(name) \
3483 static nvinfer1::PluginRegistrar<name> pluginRegistrar##name {}
3484#endif // NV_INFER_RUNTIME_H
nvinfer1::ILogger * getLogger() noexcept
Return the logger object.
nvinfer1::IPluginRegistry * getPluginRegistry() noexcept
Return the plugin registry.
#define TENSORRTAPI
Definition: NvInferRuntimeCommon.h:54
#define NV_TENSORRT_VERSION
Definition: NvInferRuntimeCommon.h:73
#define TRT_DEPRECATED
Definition: NvInferRuntimeCommon.h:40
#define TRT_DEPRECATED_ENUM
Definition: NvInferRuntimeCommon.h:41
Structure to define the dimensions of a tensor.
Definition: NvInferRuntimeCommon.h:171
static constexpr int32_t MAX_DIMS
The maximum rank (number of dimensions) supported for a tensor.
Definition: NvInferRuntimeCommon.h:174
Definition: NvInferRuntime.h:309
int32_t nbDims
The number of dimensions.
Definition: NvInferRuntime.h:311
An engine for executing inference on a built network, with functionally unsafe features.
Definition: NvInferRuntime.h:1383
int32_t getTensorBytesPerComponent(char const *tensorName) const noexcept
Return the number of bytes per component of an element, or -1 if the provided name does not map to an...
Definition: NvInferRuntime.h:1741
bool hasImplicitBatchDimension() const noexcept
Query whether the engine was built with an implicit batch dimension.
Definition: NvInferRuntime.h:2109
char const * getIOTensorName(int32_t index) const noexcept
Return name of an IO tensor.
Definition: NvInferRuntime.h:2170
EngineCapability getEngineCapability() const noexcept
Determine what execution capability this engine has.
Definition: NvInferRuntime.h:2057
TRT_DEPRECATED bool isExecutionBinding(int32_t bindingIndex) const noexcept
True if pointer to tensor data is required for execution phase, false if nullptr can be supplied.
Definition: NvInferRuntime.h:2042
TRT_DEPRECATED int32_t getBindingIndex(char const *name) const noexcept
Retrieve the binding index for a named tensor.
Definition: NvInferRuntime.h:1427
TRT_DEPRECATED void destroy() noexcept
Destroy this object;.
Definition: NvInferRuntime.h:1609
IErrorRecorder * getErrorRecorder() const noexcept
Get the ErrorRecorder assigned to this interface.
Definition: NvInferRuntime.h:2090
TRT_DEPRECATED char const * getBindingName(int32_t bindingIndex) const noexcept
Retrieve the name corresponding to a binding index.
Definition: NvInferRuntime.h:1450
apiv::VCudaEngine * mImpl
Definition: NvInferRuntime.h:2176
char const * getTensorFormatDesc(char const *tensorName) const noexcept
Return the human readable description of the tensor format, or empty string if the provided name does...
Definition: NvInferRuntime.h:1847
Dims getProfileShape(char const *tensorName, int32_t profileIndex, OptProfileSelector select) const noexcept
Get the minimum / optimum / maximum dimensions for an input tensor given its name under an optimizati...
Definition: NvInferRuntime.h:1957
TRT_DEPRECATED int32_t const * getProfileShapeValues(int32_t profileIndex, int32_t inputIndex, OptProfileSelector select) const noexcept
Get minimum / optimum / maximum values for an input shape binding under an optimization profile.
Definition: NvInferRuntime.h:1986
TRT_DEPRECATED bool bindingIsInput(int32_t bindingIndex) const noexcept
Determine whether a binding is an input binding.
Definition: NvInferRuntime.h:1465
TRT_DEPRECATED Dims getBindingDimensions(int32_t bindingIndex) const noexcept
Get the dimensions of a binding.
Definition: NvInferRuntime.h:1492
DataType getTensorDataType(char const *tensorName) const noexcept
Determine the required data type for a buffer from its tensor name.
Definition: NvInferRuntime.h:1537
void setErrorRecorder(IErrorRecorder *recorder) noexcept
Set the ErrorRecorder for this interface.
Definition: NvInferRuntime.h:2075
size_t getDeviceMemorySize() const noexcept
Return the amount of device memory required by an execution context.
Definition: NvInferRuntime.h:1697
TacticSources getTacticSources() const noexcept
return the tactic sources required by this engine.
Definition: NvInferRuntime.h:2124
TRT_DEPRECATED TensorFormat getBindingFormat(int32_t bindingIndex) const noexcept
Return the binding format.
Definition: NvInferRuntime.h:1788
virtual ~ICudaEngine() noexcept=default
TRT_DEPRECATED int32_t getBindingComponentsPerElement(int32_t bindingIndex) const noexcept
Return the number of components included in one element.
Definition: NvInferRuntime.h:1757
char const * getName() const noexcept
Returns the name of the network associated with the engine.
Definition: NvInferRuntime.h:1893
ProfilingVerbosity getProfilingVerbosity() const noexcept
Return the ProfilingVerbosity the builder config was set to when the engine was built.
Definition: NvInferRuntime.h:2135
bool isShapeInferenceIO(char const *tensorName) const noexcept
True if tensor is required as input for shape calculations or is output from shape calculations.
Definition: NvInferRuntime.h:1664
TRT_DEPRECATED bool isShapeBinding(int32_t bindingIndex) const noexcept
True if tensor is required as input for shape calculations or output from them.
Definition: NvInferRuntime.h:2025
int32_t getTensorVectorizedDim(char const *tensorName) const noexcept
Return the dimension index that the buffer is vectorized, or -1 if the provided name does not map to ...
Definition: NvInferRuntime.h:1878
TRT_DEPRECATED DataType getBindingDataType(int32_t bindingIndex) const noexcept
Determine the required data type for a buffer from its binding index.
Definition: NvInferRuntime.h:1522
TRT_DEPRECATED int32_t getBindingVectorizedDim(int32_t bindingIndex) const noexcept
Return the dimension index that the buffer is vectorized, or -1 if the name is not found.
Definition: NvInferRuntime.h:1863
TRT_DEPRECATED char const * getBindingFormatDesc(int32_t bindingIndex) const noexcept
Return the human readable description of the tensor format, or nullptr if the provided name does not ...
Definition: NvInferRuntime.h:1825
TensorFormat getTensorFormat(char const *tensorName) const noexcept
Return the binding format, or TensorFormat::kLINEAR if the provided name does not map to an input or ...
Definition: NvInferRuntime.h:1801
IHostMemory * serialize() const noexcept
Serialize the network to a stream.
Definition: NvInferRuntime.h:1581
IExecutionContext * createExecutionContextWithoutDeviceMemory() noexcept
create an execution context without any device memory allocated
Definition: NvInferRuntime.h:1687
TensorLocation getTensorLocation(char const *tensorName) const noexcept
Get whether an input or output tensor must be on GPU or CPU.
Definition: NvInferRuntime.h:1644
IExecutionContext * createExecutionContext() noexcept
Create an execution context.
Definition: NvInferRuntime.h:1597
IEngineInspector * createEngineInspector() const noexcept
Create a new engine inspector which prints the layer information in an engine or an execution context...
Definition: NvInferRuntime.h:2145
TRT_DEPRECATED int32_t getMaxBatchSize() const noexcept
Get the maximum batch size which can be used for inference. Should only be called if the engine is bu...
Definition: NvInferRuntime.h:1553
int32_t getNbOptimizationProfiles() const noexcept
Get the number of optimization profiles defined for this engine.
Definition: NvInferRuntime.h:1904
TensorIOMode getTensorIOMode(char const *tensorName) const noexcept
Determine whether a tensor is an input or output tensor.
Definition: NvInferRuntime.h:1678
int32_t getNbLayers() const noexcept
Get the number of layers in the network.
Definition: NvInferRuntime.h:1567
TRT_DEPRECATED TensorLocation getLocation(int32_t bindingIndex) const noexcept
Get location of binding.
Definition: NvInferRuntime.h:1627
int32_t getNbIOTensors() const noexcept
Return number of IO tensors.
Definition: NvInferRuntime.h:2158
TRT_DEPRECATED Dims getProfileDimensions(int32_t bindingIndex, int32_t profileIndex, OptProfileSelector select) const noexcept
Get the minimum / optimum / maximum dimensions for a particular input binding under an optimization p...
Definition: NvInferRuntime.h:1936
TRT_DEPRECATED int32_t getBindingBytesPerComponent(int32_t bindingIndex) const noexcept
Return the number of bytes per component of an element.
Definition: NvInferRuntime.h:1724
Dims getTensorShape(char const *tensorName) const noexcept
Get shape of an input or output tensor.
Definition: NvInferRuntime.h:1507
int32_t getTensorComponentsPerElement(char const *tensorName) const noexcept
Return the number of components included in one element, or -1 if the provided name does not map to a...
Definition: NvInferRuntime.h:1774
bool isRefittable() const noexcept
Return true if an engine can be refit.
Definition: NvInferRuntime.h:1707
Definition: NvInferRuntime.h:244
bool isConstant() const noexcept
Return true if expression is a build-time constant.
Definition: NvInferRuntime.h:247
virtual ~IDimensionExpr() noexcept=default
apiv::VDimensionExpr * mImpl
Definition: NvInferRuntime.h:260
int32_t getConstantValue() const noexcept
Definition: NvInferRuntime.h:254
An engine inspector which prints out the layer information of an engine or an execution context.
Definition: NvInferRuntime.h:3281
char const * getLayerInformation(int32_t layerIndex, LayerInformationFormat format) const noexcept
Get a string describing the information about a specific layer in the current engine or the execution...
Definition: NvInferRuntime.h:3334
IErrorRecorder * getErrorRecorder() const noexcept
Get the ErrorRecorder assigned to this interface.
Definition: NvInferRuntime.h:3393
void setErrorRecorder(IErrorRecorder *recorder) noexcept
Set the ErrorRecorder for this interface.
Definition: NvInferRuntime.h:3378
IExecutionContext const * getExecutionContext() const noexcept
Get the context currently being inspected.
Definition: NvInferRuntime.h:3309
apiv::VEngineInspector * mImpl
Definition: NvInferRuntime.h:3399
virtual ~IEngineInspector() noexcept=default
char const * getEngineInformation(LayerInformationFormat format) const noexcept
Get a string describing the information about all the layers in the current engine or the execution c...
Definition: NvInferRuntime.h:3359
Reference counted application-implemented error reporting interface for TensorRT objects.
Definition: NvInferRuntimeCommon.h:1689
Context for executing inference using an engine, with functionally unsafe features.
Definition: NvInferRuntime.h:2245
TRT_DEPRECATED bool enqueue(int32_t batchSize, void *const *bindings, cudaStream_t stream, cudaEvent_t *inputConsumed) noexcept
Asynchronously execute inference on a batch.
Definition: NvInferRuntime.h:2305
IOutputAllocator * getOutputAllocator(char const *tensorName) const noexcept
Get output allocator associated with output tensor of given name, or nullptr if the provided name doe...
Definition: NvInferRuntime.h:3107
IErrorRecorder * getErrorRecorder() const noexcept
Get the ErrorRecorder assigned to this interface.
Definition: NvInferRuntime.h:2767
bool reportToProfiler() const noexcept
Calculate layer timing info for the current optimization profile in IExecutionContext and update the ...
Definition: NvInferRuntime.h:2916
virtual ~IExecutionContext() noexcept=default
void setDeviceMemory(void *memory) noexcept
Set the device memory for use by this execution context.
Definition: NvInferRuntime.h:2412
char const * getName() const noexcept
Return the name of the execution context.
Definition: NvInferRuntime.h:2395
void setDebugSync(bool sync) noexcept
Set the debug sync flag.
Definition: NvInferRuntime.h:2319
IGpuAllocator * getTemporaryStorageAllocator() const noexcept
Get allocator set by setTemporaryStorageAllocator.
Definition: NvInferRuntime.h:3155
void setEnqueueEmitsProfile(bool enqueueEmitsProfile) noexcept
Set whether enqueue emits layer timing to the profiler.
Definition: NvInferRuntime.h:2876
Dims getTensorShape(char const *tensorName) const noexcept
Return the shape of the given input or output.
Definition: NvInferRuntime.h:2639
bool setInputShape(char const *tensorName, Dims const &dims) noexcept
Set shape of given input.
Definition: NvInferRuntime.h:2569
bool executeV2(void *const *bindings) noexcept
Synchronously execute inference on a network.
Definition: NvInferRuntime.h:2784
bool getEnqueueEmitsProfile() const noexcept
Get the enqueueEmitsProfile state.
Definition: NvInferRuntime.h:2887
TRT_DEPRECATED bool setBindingDimensions(int32_t bindingIndex, Dims dimensions) noexcept
Set the dynamic dimensions of an input binding.
Definition: NvInferRuntime.h:2551
void const * getTensorAddress(char const *tensorName) const noexcept
Get memory address bound to given input or output tensor, or nullptr if the provided name does not ma...
Definition: NvInferRuntime.h:2975
bool setOutputAllocator(char const *tensorName, IOutputAllocator *outputAllocator) noexcept
Set output allocator to use for output tensor of given name. Pass nullptr to outputAllocator to unset...
Definition: NvInferRuntime.h:3094
TRT_DEPRECATED bool enqueueV2(void *const *bindings, cudaStream_t stream, cudaEvent_t *inputConsumed) noexcept
Asynchronously execute inference.
Definition: NvInferRuntime.h:2814
bool setOptimizationProfileAsync(int32_t profileIndex, cudaStream_t stream) noexcept
Select an optimization profile for the current context with async semantics.
Definition: NvInferRuntime.h:2861
TRT_DEPRECATED bool setInputShapeBinding(int32_t bindingIndex, int32_t const *data) noexcept
Set values of input tensor required by shape calculations.
Definition: NvInferRuntime.h:2674
apiv::VExecutionContext * mImpl
Definition: NvInferRuntime.h:3241
void setPersistentCacheLimit(size_t size) noexcept
Set the maximum size for persistent cache usage.
Definition: NvInferRuntime.h:3188
size_t getPersistentCacheLimit() const noexcept
Get the maximum size for persistent cache usage.
Definition: NvInferRuntime.h:3199
ICudaEngine const & getEngine() const noexcept
Get the associated engine.
Definition: NvInferRuntime.h:2359
ProfilingVerbosity getNvtxVerbosity() const noexcept
Get the NVTX verbosity of the execution context.
Definition: NvInferRuntime.h:3235
int64_t getMaxOutputSize(char const *tensorName) const noexcept
Get upper bound on an output tensor's size, in bytes, based on the current optimization profile and i...
Definition: NvInferRuntime.h:3125
int32_t inferShapes(int32_t nbMaxNames, char const **tensorNames) noexcept
Run shape calculations.
Definition: NvInferRuntime.h:3049
bool setTensorAddress(char const *tensorName, void *data) noexcept
Set memory address for given input or output tensor.
Definition: NvInferRuntime.h:2958
bool setTemporaryStorageAllocator(IGpuAllocator *allocator) noexcept
Specify allocator to use for internal temporary storage.
Definition: NvInferRuntime.h:3145
void * getOutputTensorAddress(char const *tensorName) const noexcept
Get memory address for given output.
Definition: NvInferRuntime.h:3016
bool enqueueV3(cudaStream_t stream) noexcept
Asynchronously execute inference.
Definition: NvInferRuntime.h:3173
int32_t getOptimizationProfile() const noexcept
Get the index of the currently selected optimization profile.
Definition: NvInferRuntime.h:2512
bool setInputTensorAddress(char const *tensorName, void const *data) noexcept
Set memory address for given input.
Definition: NvInferRuntime.h:2997
TRT_DEPRECATED void destroy() noexcept
Destroy this object.
Definition: NvInferRuntime.h:2371
bool getDebugSync() const noexcept
Get the debug sync flag.
Definition: NvInferRuntime.h:2329
TRT_DEPRECATED Dims getBindingDimensions(int32_t bindingIndex) const noexcept
Get the dynamic dimensions of a binding.
Definition: NvInferRuntime.h:2602
bool setInputConsumedEvent(cudaEvent_t event) noexcept
Mark input as consumed.
Definition: NvInferRuntime.h:3065
Dims getTensorStrides(char const *tensorName) const noexcept
Return the strides of the buffer for the given tensor name.
Definition: NvInferRuntime.h:2456
bool setNvtxVerbosity(ProfilingVerbosity verbosity) noexcept
Set the verbosity of the NVTX markers in the execution context.
Definition: NvInferRuntime.h:3223
IProfiler * getProfiler() const noexcept
Get the profiler.
Definition: NvInferRuntime.h:2349
TRT_DEPRECATED Dims getStrides(int32_t bindingIndex) const noexcept
Return the strides of the buffer for the given binding.
Definition: NvInferRuntime.h:2435
void setErrorRecorder(IErrorRecorder *recorder) noexcept
Set the ErrorRecorder for this interface.
Definition: NvInferRuntime.h:2752
TRT_DEPRECATED bool setOptimizationProfile(int32_t profileIndex) noexcept
Select an optimization profile for the current context.
Definition: NvInferRuntime.h:2500
bool allInputDimensionsSpecified() const noexcept
Whether all dynamic dimensions of input tensors have been specified.
Definition: NvInferRuntime.h:2716
void setProfiler(IProfiler *profiler) noexcept
Set the profiler.
Definition: NvInferRuntime.h:2339
TRT_DEPRECATED bool getShapeBinding(int32_t bindingIndex, int32_t *data) const noexcept
Get values of an input tensor required for shape calculations or an output tensor produced by shape c...
Definition: NvInferRuntime.h:2698
void setName(char const *name) noexcept
Set the name of the execution context.
Definition: NvInferRuntime.h:2385
cudaEvent_t getInputConsumedEvent() const noexcept
The event associated with consuming the input.
Definition: NvInferRuntime.h:3075
bool allInputShapesSpecified() const noexcept
Whether all input shape bindings have been specified.
Definition: NvInferRuntime.h:2733
Definition: NvInferRuntime.h:282
IDimensionExpr const * operation(DimensionOperation op, IDimensionExpr const &first, IDimensionExpr const &second) noexcept
Definition: NvInferRuntime.h:292
virtual ~IExprBuilder() noexcept=default
apiv::VExprBuilder * mImpl
Definition: NvInferRuntime.h:299
IDimensionExpr const * constant(int32_t value) noexcept
Return pointer to IDimensionExp for given value.
Definition: NvInferRuntime.h:285
Application-implemented class for controlling allocation on the GPU.
Definition: NvInferRuntimeCommon.h:1362
Class to handle library allocated memory that is accessible to the user.
Definition: NvInferRuntime.h:144
void * data() const noexcept
A pointer to the raw data that is owned by the library.
Definition: NvInferRuntime.h:149
DataType type() const noexcept
The type of the memory that was allocated.
Definition: NvInferRuntime.h:161
TRT_DEPRECATED void destroy() noexcept
Definition: NvInferRuntime.h:172
std::size_t size() const noexcept
The size in bytes of the data that was allocated.
Definition: NvInferRuntime.h:155
apiv::VHostMemory * mImpl
Definition: NvInferRuntime.h:178
virtual ~IHostMemory() noexcept=default
Application-implemented logging interface for the builder, refitter and runtime.
Definition: NvInferRuntimeCommon.h:1500
Forward declaration of IEngineInspector for use by other interfaces.
Definition: NvInferRuntime.h:43
INoCopy & operator=(INoCopy &&other)=delete
INoCopy(INoCopy const &other)=delete
INoCopy(INoCopy &&other)=delete
virtual ~INoCopy()=default
INoCopy & operator=(INoCopy const &other)=delete
Optimization profile for dynamic input dimensions and shape tensors.
Definition: NvInferRuntime.h:1133
int32_t const * getShapeValues(char const *inputName, OptProfileSelector select) const noexcept
Get the minimum / optimum / maximum values for an input shape tensor.
Definition: NvInferRuntime.h:1246
apiv::VOptimizationProfile * mImpl
Definition: NvInferRuntime.h:1299
Dims getDimensions(char const *inputName, OptProfileSelector select) const noexcept
Get the minimum / optimum / maximum dimensions for a dynamic input tensor.
Definition: NvInferRuntime.h:1174
virtual ~IOptimizationProfile() noexcept=default
float getExtraMemoryTarget() const noexcept
Get the extra memory target that has been defined for this profile.
Definition: NvInferRuntime.h:1276
bool setDimensions(char const *inputName, OptProfileSelector select, Dims dims) noexcept
Set the minimum / optimum / maximum dimensions for a dynamic input tensor.
Definition: NvInferRuntime.h:1162
bool setExtraMemoryTarget(float target) noexcept
Set a target for extra GPU memory that may be used by this profile.
Definition: NvInferRuntime.h:1264
bool setShapeValues(char const *inputName, OptProfileSelector select, int32_t const *values, int32_t nbValues) noexcept
Set the minimum / optimum / maximum values for an input shape tensor.
Definition: NvInferRuntime.h:1220
bool isValid() const noexcept
Check whether the optimization profile can be passed to an IBuilderConfig object.
Definition: NvInferRuntime.h:1293
int32_t getNbShapeValues(char const *inputName) const noexcept
Get the number of values for an input shape tensor.
Definition: NvInferRuntime.h:1234
Callback from ExecutionContext::enqueueV3()
Definition: NvInferRuntime.h:2189
virtual void notifyShape(char const *tensorName, Dims const &dims) noexcept=0
Called by TensorRT when the shape of the output tensor is known.
virtual ~IOutputAllocator()=default
virtual int32_t getInterfaceVersion() const noexcept
Return the API version of this IOutputAllocator.
Definition: NvInferRuntime.h:2198
virtual void * reallocateOutput(char const *tensorName, void *currentMemory, uint64_t size, uint64_t alignment) noexcept=0
Return a pointer to memory for an output tensor, or nullptr if memory cannot be allocated.
Single registration point for all plugins in an application. It is used to find plugin implementation...
Definition: NvInferRuntimeCommon.h:1234
virtual bool registerCreator(IPluginCreator &creator, AsciiChar const *const pluginNamespace) noexcept=0
Register a plugin creator. Returns false if one with same type is already registered.
Definition: NvInferRuntime.h:351
IPluginV2DynamicExt * clone() const noexcept override=0
Clone the plugin object. This copies over internal plugin parameters as well and returns a new plugin...
virtual ~IPluginV2DynamicExt() noexcept
Definition: NvInferRuntime.h:503
Plugin class for user-implemented layers.
Definition: NvInferRuntimeCommon.h:704
Application-implemented interface for profiling.
Definition: NvInferRuntime.h:556
virtual void reportLayerTime(char const *layerName, float ms) noexcept=0
Layer time reporting callback.
virtual ~IProfiler() noexcept
Definition: NvInferRuntime.h:566
Updates weights in an engine.
Definition: NvInferRuntime.h:800
int32_t getMaxThreads() const noexcept
get the maximum number of threads that can be used by the refitter.
Definition: NvInferRuntime.h:1073
bool setNamedWeights(char const *name, Weights weights) noexcept
Specify new weights of given name.
Definition: NvInferRuntime.h:999
int32_t getAllWeights(int32_t size, char const **weightsNames) noexcept
Get names of all weights that could be refit.
Definition: NvInferRuntime.h:1035
bool setDynamicRange(char const *tensorName, float min, float max) noexcept
Definition: NvInferRuntime.h:900
ILogger * getLogger() const noexcept
get the logger with which the refitter was created
Definition: NvInferRuntime.h:1045
int32_t getTensorsWithDynamicRange(int32_t size, char const **tensorNames) const noexcept
Get names of all tensors that have refittable dynamic ranges.
Definition: NvInferRuntime.h:944
float getDynamicRangeMin(char const *tensorName) const noexcept
Get minimum of dynamic range.
Definition: NvInferRuntime.h:914
bool refitCudaEngine() noexcept
Updates associated engine. Returns true if successful.
Definition: NvInferRuntime.h:832
int32_t getMissingWeights(int32_t size, char const **weightsNames) noexcept
Get names of missing weights.
Definition: NvInferRuntime.h:1019
int32_t getMissing(int32_t size, char const **layerNames, WeightsRole *roles) noexcept
Get description of missing weights.
Definition: NvInferRuntime.h:853
TRT_DEPRECATED void destroy() noexcept
Definition: NvInferRuntime.h:880
bool setMaxThreads(int32_t maxThreads) noexcept
Set the maximum number of threads.
Definition: NvInferRuntime.h:1059
float getDynamicRangeMax(char const *tensorName) const noexcept
Get maximum of dynamic range.
Definition: NvInferRuntime.h:928
apiv::VRefitter * mImpl
Definition: NvInferRuntime.h:1079
int32_t getAll(int32_t size, char const **layerNames, WeightsRole *roles) noexcept
Get description of all weights that could be refit.
Definition: NvInferRuntime.h:870
virtual ~IRefitter() noexcept=default
void setErrorRecorder(IErrorRecorder *recorder) noexcept
Set the ErrorRecorder for this interface.
Definition: NvInferRuntime.h:963
IErrorRecorder * getErrorRecorder() const noexcept
Get the ErrorRecorder assigned to this interface.
Definition: NvInferRuntime.h:978
Allows a serialized functionally unsafe engine to be deserialized.
Definition: NvInferRuntime.h:618
bool setMaxThreads(int32_t maxThreads) noexcept
Set the maximum number of threads.
Definition: NvInferRuntime.h:769
virtual ~IRuntime() noexcept=default
TRT_DEPRECATED void destroy() noexcept
Destroy this object.
Definition: NvInferRuntime.h:682
apiv::VRuntime * mImpl
Definition: NvInferRuntime.h:789
void setDLACore(int32_t dlaCore) noexcept
Sets the DLA core used by the network. Defaults to -1.
Definition: NvInferRuntime.h:653
int32_t getNbDLACores() const noexcept
Returns number of DLA hardware cores accessible or 0 if DLA is unavailable.
Definition: NvInferRuntime.h:670
ICudaEngine * deserializeCudaEngine(void const *blob, std::size_t size) noexcept
Deserialize an engine from a stream.
Definition: NvInferRuntime.h:745
int32_t getDLACore() const noexcept
Get the DLA core that the engine executes on.
Definition: NvInferRuntime.h:662
void setGpuAllocator(IGpuAllocator *allocator) noexcept
Set the GPU allocator.
Definition: NvInferRuntime.h:696
IErrorRecorder * getErrorRecorder() const noexcept
get the ErrorRecorder assigned to this interface.
Definition: NvInferRuntime.h:730
ILogger * getLogger() const noexcept
get the logger with which the runtime was created
Definition: NvInferRuntime.h:755
int32_t getMaxThreads() const noexcept
Get the maximum number of threads that can be used by the runtime.
Definition: NvInferRuntime.h:783
void setErrorRecorder(IErrorRecorder *recorder) noexcept
Set the ErrorRecorder for this interface.
Definition: NvInferRuntime.h:715
Register the plugin creator to the registry The static registry object will be instantiated when the ...
Definition: NvInferRuntime.h:3468
PluginRegistrar()
Definition: NvInferRuntime.h:3470
An array of weights used as a layer parameter.
Definition: NvInferRuntime.h:126
DataType type
The type of the weights.
Definition: NvInferRuntime.h:128
int64_t count
The number of weights in the array.
Definition: NvInferRuntime.h:130
void const * values
The weight values, in a contiguous array.
Definition: NvInferRuntime.h:129
IRuntime * createInferRuntime(ILogger &logger) noexcept
Create an instance of a safe::IRuntime class.
The TensorRT API version 1 namespace.
uint32_t TacticSources
Represents a collection of one or more TacticSource values combined using bitwise-OR operations.
Definition: NvInferRuntime.h:1345
EngineCapability
List of supported engine capability flows.
Definition: NvInferRuntime.h:69
DimensionOperation
An operation on two IDimensionExpr, which represent integer expressions used in dimension computation...
Definition: NvInferRuntime.h:192
@ kSUB
Subtract the second element from the first.
@ kSUM
Sum of the two operands.
@ kPROD
Product of the two operands.
@ kFLOOR_DIV
Floor division of the first element by the second.
@ kEQUAL
1 if operands are equal, 0 otherwise.
@ kMIN
Minimum of the two operands.
@ kLESS
1 if first operand is less than second operand, 0 otherwise.
@ kMAX
Maximum of the two operands.
@ kCEIL_DIV
Division rounding up.
TensorIOMode
Definition of tensor IO Mode.
Definition: NvInferRuntimeCommon.h:1878
constexpr int32_t EnumMax< WeightsRole >() noexcept
Maximum number of elements in WeightsRole enum.
Definition: NvInferRuntime.h:587
@ kV2_DYNAMICEXT
IPluginV2DynamicExt.
constexpr int32_t EnumMax< LayerInformationFormat >() noexcept
Definition: NvInferRuntime.h:3260
DataType
The type of weights and tensors.
Definition: NvInferRuntimeCommon.h:117
DeviceType
The device that this layer/network will execute on.
Definition: NvInferRuntime.h:598
@ kSCALE
Scale layer.
@ kCONSTANT
Constant layer.
@ kDEFAULT
Similar to ONNX Gather.
constexpr int32_t EnumMax< OptProfileSelector >() noexcept
Number of different values of OptProfileSelector enum.
Definition: NvInferRuntime.h:1105
WeightsRole
How a layer uses particular Weights.
Definition: NvInferRuntime.h:576
@ kSHIFT
shift part of IScaleLayer
@ kANY
Any other weights role.
@ kBIAS
bias for IConvolutionLayer, IDeconvolutionLayer, or IFullyConnectedLayer
@ kKERNEL
kernel for IConvolutionLayer, IDeconvolutionLayer, or IFullyConnectedLayer
constexpr int32_t EnumMax< ProfilingVerbosity >() noexcept
Maximum number of profile verbosity levels in ProfilingVerbosity enum.
Definition: NvInferRuntime.h:1370
ProfilingVerbosity
List of verbosity levels of layer information exposed in NVTX annotations and in IEngineInspector.
Definition: NvInferRuntime.h:1357
@ kLAYER_NAMES_ONLY
Print only the layer names. This is the default setting.
@ kDETAILED
Print detailed layer information including layer names and layer parameters.
TacticSource
List of tactic sources for TensorRT.
Definition: NvInferRuntime.h:1312
TensorFormat PluginFormat
PluginFormat is reserved for backward compatibility.
Definition: NvInferRuntimeCommon.h:326
@ kMIN
Minimum of the two elements.
TensorFormat
Format of the input/output tensors.
Definition: NvInferRuntimeCommon.h:201
constexpr int32_t EnumMax< TacticSource >() noexcept
Maximum number of tactic sources in TacticSource enum.
Definition: NvInferRuntime.h:1334
LayerInformationFormat
The format in which the IEngineInspector prints the layer information.
Definition: NvInferRuntime.h:3252
@ kJSON
Print layer information in JSON format.
@ kONELINE
Print layer information in one line per layer.
constexpr int32_t EnumMax< DeviceType >() noexcept
Maximum number of elements in DeviceType enum.
Definition: NvInferRuntime.h:605
constexpr int32_t EnumMax< DimensionOperation >() noexcept
Maximum number of elements in DimensionOperation enum.
Definition: NvInferRuntime.h:206
TensorLocation
The location for tensor data storage, device or host.
Definition: NvInferRuntime.h:216
@ kHOST
Data stored on host.
@ kDEVICE
Data stored on device.
OptProfileSelector
When setting or querying optimization profile parameters (such as shape tensor inputs or dynamic dime...
Definition: NvInferRuntime.h:1093
@ kOPT
This is used to set or get the value that is used in the optimization (kernel selection).
Definition: NvInferRuntime.h:321
Dims min
Lower bounds on tensor’s dimensions.
Definition: NvInferRuntime.h:326
Dims max
Upper bounds on tensor’s dimensions.
Definition: NvInferRuntime.h:329
PluginTensorDesc desc
Information required to interpret a pointer to tensor data, except that desc.dims has -1 in place of ...
Definition: NvInferRuntime.h:323
Fields that a plugin might see for an input or output.
Definition: NvInferRuntimeCommon.h:350
Declaration of EnumMaxImpl struct to store maximum number of elements in an enumeration type.
Definition: NvInferRuntimeCommon.h:102

  Copyright © 2024 NVIDIA Corporation
  Privacy Policy | Manage My Privacy | Do Not Sell or Share My Data | Terms of Service | Accessibility | Corporate Policies | Product Security | Contact