TensorRT 8.6.0
NvInferRuntime.h
Go to the documentation of this file.
1/*
2 * SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
3 * SPDX-License-Identifier: LicenseRef-NvidiaProprietary
4 *
5 * NVIDIA CORPORATION, its affiliates and licensors retain all intellectual
6 * property and proprietary rights in and to this material, related
7 * documentation and any modifications thereto. Any use, reproduction,
8 * disclosure or distribution of this material and related documentation
9 * without an express license agreement from NVIDIA CORPORATION or
10 * its affiliates is strictly prohibited.
11 */
12
13#ifndef NV_INFER_RUNTIME_H
14#define NV_INFER_RUNTIME_H
15
21
22#include "NvInferImpl.h"
24
25namespace nvinfer1
26{
27
28class IExecutionContext;
29class ICudaEngine;
30class IPluginFactory;
31class IEngineInspector;
32
//!
//! \class INoCopy
//!
//! \brief Forward declaration of IEngineInspector for use by other interfaces.
//! Base class for all TensorRT interfaces that are implemented by the TensorRT libraries.
//! Objects of such classes are not movable or copyable, and can only be manipulated
//! via pointers or references. (NOTE(review): class declaration line was lost in
//! extraction; reconstructed from the constructor names below.)
//!
class INoCopy
{
protected:
    INoCopy() = default;
    virtual ~INoCopy() = default;
    INoCopy(INoCopy const& other) = delete;
    INoCopy& operator=(INoCopy const& other) = delete;
    INoCopy(INoCopy&& other) = delete;
    INoCopy& operator=(INoCopy&& other) = delete;
};
52
67
68enum class EngineCapability : int32_t
69{
74 kSTANDARD = 0,
75
78
85 kSAFETY = 1,
86
89
96
99};
100
101namespace impl
102{
104template <>
106{
107 static constexpr int32_t kVALUE = 3;
108};
109} // namespace impl
110
126{
127public:
129 void const* values;
130 int64_t count;
131};
132
143class IHostMemory : public INoCopy
144{
145public:
146 virtual ~IHostMemory() noexcept = default;
147
149 void* data() const noexcept
150 {
151 return mImpl->data();
152 }
153
155 std::size_t size() const noexcept
156 {
157 return mImpl->size();
158 }
159
161 DataType type() const noexcept
162 {
163 return mImpl->type();
164 }
172 TRT_DEPRECATED void destroy() noexcept
173 {
174 delete this;
175 }
176
177protected:
178 apiv::VHostMemory* mImpl;
179};
180
191enum class DimensionOperation : int32_t
192{
193 kSUM = 0,
194 kPROD = 1,
195 kMAX = 2,
196 kMIN = 3,
197 kSUB = 4,
198 kEQUAL = 5,
199 kLESS = 6,
200 kFLOOR_DIV = 7,
201 kCEIL_DIV = 8
202};
203
205template <>
206constexpr inline int32_t EnumMax<DimensionOperation>() noexcept
207{
208 return 9;
209}
210
215enum class TensorLocation : int32_t
216{
217 kDEVICE = 0,
218 kHOST = 1,
219};
220
221namespace impl
222{
224template <>
226{
227 static constexpr int32_t kVALUE = 2;
228};
229} // namespace impl
230
244{
245public:
247 bool isConstant() const noexcept
248 {
249 return mImpl->isConstant();
250 }
251
254 int32_t getConstantValue() const noexcept
255 {
256 return mImpl->getConstantValue();
257 }
258
259protected:
260 apiv::VDimensionExpr* mImpl;
261 virtual ~IDimensionExpr() noexcept = default;
262};
263
281class IExprBuilder : public INoCopy
282{
283public:
285 IDimensionExpr const* constant(int32_t value) noexcept
286 {
287 return mImpl->constant(value);
288 }
289
293 DimensionOperation op, IDimensionExpr const& first, IDimensionExpr const& second) noexcept
294 {
295 return mImpl->operation(op, first, second);
296 }
297
298protected:
299 apiv::VExprBuilder* mImpl;
300 virtual ~IExprBuilder() noexcept = default;
301};
302
309{
310public:
311 int32_t nbDims;
313};
314
321{
324
327
330};
331
351{
352public:
353 IPluginV2DynamicExt* clone() const noexcept override = 0;
354
379 virtual DimsExprs getOutputDimensions(
380 int32_t outputIndex, DimsExprs const* inputs, int32_t nbInputs, IExprBuilder& exprBuilder) noexcept = 0;
381
385 static constexpr int32_t kFORMAT_COMBINATION_LIMIT = 100;
386
419 virtual bool supportsFormatCombination(
420 int32_t pos, PluginTensorDesc const* inOut, int32_t nbInputs, int32_t nbOutputs) noexcept = 0;
421
460 virtual void configurePlugin(DynamicPluginTensorDesc const* in, int32_t nbInputs,
461 DynamicPluginTensorDesc const* out, int32_t nbOutputs) noexcept = 0;
462
472 virtual size_t getWorkspaceSize(PluginTensorDesc const* inputs, int32_t nbInputs, PluginTensorDesc const* outputs,
473 int32_t nbOutputs) const noexcept = 0;
474
487 virtual int32_t enqueue(PluginTensorDesc const* inputDesc, PluginTensorDesc const* outputDesc,
488 void const* const* inputs, void* const* outputs, void* workspace, cudaStream_t stream) noexcept = 0;
489
490protected:
498 int32_t getTensorRTVersion() const noexcept override
499 {
500 return (static_cast<int32_t>(PluginVersion::kV2_DYNAMICEXT) << 24 | (NV_TENSORRT_VERSION & 0xFFFFFF));
501 }
502
503 virtual ~IPluginV2DynamicExt() noexcept {}
504
505private:
506 // Following are obsolete base class methods, and must not be implemented or used.
507
508 void configurePlugin(Dims const*, int32_t, Dims const*, int32_t, DataType const*, DataType const*, bool const*,
509 bool const*, PluginFormat, int32_t) noexcept override final
510 {
511 }
512
513 bool supportsFormat(DataType, PluginFormat) const noexcept override final
514 {
515 return false;
516 }
517
518 Dims getOutputDimensions(int32_t, Dims const*, int32_t) noexcept override final
519 {
520 return Dims{-1, {}};
521 }
522
523 bool isOutputBroadcastAcrossBatch(int32_t, bool const*, int32_t) const noexcept override final
524 {
525 return false;
526 }
527
528 bool canBroadcastInputAcrossBatch(int32_t) const noexcept override final
529 {
530 return true;
531 }
532
533 size_t getWorkspaceSize(int32_t) const noexcept override final
534 {
535 return 0;
536 }
537
538 int32_t enqueue(int32_t, void const* const*, void* const*, void*, cudaStream_t) noexcept override final
539 {
540 return 1;
541 }
542};
543
//!
//! \class IProfiler
//!
//! \brief Application-implemented interface for profiling layer execution times.
//! (NOTE(review): class declaration line lost in extraction; reconstructed.)
//!
class IProfiler
{
public:
    //! Layer-time reporting callback: layer name and execution time in milliseconds.
    virtual void reportLayerTime(char const* layerName, float ms) noexcept = 0;

    virtual ~IProfiler() noexcept {}
};
569
576enum class WeightsRole : int32_t
577{
578 kKERNEL = 0,
579 kBIAS = 1,
580 kSHIFT = 2,
581 kSCALE = 3,
582 kCONSTANT = 4,
583 kANY = 5,
584};
585
587template <>
588constexpr inline int32_t EnumMax<WeightsRole>() noexcept
589{
590 return 6;
591}
592
598enum class DeviceType : int32_t
599{
600 kGPU,
601 kDLA,
602};
603
605template <>
606constexpr inline int32_t EnumMax<DeviceType>() noexcept
607{
608 return 2;
609}
610
621enum class TempfileControlFlag : int32_t
622{
625
630};
631
633template <>
634constexpr inline int32_t EnumMax<TempfileControlFlag>() noexcept
635{
636 return 2;
637}
638
644using TempfileControlFlags = uint32_t;
645
653class IRuntime : public INoCopy
654{
655public:
656 virtual ~IRuntime() noexcept = default;
657
673 TRT_DEPRECATED nvinfer1::ICudaEngine* deserializeCudaEngine(
674 void const* blob, std::size_t size, IPluginFactory* pluginFactory) noexcept
675 {
676 return mImpl->deserializeCudaEngine(blob, size, nullptr);
677 }
678
689 void setDLACore(int32_t dlaCore) noexcept
690 {
691 mImpl->setDLACore(dlaCore);
692 }
693
698 int32_t getDLACore() const noexcept
699 {
700 return mImpl->getDLACore();
701 }
702
706 int32_t getNbDLACores() const noexcept
707 {
708 return mImpl->getNbDLACores();
709 }
710
718 TRT_DEPRECATED void destroy() noexcept
719 {
720 delete this;
721 }
722
732 void setGpuAllocator(IGpuAllocator* allocator) noexcept
733 {
734 mImpl->setGpuAllocator(allocator);
735 }
736
748 //
751 void setErrorRecorder(IErrorRecorder* recorder) noexcept
752 {
753 mImpl->setErrorRecorder(recorder);
754 }
755
767 {
768 return mImpl->getErrorRecorder();
769 }
770
781 ICudaEngine* deserializeCudaEngine(void const* blob, std::size_t size) noexcept
782 {
783 return mImpl->deserializeCudaEngine(blob, size, nullptr);
784 }
785
791 ILogger* getLogger() const noexcept
792 {
793 return mImpl->getLogger();
794 }
795
805 bool setMaxThreads(int32_t maxThreads) noexcept
806 {
807 return mImpl->setMaxThreads(maxThreads);
808 }
809
819 int32_t getMaxThreads() const noexcept
820 {
821 return mImpl->getMaxThreads();
822 }
823
854 void setTemporaryDirectory(char const* path) noexcept
855 {
856 return mImpl->setTemporaryDirectory(path);
857 }
858
865 char const* getTemporaryDirectory() const noexcept
866 {
867 return mImpl->getTemporaryDirectory();
868 }
869
882 {
883 return mImpl->setTempfileControlFlags(flags);
884 }
885
894 {
895 return mImpl->getTempfileControlFlags();
896 }
897
904 {
905 return mImpl->getPluginRegistry();
906 }
907
921 IRuntime* loadRuntime(char const* path) noexcept
922 {
923 return mImpl->loadRuntime(path);
924 }
925
933 void setEngineHostCodeAllowed(bool allowed) noexcept
934 {
935 return mImpl->setEngineHostCodeAllowed(allowed);
936 }
937
943 bool getEngineHostCodeAllowed() const noexcept
944 {
945 return mImpl->getEngineHostCodeAllowed();
946 }
947
948protected:
949 apiv::VRuntime* mImpl;
950};
951
959class IRefitter : public INoCopy
960{
961public:
962 virtual ~IRefitter() noexcept = default;
963
977 bool setWeights(char const* layerName, WeightsRole role, Weights weights) noexcept
978 {
979 return mImpl->setWeights(layerName, role, weights);
980 }
981
992 bool refitCudaEngine() noexcept
993 {
994 return mImpl->refitCudaEngine();
995 }
996
1013 int32_t getMissing(int32_t size, char const** layerNames, WeightsRole* roles) noexcept
1014 {
1015 return mImpl->getMissing(size, layerNames, roles);
1016 }
1017
1030 int32_t getAll(int32_t size, char const** layerNames, WeightsRole* roles) noexcept
1031 {
1032 return mImpl->getAll(size, layerNames, roles);
1033 }
1034
1040 TRT_DEPRECATED void destroy() noexcept
1041 {
1042 delete this;
1043 }
1044
1060 bool setDynamicRange(char const* tensorName, float min, float max) noexcept
1061 {
1062 return mImpl->setDynamicRange(tensorName, min, max);
1063 }
1064
1074 float getDynamicRangeMin(char const* tensorName) const noexcept
1075 {
1076 return mImpl->getDynamicRangeMin(tensorName);
1077 }
1078
1088 float getDynamicRangeMax(char const* tensorName) const noexcept
1089 {
1090 return mImpl->getDynamicRangeMax(tensorName);
1091 }
1092
1104 int32_t getTensorsWithDynamicRange(int32_t size, char const** tensorNames) const noexcept
1105 {
1106 return mImpl->getTensorsWithDynamicRange(size, tensorNames);
1107 }
1108
1120 //
1123 void setErrorRecorder(IErrorRecorder* recorder) noexcept
1124 {
1125 mImpl->setErrorRecorder(recorder);
1126 }
1127
1139 {
1140 return mImpl->getErrorRecorder();
1141 }
1142
1159 bool setNamedWeights(char const* name, Weights weights) noexcept
1160 {
1161 return mImpl->setNamedWeights(name, weights);
1162 }
1163
1179 int32_t getMissingWeights(int32_t size, char const** weightsNames) noexcept
1180 {
1181 return mImpl->getMissingWeights(size, weightsNames);
1182 }
1183
1195 int32_t getAllWeights(int32_t size, char const** weightsNames) noexcept
1196 {
1197 return mImpl->getAllWeights(size, weightsNames);
1198 }
1199
1205 ILogger* getLogger() const noexcept
1206 {
1207 return mImpl->getLogger();
1208 }
1209
1219 bool setMaxThreads(int32_t maxThreads) noexcept
1220 {
1221 return mImpl->setMaxThreads(maxThreads);
1222 }
1223
1233 int32_t getMaxThreads() const noexcept
1234 {
1235 return mImpl->getMaxThreads();
1236 }
1237
1238protected:
1239 apiv::VRefitter* mImpl;
1240};
1241
1252enum class OptProfileSelector : int32_t
1253{
1254 kMIN = 0,
1255 kOPT = 1,
1256 kMAX = 2
1257};
1258
1264template <>
1265constexpr inline int32_t EnumMax<OptProfileSelector>() noexcept
1266{
1267 return 3;
1268}
1269
1293{
1294public:
1322 bool setDimensions(char const* inputName, OptProfileSelector select, Dims dims) noexcept
1323 {
1324 return mImpl->setDimensions(inputName, select, dims);
1325 }
1326
1334 Dims getDimensions(char const* inputName, OptProfileSelector select) const noexcept
1335 {
1336 return mImpl->getDimensions(inputName, select);
1337 }
1338
1381 char const* inputName, OptProfileSelector select, int32_t const* values, int32_t nbValues) noexcept
1382 {
1383 return mImpl->setShapeValues(inputName, select, values, nbValues);
1384 }
1385
1394 int32_t getNbShapeValues(char const* inputName) const noexcept
1395 {
1396 return mImpl->getNbShapeValues(inputName);
1397 }
1398
1406 int32_t const* getShapeValues(char const* inputName, OptProfileSelector select) const noexcept
1407 {
1408 return mImpl->getShapeValues(inputName, select);
1409 }
1410
1424 bool setExtraMemoryTarget(float target) noexcept
1425 {
1426 return mImpl->setExtraMemoryTarget(target);
1427 }
1428
1436 float getExtraMemoryTarget() const noexcept
1437 {
1438 return mImpl->getExtraMemoryTarget();
1439 }
1440
1453 bool isValid() const noexcept
1454 {
1455 return mImpl->isValid();
1456 }
1457
1458protected:
1459 apiv::VOptimizationProfile* mImpl;
1460 virtual ~IOptimizationProfile() noexcept = default;
1461};
1462
1471enum class TacticSource : int32_t
1472{
1475 kCUBLAS = 0,
1478 kCUBLAS_LT = 1,
1481 kCUDNN = 2,
1482
1487
1491};
1492
1493template <>
1494constexpr inline int32_t EnumMax<TacticSource>() noexcept
1495{
1496 return 5;
1497}
1498
1505using TacticSources = uint32_t;
1506
1516enum class ProfilingVerbosity : int32_t
1517{
1518 kLAYER_NAMES_ONLY = 0,
1519 kNONE = 1,
1520 kDETAILED = 2,
1521
1526};
1527
1529template <>
1530constexpr inline int32_t EnumMax<ProfilingVerbosity>() noexcept
1531{
1532 return 3;
1533}
1534
1542class ICudaEngine : public INoCopy
1543{
1544public:
1545 virtual ~ICudaEngine() noexcept = default;
1546
1559 TRT_DEPRECATED int32_t getNbBindings() const noexcept
1560 {
1561 return mImpl->getNbBindings();
1562 }
1563
1587 TRT_DEPRECATED int32_t getBindingIndex(char const* name) const noexcept
1588 {
1589 return mImpl->getBindingIndex(name);
1590 }
1591
1610 TRT_DEPRECATED char const* getBindingName(int32_t bindingIndex) const noexcept
1611 {
1612 return mImpl->getBindingName(bindingIndex);
1613 }
1614
1625 TRT_DEPRECATED bool bindingIsInput(int32_t bindingIndex) const noexcept
1626 {
1627 return mImpl->bindingIsInput(bindingIndex);
1628 }
1629
1652 TRT_DEPRECATED Dims getBindingDimensions(int32_t bindingIndex) const noexcept
1653 {
1654 return mImpl->getBindingDimensions(bindingIndex);
1655 }
1656
1667 Dims getTensorShape(char const* tensorName) const noexcept
1668 {
1669 return mImpl->getTensorShape(tensorName);
1670 }
1671
1682 TRT_DEPRECATED DataType getBindingDataType(int32_t bindingIndex) const noexcept
1683 {
1684 return mImpl->getBindingDataType(bindingIndex);
1685 }
1686
1697 DataType getTensorDataType(char const* tensorName) const noexcept
1698 {
1699 return mImpl->getTensorDataType(tensorName);
1700 }
1701
1713 TRT_DEPRECATED int32_t getMaxBatchSize() const noexcept
1714 {
1715 return mImpl->getMaxBatchSize();
1716 }
1717
1727 int32_t getNbLayers() const noexcept
1728 {
1729 return mImpl->getNbLayers();
1730 }
1731
1741 IHostMemory* serialize() const noexcept
1742 {
1743 return mImpl->serialize();
1744 }
1745
1759 {
1760 return mImpl->createExecutionContext();
1761 }
1762
1770 TRT_DEPRECATED void destroy() noexcept
1771 {
1772 delete this;
1773 }
1774
1788 TRT_DEPRECATED TensorLocation getLocation(int32_t bindingIndex) const noexcept
1789 {
1790 return mImpl->getLocation(bindingIndex);
1791 }
1792
1805 TensorLocation getTensorLocation(char const* tensorName) const noexcept
1806 {
1807 return mImpl->getTensorLocation(tensorName);
1808 }
1809
1825 bool isShapeInferenceIO(char const* tensorName) const noexcept
1826 {
1827 return mImpl->isShapeInferenceIO(tensorName);
1828 }
1829
1839 TensorIOMode getTensorIOMode(char const* tensorName) const noexcept
1840 {
1841 return mImpl->getTensorIOMode(tensorName);
1842 }
1843
1849 {
1850 return mImpl->createExecutionContextWithoutDeviceMemory();
1851 }
1852
1858 size_t getDeviceMemorySize() const noexcept
1859 {
1860 return mImpl->getDeviceMemorySize();
1861 }
1862
1868 bool isRefittable() const noexcept
1869 {
1870 return mImpl->isRefittable();
1871 }
1872
1885 TRT_DEPRECATED int32_t getBindingBytesPerComponent(int32_t bindingIndex) const noexcept
1886 {
1887 return mImpl->getBindingBytesPerComponent(bindingIndex);
1888 }
1889
1902 int32_t getTensorBytesPerComponent(char const* tensorName) const noexcept
1903 {
1904 return mImpl->getTensorBytesPerComponent(tensorName);
1905 }
1906
1918 TRT_DEPRECATED int32_t getBindingComponentsPerElement(int32_t bindingIndex) const noexcept
1919 {
1920 return mImpl->getBindingComponentsPerElement(bindingIndex);
1921 }
1922
1935 int32_t getTensorComponentsPerElement(char const* tensorName) const noexcept
1936 {
1937 return mImpl->getTensorComponentsPerElement(tensorName);
1938 }
1939
1949 TRT_DEPRECATED TensorFormat getBindingFormat(int32_t bindingIndex) const noexcept
1950 {
1951 return mImpl->getBindingFormat(bindingIndex);
1952 }
1953
1962 TensorFormat getTensorFormat(char const* tensorName) const noexcept
1963 {
1964 return mImpl->getTensorFormat(tensorName);
1965 }
1966
1986 TRT_DEPRECATED char const* getBindingFormatDesc(int32_t bindingIndex) const noexcept
1987 {
1988 return mImpl->getBindingFormatDesc(bindingIndex);
1989 }
1990
2008 char const* getTensorFormatDesc(char const* tensorName) const noexcept
2009 {
2010 return mImpl->getTensorFormatDesc(tensorName);
2011 }
2012
2024 TRT_DEPRECATED int32_t getBindingVectorizedDim(int32_t bindingIndex) const noexcept
2025 {
2026 return mImpl->getBindingVectorizedDim(bindingIndex);
2027 }
2028
2039 int32_t getTensorVectorizedDim(char const* tensorName) const noexcept
2040 {
2041 return mImpl->getTensorVectorizedDim(tensorName);
2042 }
2043
2054 char const* getName() const noexcept
2055 {
2056 return mImpl->getName();
2057 }
2058
2065 int32_t getNbOptimizationProfiles() const noexcept
2066 {
2067 return mImpl->getNbOptimizationProfiles();
2068 }
2069
2098 int32_t bindingIndex, int32_t profileIndex, OptProfileSelector select) const noexcept
2099 {
2100 return mImpl->getProfileDimensions(bindingIndex, profileIndex, select);
2101 }
2102
2118 Dims getProfileShape(char const* tensorName, int32_t profileIndex, OptProfileSelector select) const noexcept
2119 {
2120 return mImpl->getProfileShape(tensorName, profileIndex, select);
2121 }
2122
2148 int32_t profileIndex, int32_t inputIndex, OptProfileSelector select) const noexcept
2149 {
2150 return mImpl->getProfileShapeValues(profileIndex, inputIndex, select);
2151 }
2152
2186 TRT_DEPRECATED bool isShapeBinding(int32_t bindingIndex) const noexcept
2187 {
2188 return mImpl->isShapeBinding(bindingIndex);
2189 }
2190
2203 TRT_DEPRECATED bool isExecutionBinding(int32_t bindingIndex) const noexcept
2204 {
2205 return mImpl->isExecutionBinding(bindingIndex);
2206 }
2207
2219 {
2220 return mImpl->getEngineCapability();
2221 }
2222
2233 //
2236 void setErrorRecorder(IErrorRecorder* recorder) noexcept
2237 {
2238 return mImpl->setErrorRecorder(recorder);
2239 }
2240
2252 {
2253 return mImpl->getErrorRecorder();
2254 }
2255
2270 bool hasImplicitBatchDimension() const noexcept
2271 {
2272 return mImpl->hasImplicitBatchDimension();
2273 }
2274
2286 {
2287 return mImpl->getTacticSources();
2288 }
2289
2297 {
2298 return mImpl->getProfilingVerbosity();
2299 }
2300
2307 {
2308 return mImpl->createEngineInspector();
2309 }
2310
2319 int32_t getNbIOTensors() const noexcept
2320 {
2321 return mImpl->getNbIOTensors();
2322 }
2323
2331 char const* getIOTensorName(int32_t index) const noexcept
2332 {
2333 return mImpl->getIOTensorName(index);
2334 }
2335
2344 {
2345 return mImpl->getHardwareCompatibilityLevel();
2346 }
2347
2358 int32_t getNbAuxStreams() const noexcept
2359 {
2360 return mImpl->getNbAuxStreams();
2361 }
2362
2363protected:
2364 apiv::VCudaEngine* mImpl;
2365};
2366
2377{
2378public:
2386 virtual int32_t getInterfaceVersion() const noexcept
2387 {
2388 return 1;
2389 }
2390
2407 virtual void* reallocateOutput(char const* tensorName, void* currentMemory, uint64_t size, uint64_t alignment) noexcept = 0;
2408
2417 virtual void notifyShape(char const* tensorName, Dims const& dims) noexcept = 0;
2418
2419 virtual ~IOutputAllocator() = default;
2420};
2421
2433{
2434public:
2435 virtual ~IExecutionContext() noexcept = default;
2436
2459 TRT_DEPRECATED bool execute(int32_t batchSize, void* const* bindings) noexcept
2460 {
2461 return mImpl->execute(batchSize, bindings);
2462 }
2463
2494 int32_t batchSize, void* const* bindings, cudaStream_t stream, cudaEvent_t* inputConsumed) noexcept
2495 {
2496 return mImpl->enqueue(batchSize, bindings, stream, inputConsumed);
2497 }
2498
2507 void setDebugSync(bool sync) noexcept
2508 {
2509 mImpl->setDebugSync(sync);
2510 }
2511
2517 bool getDebugSync() const noexcept
2518 {
2519 return mImpl->getDebugSync();
2520 }
2521
2527 void setProfiler(IProfiler* profiler) noexcept
2528 {
2529 mImpl->setProfiler(profiler);
2530 }
2531
2537 IProfiler* getProfiler() const noexcept
2538 {
2539 return mImpl->getProfiler();
2540 }
2541
2547 ICudaEngine const& getEngine() const noexcept
2548 {
2549 return mImpl->getEngine();
2550 }
2551
2559 TRT_DEPRECATED void destroy() noexcept
2560 {
2561 delete this;
2562 }
2563
2573 void setName(char const* name) noexcept
2574 {
2575 mImpl->setName(name);
2576 }
2577
2583 char const* getName() const noexcept
2584 {
2585 return mImpl->getName();
2586 }
2587
2600 void setDeviceMemory(void* memory) noexcept
2601 {
2602 mImpl->setDeviceMemory(memory);
2603 }
2604
2623 TRT_DEPRECATED Dims getStrides(int32_t bindingIndex) const noexcept
2624 {
2625 return mImpl->getStrides(bindingIndex);
2626 }
2627
2644 Dims getTensorStrides(char const* tensorName) const noexcept
2645 {
2646 return mImpl->getTensorStrides(tensorName);
2647 }
2648
2649public:
2682 bool setOptimizationProfile(int32_t profileIndex) noexcept
2683 {
2684 return mImpl->setOptimizationProfile(profileIndex);
2685 }
2686
2696 int32_t getOptimizationProfile() const noexcept
2697 {
2698 return mImpl->getOptimizationProfile();
2699 }
2700
2735 TRT_DEPRECATED bool setBindingDimensions(int32_t bindingIndex, Dims dimensions) noexcept
2736 {
2737 return mImpl->setBindingDimensions(bindingIndex, dimensions);
2738 }
2739
2753 bool setInputShape(char const* tensorName, Dims const& dims) noexcept
2754 {
2755 return mImpl->setInputShape(tensorName, dims);
2756 }
2757
2786 TRT_DEPRECATED Dims getBindingDimensions(int32_t bindingIndex) const noexcept
2787 {
2788 return mImpl->getBindingDimensions(bindingIndex);
2789 }
2790
2823 Dims getTensorShape(char const* tensorName) const noexcept
2824 {
2825 return mImpl->getTensorShape(tensorName);
2826 }
2827
2858 TRT_DEPRECATED bool setInputShapeBinding(int32_t bindingIndex, int32_t const* data) noexcept
2859 {
2860 return mImpl->setInputShapeBinding(bindingIndex, data);
2861 }
2862
2882 TRT_DEPRECATED bool getShapeBinding(int32_t bindingIndex, int32_t* data) const noexcept
2883 {
2884 return mImpl->getShapeBinding(bindingIndex, data);
2885 }
2886
2900 bool allInputDimensionsSpecified() const noexcept
2901 {
2902 return mImpl->allInputDimensionsSpecified();
2903 }
2904
2917 bool allInputShapesSpecified() const noexcept
2918 {
2919 return mImpl->allInputShapesSpecified();
2920 }
2921
2933 //
2936 void setErrorRecorder(IErrorRecorder* recorder) noexcept
2937 {
2938 mImpl->setErrorRecorder(recorder);
2939 }
2940
2952 {
2953 return mImpl->getErrorRecorder();
2954 }
2955
2968 bool executeV2(void* const* bindings) noexcept
2969 {
2970 return mImpl->executeV2(bindings);
2971 }
2972
2998 TRT_DEPRECATED bool enqueueV2(void* const* bindings, cudaStream_t stream, cudaEvent_t* inputConsumed) noexcept
2999 {
3000 return mImpl->enqueueV2(bindings, stream, inputConsumed);
3001 }
3002
3045 bool setOptimizationProfileAsync(int32_t profileIndex, cudaStream_t stream) noexcept
3046 {
3047 return mImpl->setOptimizationProfileAsync(profileIndex, stream);
3048 }
3049
3060 void setEnqueueEmitsProfile(bool enqueueEmitsProfile) noexcept
3061 {
3062 mImpl->setEnqueueEmitsProfile(enqueueEmitsProfile);
3063 }
3064
3071 bool getEnqueueEmitsProfile() const noexcept
3072 {
3073 return mImpl->getEnqueueEmitsProfile();
3074 }
3075
3100 bool reportToProfiler() const noexcept
3101 {
3102 return mImpl->reportToProfiler();
3103 }
3104
3142 bool setTensorAddress(char const* tensorName, void* data) noexcept
3143 {
3144 return mImpl->setTensorAddress(tensorName, data);
3145 }
3146
3159 void const* getTensorAddress(char const* tensorName) const noexcept
3160 {
3161 return mImpl->getTensorAddress(tensorName);
3162 }
3163
3181 bool setInputTensorAddress(char const* tensorName, void const* data) noexcept
3182 {
3183 return mImpl->setInputTensorAddress(tensorName, data);
3184 }
3185
3200 void* getOutputTensorAddress(char const* tensorName) const noexcept
3201 {
3202 return mImpl->getOutputTensorAddress(tensorName);
3203 }
3204
3233 int32_t inferShapes(int32_t nbMaxNames, char const** tensorNames) noexcept
3234 {
3235 return mImpl->inferShapes(nbMaxNames, tensorNames);
3236 }
3237
3249 bool setInputConsumedEvent(cudaEvent_t event) noexcept
3250 {
3251 return mImpl->setInputConsumedEvent(event);
3252 }
3253
3259 cudaEvent_t getInputConsumedEvent() const noexcept
3260 {
3261 return mImpl->getInputConsumedEvent();
3262 }
3263
3278 bool setOutputAllocator(char const* tensorName, IOutputAllocator* outputAllocator) noexcept
3279 {
3280 return mImpl->setOutputAllocator(tensorName, outputAllocator);
3281 }
3282
3291 IOutputAllocator* getOutputAllocator(char const* tensorName) const noexcept
3292 {
3293 return mImpl->getOutputAllocator(tensorName);
3294 }
3295
3309 int64_t getMaxOutputSize(char const* tensorName) const noexcept
3310 {
3311 return mImpl->getMaxOutputSize(tensorName);
3312 }
3313
3330 {
3331 return mImpl->setTemporaryStorageAllocator(allocator);
3332 }
3333
3340 {
3341 return mImpl->getTemporaryStorageAllocator();
3342 }
3343
3357 bool enqueueV3(cudaStream_t stream) noexcept
3358 {
3359 return mImpl->enqueueV3(stream);
3360 }
3361
3372 void setPersistentCacheLimit(size_t size) noexcept
3373 {
3374 mImpl->setPersistentCacheLimit(size);
3375 }
3376
3383 size_t getPersistentCacheLimit() const noexcept
3384 {
3385 return mImpl->getPersistentCacheLimit();
3386 }
3387
3407 bool setNvtxVerbosity(ProfilingVerbosity verbosity) noexcept
3408 {
3409 return mImpl->setNvtxVerbosity(verbosity);
3410 }
3411
3420 {
3421 return mImpl->getNvtxVerbosity();
3422 }
3423
3450 void setAuxStreams(cudaStream_t* auxStreams, int32_t nbStreams) noexcept
3451 {
3452 mImpl->setAuxStreams(auxStreams, nbStreams);
3453 }
3454
3455protected:
3456 apiv::VExecutionContext* mImpl;
3457}; // class IExecutionContext
3458
3466enum class LayerInformationFormat : int32_t
3467{
3468 kONELINE = 0,
3469 kJSON = 1,
3470};
3471
3474template <>
3475constexpr inline int32_t EnumMax<LayerInformationFormat>() noexcept
3476{
3477 return 2;
3478}
3479
3496{
3497public:
3498 virtual ~IEngineInspector() noexcept = default;
3499
3512 bool setExecutionContext(IExecutionContext const* context) noexcept
3513 {
3514 return mImpl->setExecutionContext(context);
3515 }
3516
3525 {
3526 return mImpl->getExecutionContext();
3527 }
3528
3549 char const* getLayerInformation(int32_t layerIndex, LayerInformationFormat format) const noexcept
3550 {
3551 return mImpl->getLayerInformation(layerIndex, format);
3552 }
3553
3572 char const* getEngineInformation(LayerInformationFormat format) const noexcept
3573 {
3574 return mImpl->getEngineInformation(format);
3575 }
3576
3588 //
3591 void setErrorRecorder(IErrorRecorder* recorder) noexcept
3592 {
3593 mImpl->setErrorRecorder(recorder);
3594 }
3595
3607 {
3608 return mImpl->getErrorRecorder();
3609 }
3610
3611protected:
3612 apiv::VEngineInspector* mImpl;
3613}; // class IEngineInspector
3614
3615} // namespace nvinfer1
3616
3621extern "C" TENSORRTAPI void* createInferRuntime_INTERNAL(void* logger, int32_t version) noexcept;
3622
3627extern "C" TENSORRTAPI void* createInferRefitter_INTERNAL(void* engine, void* logger, int32_t version) noexcept;
3628
3633
3639extern "C" TENSORRTAPI nvinfer1::ILogger* getLogger() noexcept;
3640
3641namespace nvinfer1
3642{
3643namespace // unnamed namespace avoids linkage surprises when linking objects built with different versions of this
3644 // header.
3645{
3651inline IRuntime* createInferRuntime(ILogger& logger) noexcept
3652{
3653 return static_cast<IRuntime*>(createInferRuntime_INTERNAL(&logger, NV_TENSORRT_VERSION));
3654}
3655
3661inline IRefitter* createInferRefitter(ICudaEngine& engine, ILogger& logger) noexcept
3662{
3663 return static_cast<IRefitter*>(createInferRefitter_INTERNAL(&engine, &logger, NV_TENSORRT_VERSION));
3664}
3665
3666} // namespace
3667
3679template <typename T>
3681{
3682public:
3684 {
3685 getPluginRegistry()->registerCreator(instance, "");
3686 }
3687
3688private:
3690 T instance{};
3691};
3692
3693} // namespace nvinfer1
3694
3695#define REGISTER_TENSORRT_PLUGIN(name) \
3696 static nvinfer1::PluginRegistrar<name> pluginRegistrar##name {}
3697
3698namespace nvinfer1
3699{
3709{
3710public:
3718 virtual ILogger* findLogger() = 0;
3719
3720protected:
3721 virtual ~ILoggerFinder() = default;
3722};
3723
3724} // namespace nvinfer1
3725
3726#endif // NV_INFER_RUNTIME_H
nvinfer1::ILogger * getLogger() noexcept
Return the logger object.
nvinfer1::IPluginRegistry * getPluginRegistry() noexcept
Return the plugin registry.
#define TENSORRTAPI
Definition: NvInferRuntimeBase.h:54
#define NV_TENSORRT_VERSION
Definition: NvInferRuntimeBase.h:76
#define TRT_DEPRECATED
Definition: NvInferRuntimeBase.h:40
#define TRT_DEPRECATED_ENUM
Definition: NvInferRuntimeBase.h:41
Structure to define the dimensions of a tensor.
Definition: NvInferRuntimeBase.h:179
static constexpr int32_t MAX_DIMS
The maximum rank (number of dimensions) supported for a tensor.
Definition: NvInferRuntimeBase.h:182
Definition: NvInferRuntime.h:309
int32_t nbDims
The number of dimensions.
Definition: NvInferRuntime.h:311
An engine for executing inference on a built network, with functionally unsafe features.
Definition: NvInferRuntime.h:1543
int32_t getTensorBytesPerComponent(char const *tensorName) const noexcept
Return the number of bytes per component of an element, or -1 if the provided name does not map to an...
Definition: NvInferRuntime.h:1902
bool hasImplicitBatchDimension() const noexcept
Query whether the engine was built with an implicit batch dimension.
Definition: NvInferRuntime.h:2270
char const * getIOTensorName(int32_t index) const noexcept
Return name of an IO tensor.
Definition: NvInferRuntime.h:2331
EngineCapability getEngineCapability() const noexcept
Determine what execution capability this engine has.
Definition: NvInferRuntime.h:2218
TRT_DEPRECATED bool isExecutionBinding(int32_t bindingIndex) const noexcept
True if pointer to tensor data is required for execution phase, false if nullptr can be supplied.
Definition: NvInferRuntime.h:2203
TRT_DEPRECATED int32_t getBindingIndex(char const *name) const noexcept
Retrieve the binding index for a named tensor.
Definition: NvInferRuntime.h:1587
TRT_DEPRECATED void destroy() noexcept
Destroy this object;.
Definition: NvInferRuntime.h:1770
IErrorRecorder * getErrorRecorder() const noexcept
Get the ErrorRecorder assigned to this interface.
Definition: NvInferRuntime.h:2251
TRT_DEPRECATED char const * getBindingName(int32_t bindingIndex) const noexcept
Retrieve the name corresponding to a binding index.
Definition: NvInferRuntime.h:1610
apiv::VCudaEngine * mImpl
Definition: NvInferRuntime.h:2364
char const * getTensorFormatDesc(char const *tensorName) const noexcept
Return the human readable description of the tensor format, or empty string if the provided name does...
Definition: NvInferRuntime.h:2008
Dims getProfileShape(char const *tensorName, int32_t profileIndex, OptProfileSelector select) const noexcept
Get the minimum / optimum / maximum dimensions for an input tensor given its name under an optimizati...
Definition: NvInferRuntime.h:2118
TRT_DEPRECATED int32_t const * getProfileShapeValues(int32_t profileIndex, int32_t inputIndex, OptProfileSelector select) const noexcept
Get minimum / optimum / maximum values for an input shape binding under an optimization profile.
Definition: NvInferRuntime.h:2147
int32_t getNbAuxStreams() const noexcept
Return the number of auxiliary streams used by this engine.
Definition: NvInferRuntime.h:2358
TRT_DEPRECATED bool bindingIsInput(int32_t bindingIndex) const noexcept
Determine whether a binding is an input binding.
Definition: NvInferRuntime.h:1625
TRT_DEPRECATED Dims getBindingDimensions(int32_t bindingIndex) const noexcept
Get the dimensions of a binding.
Definition: NvInferRuntime.h:1652
DataType getTensorDataType(char const *tensorName) const noexcept
Determine the required data type for a buffer from its tensor name.
Definition: NvInferRuntime.h:1697
void setErrorRecorder(IErrorRecorder *recorder) noexcept
Set the ErrorRecorder for this interface.
Definition: NvInferRuntime.h:2236
size_t getDeviceMemorySize() const noexcept
Return the amount of device memory required by an execution context.
Definition: NvInferRuntime.h:1858
TacticSources getTacticSources() const noexcept
return the tactic sources required by this engine.
Definition: NvInferRuntime.h:2285
TRT_DEPRECATED TensorFormat getBindingFormat(int32_t bindingIndex) const noexcept
Return the binding format.
Definition: NvInferRuntime.h:1949
virtual ~ICudaEngine() noexcept=default
TRT_DEPRECATED int32_t getBindingComponentsPerElement(int32_t bindingIndex) const noexcept
Return the number of components included in one element.
Definition: NvInferRuntime.h:1918
char const * getName() const noexcept
Returns the name of the network associated with the engine.
Definition: NvInferRuntime.h:2054
ProfilingVerbosity getProfilingVerbosity() const noexcept
Return the ProfilingVerbosity the builder config was set to when the engine was built.
Definition: NvInferRuntime.h:2296
bool isShapeInferenceIO(char const *tensorName) const noexcept
True if tensor is required as input for shape calculations or is output from shape calculations.
Definition: NvInferRuntime.h:1825
TRT_DEPRECATED bool isShapeBinding(int32_t bindingIndex) const noexcept
True if tensor is required as input for shape calculations or output from them.
Definition: NvInferRuntime.h:2186
int32_t getTensorVectorizedDim(char const *tensorName) const noexcept
Return the dimension index that the buffer is vectorized, or -1 if the provided name does not map to ...
Definition: NvInferRuntime.h:2039
TRT_DEPRECATED DataType getBindingDataType(int32_t bindingIndex) const noexcept
Determine the required data type for a buffer from its binding index.
Definition: NvInferRuntime.h:1682
TRT_DEPRECATED int32_t getBindingVectorizedDim(int32_t bindingIndex) const noexcept
Return the dimension index that the buffer is vectorized, or -1 if the name is not found.
Definition: NvInferRuntime.h:2024
TRT_DEPRECATED char const * getBindingFormatDesc(int32_t bindingIndex) const noexcept
Return the human readable description of the tensor format, or nullptr if the provided name does not ...
Definition: NvInferRuntime.h:1986
TensorFormat getTensorFormat(char const *tensorName) const noexcept
Return the binding format, or TensorFormat::kLINEAR if the provided name does not map to an input or ...
Definition: NvInferRuntime.h:1962
IHostMemory * serialize() const noexcept
Serialize the network to a stream.
Definition: NvInferRuntime.h:1741
IExecutionContext * createExecutionContextWithoutDeviceMemory() noexcept
create an execution context without any device memory allocated
Definition: NvInferRuntime.h:1848
TensorLocation getTensorLocation(char const *tensorName) const noexcept
Get whether an input or output tensor must be on GPU or CPU.
Definition: NvInferRuntime.h:1805
IExecutionContext * createExecutionContext() noexcept
Create an execution context.
Definition: NvInferRuntime.h:1758
IEngineInspector * createEngineInspector() const noexcept
Create a new engine inspector which prints the layer information in an engine or an execution context...
Definition: NvInferRuntime.h:2306
HardwareCompatibilityLevel getHardwareCompatibilityLevel() const noexcept
Return the hardware compatibility level of this engine.
Definition: NvInferRuntime.h:2343
TRT_DEPRECATED int32_t getMaxBatchSize() const noexcept
Get the maximum batch size which can be used for inference. Should only be called if the engine is bu...
Definition: NvInferRuntime.h:1713
int32_t getNbOptimizationProfiles() const noexcept
Get the number of optimization profiles defined for this engine.
Definition: NvInferRuntime.h:2065
TensorIOMode getTensorIOMode(char const *tensorName) const noexcept
Determine whether a tensor is an input or output tensor.
Definition: NvInferRuntime.h:1839
int32_t getNbLayers() const noexcept
Get the number of layers in the network.
Definition: NvInferRuntime.h:1727
TRT_DEPRECATED TensorLocation getLocation(int32_t bindingIndex) const noexcept
Get location of binding.
Definition: NvInferRuntime.h:1788
int32_t getNbIOTensors() const noexcept
Return number of IO tensors.
Definition: NvInferRuntime.h:2319
TRT_DEPRECATED Dims getProfileDimensions(int32_t bindingIndex, int32_t profileIndex, OptProfileSelector select) const noexcept
Get the minimum / optimum / maximum dimensions for a particular input binding under an optimization p...
Definition: NvInferRuntime.h:2097
TRT_DEPRECATED int32_t getBindingBytesPerComponent(int32_t bindingIndex) const noexcept
Return the number of bytes per component of an element.
Definition: NvInferRuntime.h:1885
Dims getTensorShape(char const *tensorName) const noexcept
Get shape of an input or output tensor.
Definition: NvInferRuntime.h:1667
int32_t getTensorComponentsPerElement(char const *tensorName) const noexcept
Return the number of components included in one element, or -1 if the provided name does not map to a...
Definition: NvInferRuntime.h:1935
bool isRefittable() const noexcept
Return true if an engine can be refit.
Definition: NvInferRuntime.h:1868
Definition: NvInferRuntime.h:244
bool isConstant() const noexcept
Return true if expression is a build-time constant.
Definition: NvInferRuntime.h:247
virtual ~IDimensionExpr() noexcept=default
apiv::VDimensionExpr * mImpl
Definition: NvInferRuntime.h:260
int32_t getConstantValue() const noexcept
Definition: NvInferRuntime.h:254
An engine inspector which prints out the layer information of an engine or an execution context.
Definition: NvInferRuntime.h:3496
char const * getLayerInformation(int32_t layerIndex, LayerInformationFormat format) const noexcept
Get a string describing the information about a specific layer in the current engine or the execution...
Definition: NvInferRuntime.h:3549
IErrorRecorder * getErrorRecorder() const noexcept
Get the ErrorRecorder assigned to this interface.
Definition: NvInferRuntime.h:3606
void setErrorRecorder(IErrorRecorder *recorder) noexcept
Set the ErrorRecorder for this interface.
Definition: NvInferRuntime.h:3591
IExecutionContext const * getExecutionContext() const noexcept
Get the context currently being inspected.
Definition: NvInferRuntime.h:3524
apiv::VEngineInspector * mImpl
Definition: NvInferRuntime.h:3612
virtual ~IEngineInspector() noexcept=default
char const * getEngineInformation(LayerInformationFormat format) const noexcept
Get a string describing the information about all the layers in the current engine or the execution c...
Definition: NvInferRuntime.h:3572
Reference counted application-implemented error reporting interface for TensorRT objects.
Definition: NvInferRuntimeBase.h:694
Context for executing inference using an engine, with functionally unsafe features.
Definition: NvInferRuntime.h:2433
TRT_DEPRECATED bool enqueue(int32_t batchSize, void *const *bindings, cudaStream_t stream, cudaEvent_t *inputConsumed) noexcept
Enqueue inference of a batch on a stream.
Definition: NvInferRuntime.h:2493
IOutputAllocator * getOutputAllocator(char const *tensorName) const noexcept
Get output allocator associated with output tensor of given name, or nullptr if the provided name doe...
Definition: NvInferRuntime.h:3291
IErrorRecorder * getErrorRecorder() const noexcept
Get the ErrorRecorder assigned to this interface.
Definition: NvInferRuntime.h:2951
bool reportToProfiler() const noexcept
Calculate layer timing info for the current optimization profile in IExecutionContext and update the ...
Definition: NvInferRuntime.h:3100
virtual ~IExecutionContext() noexcept=default
void setDeviceMemory(void *memory) noexcept
Set the device memory for use by this execution context.
Definition: NvInferRuntime.h:2600
char const * getName() const noexcept
Return the name of the execution context.
Definition: NvInferRuntime.h:2583
void setDebugSync(bool sync) noexcept
Set the debug sync flag.
Definition: NvInferRuntime.h:2507
IGpuAllocator * getTemporaryStorageAllocator() const noexcept
Get allocator set by setTemporaryStorageAllocator.
Definition: NvInferRuntime.h:3339
void setEnqueueEmitsProfile(bool enqueueEmitsProfile) noexcept
Set whether enqueue emits layer timing to the profiler.
Definition: NvInferRuntime.h:3060
Dims getTensorShape(char const *tensorName) const noexcept
Return the shape of the given input or output.
Definition: NvInferRuntime.h:2823
bool setInputShape(char const *tensorName, Dims const &dims) noexcept
Set shape of given input.
Definition: NvInferRuntime.h:2753
bool executeV2(void *const *bindings) noexcept
Synchronously execute inference on a network.
Definition: NvInferRuntime.h:2968
bool getEnqueueEmitsProfile() const noexcept
Get the enqueueEmitsProfile state.
Definition: NvInferRuntime.h:3071
TRT_DEPRECATED bool setBindingDimensions(int32_t bindingIndex, Dims dimensions) noexcept
Set the dynamic dimensions of an input binding.
Definition: NvInferRuntime.h:2735
void const * getTensorAddress(char const *tensorName) const noexcept
Get memory address bound to given input or output tensor, or nullptr if the provided name does not ma...
Definition: NvInferRuntime.h:3159
bool setOutputAllocator(char const *tensorName, IOutputAllocator *outputAllocator) noexcept
Set output allocator to use for output tensor of given name. Pass nullptr to outputAllocator to unset...
Definition: NvInferRuntime.h:3278
TRT_DEPRECATED bool enqueueV2(void *const *bindings, cudaStream_t stream, cudaEvent_t *inputConsumed) noexcept
Enqueue inference on a stream.
Definition: NvInferRuntime.h:2998
bool setOptimizationProfileAsync(int32_t profileIndex, cudaStream_t stream) noexcept
Select an optimization profile for the current context with async semantics.
Definition: NvInferRuntime.h:3045
TRT_DEPRECATED bool setInputShapeBinding(int32_t bindingIndex, int32_t const *data) noexcept
Set values of input tensor required by shape calculations.
Definition: NvInferRuntime.h:2858
apiv::VExecutionContext * mImpl
Definition: NvInferRuntime.h:3456
void setPersistentCacheLimit(size_t size) noexcept
Set the maximum size for persistent cache usage.
Definition: NvInferRuntime.h:3372
size_t getPersistentCacheLimit() const noexcept
Get the maximum size for persistent cache usage.
Definition: NvInferRuntime.h:3383
ICudaEngine const & getEngine() const noexcept
Get the associated engine.
Definition: NvInferRuntime.h:2547
ProfilingVerbosity getNvtxVerbosity() const noexcept
Get the NVTX verbosity of the execution context.
Definition: NvInferRuntime.h:3419
void setAuxStreams(cudaStream_t *auxStreams, int32_t nbStreams) noexcept
Set the auxiliary streams that TensorRT should launch kernels on in the next enqueueV3() call.
Definition: NvInferRuntime.h:3450
int64_t getMaxOutputSize(char const *tensorName) const noexcept
Get upper bound on an output tensor's size, in bytes, based on the current optimization profile and i...
Definition: NvInferRuntime.h:3309
int32_t inferShapes(int32_t nbMaxNames, char const **tensorNames) noexcept
Run shape calculations.
Definition: NvInferRuntime.h:3233
bool setTensorAddress(char const *tensorName, void *data) noexcept
Set memory address for given input or output tensor.
Definition: NvInferRuntime.h:3142
bool setTemporaryStorageAllocator(IGpuAllocator *allocator) noexcept
Specify allocator to use for internal temporary storage.
Definition: NvInferRuntime.h:3329
void * getOutputTensorAddress(char const *tensorName) const noexcept
Get memory address for given output.
Definition: NvInferRuntime.h:3200
bool enqueueV3(cudaStream_t stream) noexcept
Enqueue inference on a stream.
Definition: NvInferRuntime.h:3357
int32_t getOptimizationProfile() const noexcept
Get the index of the currently selected optimization profile.
Definition: NvInferRuntime.h:2696
bool setInputTensorAddress(char const *tensorName, void const *data) noexcept
Set memory address for given input.
Definition: NvInferRuntime.h:3181
TRT_DEPRECATED void destroy() noexcept
Destroy this object.
Definition: NvInferRuntime.h:2559
bool getDebugSync() const noexcept
Get the debug sync flag.
Definition: NvInferRuntime.h:2517
TRT_DEPRECATED Dims getBindingDimensions(int32_t bindingIndex) const noexcept
Get the dynamic dimensions of a binding.
Definition: NvInferRuntime.h:2786
bool setInputConsumedEvent(cudaEvent_t event) noexcept
Mark input as consumed.
Definition: NvInferRuntime.h:3249
Dims getTensorStrides(char const *tensorName) const noexcept
Return the strides of the buffer for the given tensor name.
Definition: NvInferRuntime.h:2644
bool setNvtxVerbosity(ProfilingVerbosity verbosity) noexcept
Set the verbosity of the NVTX markers in the execution context.
Definition: NvInferRuntime.h:3407
IProfiler * getProfiler() const noexcept
Get the profiler.
Definition: NvInferRuntime.h:2537
TRT_DEPRECATED Dims getStrides(int32_t bindingIndex) const noexcept
Return the strides of the buffer for the given binding.
Definition: NvInferRuntime.h:2623
void setErrorRecorder(IErrorRecorder *recorder) noexcept
Set the ErrorRecorder for this interface.
Definition: NvInferRuntime.h:2936
TRT_DEPRECATED bool setOptimizationProfile(int32_t profileIndex) noexcept
Select an optimization profile for the current context.
Definition: NvInferRuntime.h:2682
bool allInputDimensionsSpecified() const noexcept
Whether all dynamic dimensions of input tensors have been specified.
Definition: NvInferRuntime.h:2900
void setProfiler(IProfiler *profiler) noexcept
Set the profiler.
Definition: NvInferRuntime.h:2527
TRT_DEPRECATED bool getShapeBinding(int32_t bindingIndex, int32_t *data) const noexcept
Get values of an input tensor required for shape calculations or an output tensor produced by shape c...
Definition: NvInferRuntime.h:2882
void setName(char const *name) noexcept
Set the name of the execution context.
Definition: NvInferRuntime.h:2573
cudaEvent_t getInputConsumedEvent() const noexcept
The event associated with consuming the input.
Definition: NvInferRuntime.h:3259
bool allInputShapesSpecified() const noexcept
Whether all input shape bindings have been specified.
Definition: NvInferRuntime.h:2917
Definition: NvInferRuntime.h:282
IDimensionExpr const * operation(DimensionOperation op, IDimensionExpr const &first, IDimensionExpr const &second) noexcept
Definition: NvInferRuntime.h:292
virtual ~IExprBuilder() noexcept=default
apiv::VExprBuilder * mImpl
Definition: NvInferRuntime.h:299
IDimensionExpr const * constant(int32_t value) noexcept
Return pointer to IDimensionExp for given value.
Definition: NvInferRuntime.h:285
Application-implemented class for controlling allocation on the GPU.
Definition: NvInferRuntimeBase.h:367
Class to handle library allocated memory that is accessible to the user.
Definition: NvInferRuntime.h:144
void * data() const noexcept
A pointer to the raw data that is owned by the library.
Definition: NvInferRuntime.h:149
DataType type() const noexcept
The type of the memory that was allocated.
Definition: NvInferRuntime.h:161
TRT_DEPRECATED void destroy() noexcept
Definition: NvInferRuntime.h:172
std::size_t size() const noexcept
The size in bytes of the data that was allocated.
Definition: NvInferRuntime.h:155
apiv::VHostMemory * mImpl
Definition: NvInferRuntime.h:178
virtual ~IHostMemory() noexcept=default
A virtual base class to find a logger. Allows a plugin to find an instance of a logger if it needs to...
Definition: NvInferRuntime.h:3709
virtual ILogger * findLogger()=0
Get the logger used by the engine or execution context which called the plugin method.
virtual ~ILoggerFinder()=default
Application-implemented logging interface for the builder, refitter and runtime.
Definition: NvInferRuntimeBase.h:505
Forward declaration of IEngineInspector for use by other interfaces.
Definition: NvInferRuntime.h:43
INoCopy & operator=(INoCopy &&other)=delete
INoCopy(INoCopy const &other)=delete
INoCopy(INoCopy &&other)=delete
virtual ~INoCopy()=default
INoCopy & operator=(INoCopy const &other)=delete
Optimization profile for dynamic input dimensions and shape tensors.
Definition: NvInferRuntime.h:1293
int32_t const * getShapeValues(char const *inputName, OptProfileSelector select) const noexcept
Get the minimum / optimum / maximum values for an input shape tensor.
Definition: NvInferRuntime.h:1406
apiv::VOptimizationProfile * mImpl
Definition: NvInferRuntime.h:1459
Dims getDimensions(char const *inputName, OptProfileSelector select) const noexcept
Get the minimum / optimum / maximum dimensions for a dynamic input tensor.
Definition: NvInferRuntime.h:1334
virtual ~IOptimizationProfile() noexcept=default
float getExtraMemoryTarget() const noexcept
Get the extra memory target that has been defined for this profile.
Definition: NvInferRuntime.h:1436
bool setDimensions(char const *inputName, OptProfileSelector select, Dims dims) noexcept
Set the minimum / optimum / maximum dimensions for a dynamic input tensor.
Definition: NvInferRuntime.h:1322
bool setExtraMemoryTarget(float target) noexcept
Set a target for extra GPU memory that may be used by this profile.
Definition: NvInferRuntime.h:1424
bool setShapeValues(char const *inputName, OptProfileSelector select, int32_t const *values, int32_t nbValues) noexcept
Set the minimum / optimum / maximum values for an input shape tensor.
Definition: NvInferRuntime.h:1380
bool isValid() const noexcept
Check whether the optimization profile can be passed to an IBuilderConfig object.
Definition: NvInferRuntime.h:1453
int32_t getNbShapeValues(char const *inputName) const noexcept
Get the number of values for an input shape tensor.
Definition: NvInferRuntime.h:1394
Callback from ExecutionContext::enqueueV3()
Definition: NvInferRuntime.h:2377
virtual void notifyShape(char const *tensorName, Dims const &dims) noexcept=0
Called by TensorRT when the shape of the output tensor is known.
virtual ~IOutputAllocator()=default
virtual int32_t getInterfaceVersion() const noexcept
Return the API version of this IOutputAllocator.
Definition: NvInferRuntime.h:2386
virtual void * reallocateOutput(char const *tensorName, void *currentMemory, uint64_t size, uint64_t alignment) noexcept=0
Return a pointer to memory for an output tensor, or nullptr if memory cannot be allocated.
Single registration point for all plugins in an application. It is used to find plugin implementation...
Definition: NvInferRuntimeCommon.h:50
virtual bool registerCreator(IPluginCreator &creator, AsciiChar const *const pluginNamespace) noexcept=0
Register a plugin creator. Returns false if one with same type is already registered.
Definition: NvInferRuntime.h:351
IPluginV2DynamicExt * clone() const noexcept override=0
Clone the plugin object. This copies over internal plugin parameters as well and returns a new plugin...
virtual ~IPluginV2DynamicExt() noexcept
Definition: NvInferRuntime.h:503
Plugin class for user-implemented layers.
Definition: NvInferRuntimePlugin.h:408
Application-implemented interface for profiling.
Definition: NvInferRuntime.h:556
virtual void reportLayerTime(char const *layerName, float ms) noexcept=0
Layer time reporting callback.
virtual ~IProfiler() noexcept
Definition: NvInferRuntime.h:567
Updates weights in an engine.
Definition: NvInferRuntime.h:960
int32_t getMaxThreads() const noexcept
get the maximum number of threads that can be used by the refitter.
Definition: NvInferRuntime.h:1233
bool setNamedWeights(char const *name, Weights weights) noexcept
Specify new weights of given name.
Definition: NvInferRuntime.h:1159
int32_t getAllWeights(int32_t size, char const **weightsNames) noexcept
Get names of all weights that could be refit.
Definition: NvInferRuntime.h:1195
bool setDynamicRange(char const *tensorName, float min, float max) noexcept
Definition: NvInferRuntime.h:1060
ILogger * getLogger() const noexcept
get the logger with which the refitter was created
Definition: NvInferRuntime.h:1205
int32_t getTensorsWithDynamicRange(int32_t size, char const **tensorNames) const noexcept
Get names of all tensors that have refittable dynamic ranges.
Definition: NvInferRuntime.h:1104
float getDynamicRangeMin(char const *tensorName) const noexcept
Get minimum of dynamic range.
Definition: NvInferRuntime.h:1074
bool refitCudaEngine() noexcept
Updates associated engine. Return true if successful.
Definition: NvInferRuntime.h:992
int32_t getMissingWeights(int32_t size, char const **weightsNames) noexcept
Get names of missing weights.
Definition: NvInferRuntime.h:1179
int32_t getMissing(int32_t size, char const **layerNames, WeightsRole *roles) noexcept
Get description of missing weights.
Definition: NvInferRuntime.h:1013
TRT_DEPRECATED void destroy() noexcept
Definition: NvInferRuntime.h:1040
bool setMaxThreads(int32_t maxThreads) noexcept
Set the maximum number of threads.
Definition: NvInferRuntime.h:1219
float getDynamicRangeMax(char const *tensorName) const noexcept
Get maximum of dynamic range.
Definition: NvInferRuntime.h:1088
apiv::VRefitter * mImpl
Definition: NvInferRuntime.h:1239
int32_t getAll(int32_t size, char const **layerNames, WeightsRole *roles) noexcept
Get description of all weights that could be refit.
Definition: NvInferRuntime.h:1030
virtual ~IRefitter() noexcept=default
void setErrorRecorder(IErrorRecorder *recorder) noexcept
Set the ErrorRecorder for this interface.
Definition: NvInferRuntime.h:1123
IErrorRecorder * getErrorRecorder() const noexcept
Get the ErrorRecorder assigned to this interface.
Definition: NvInferRuntime.h:1138
Allows a serialized functionally unsafe engine to be deserialized.
Definition: NvInferRuntime.h:654
bool setMaxThreads(int32_t maxThreads) noexcept
Set the maximum number of threads.
Definition: NvInferRuntime.h:805
IRuntime * loadRuntime(char const *path) noexcept
Load IRuntime from the file.
Definition: NvInferRuntime.h:921
bool getEngineHostCodeAllowed() const noexcept
Get whether the runtime is allowed to deserialize engines with host executable code.
Definition: NvInferRuntime.h:943
TempfileControlFlags getTempfileControlFlags() const noexcept
Get the tempfile control flags for this runtime.
Definition: NvInferRuntime.h:893
void setEngineHostCodeAllowed(bool allowed) noexcept
Set whether the runtime is allowed to deserialize engines with host executable code.
Definition: NvInferRuntime.h:933
virtual ~IRuntime() noexcept=default
void setTemporaryDirectory(char const *path) noexcept
Set the directory that will be used by this runtime for temporary files.
Definition: NvInferRuntime.h:854
IPluginRegistry & getPluginRegistry() noexcept
Get the local plugin registry that can be used by the runtime.
Definition: NvInferRuntime.h:903
TRT_DEPRECATED void destroy() noexcept
Destroy this object.
Definition: NvInferRuntime.h:718
apiv::VRuntime * mImpl
Definition: NvInferRuntime.h:949
void setDLACore(int32_t dlaCore) noexcept
Sets the DLA core used by the network. Defaults to -1.
Definition: NvInferRuntime.h:689
int32_t getNbDLACores() const noexcept
Returns number of DLA hardware cores accessible or 0 if DLA is unavailable.
Definition: NvInferRuntime.h:706
ICudaEngine * deserializeCudaEngine(void const *blob, std::size_t size) noexcept
Deserialize an engine from a stream.
Definition: NvInferRuntime.h:781
void setTempfileControlFlags(TempfileControlFlags flags) noexcept
Set the tempfile control flags for this runtime.
Definition: NvInferRuntime.h:881
int32_t getDLACore() const noexcept
Get the DLA core that the engine executes on.
Definition: NvInferRuntime.h:698
void setGpuAllocator(IGpuAllocator *allocator) noexcept
Set the GPU allocator.
Definition: NvInferRuntime.h:732
IErrorRecorder * getErrorRecorder() const noexcept
get the ErrorRecorder assigned to this interface.
Definition: NvInferRuntime.h:766
ILogger * getLogger() const noexcept
get the logger with which the runtime was created
Definition: NvInferRuntime.h:791
int32_t getMaxThreads() const noexcept
Get the maximum number of threads that can be used by the runtime.
Definition: NvInferRuntime.h:819
char const * getTemporaryDirectory() const noexcept
Get the directory that will be used by this runtime for temporary files.
Definition: NvInferRuntime.h:865
void setErrorRecorder(IErrorRecorder *recorder) noexcept
Set the ErrorRecorder for this interface.
Definition: NvInferRuntime.h:751
Register the plugin creator to the registry The static registry object will be instantiated when the ...
Definition: NvInferRuntime.h:3681
PluginRegistrar()
Definition: NvInferRuntime.h:3683
An array of weights used as a layer parameter.
Definition: NvInferRuntime.h:126
DataType type
The type of the weights.
Definition: NvInferRuntime.h:128
int64_t count
The number of weights in the array.
Definition: NvInferRuntime.h:130
void const * values
The weight values, in a contiguous array.
Definition: NvInferRuntime.h:129
IRuntime * createInferRuntime(ILogger &logger) noexcept
Create an instance of a safe::IRuntime class.
The TensorRT API version 1 namespace.
uint32_t TacticSources
Represents a collection of one or more TacticSource values combined using bitwise-OR operations.
Definition: NvInferRuntime.h:1505
EngineCapability
List of supported engine capability flows.
Definition: NvInferRuntime.h:69
DimensionOperation
An operation on two IDimensionExpr, which represent integer expressions used in dimension computation...
Definition: NvInferRuntime.h:192
@ kSUB
Subtract the second element from the first.
@ kSUM
Sum of the two operands.
@ kPROD
Product of the two operands.
@ kFLOOR_DIV
Floor division of the first element by the second.
@ kEQUAL
1 if operands are equal, 0 otherwise.
@ kMIN
Minimum of the two operands.
@ kLESS
1 if first operand is less than second operand, 0 otherwise.
@ kMAX
Maximum of the two operands.
@ kCEIL_DIV
Division rounding up.
TensorIOMode
Definition of tensor IO Mode.
Definition: NvInferRuntimeBase.h:883
HardwareCompatibilityLevel
Definition: NvInfer.h:9057
uint32_t TempfileControlFlags
Represents a collection of one or more TempfileControlFlag values combined using bitwise-OR operation...
Definition: NvInferRuntime.h:644
constexpr int32_t EnumMax< WeightsRole >() noexcept
Maximum number of elements in WeightsRole enum.
Definition: NvInferRuntime.h:588
@ kV2_DYNAMICEXT
IPluginV2DynamicExt.
constexpr int32_t EnumMax< LayerInformationFormat >() noexcept
Definition: NvInferRuntime.h:3475
DataType
The type of weights and tensors.
Definition: NvInferRuntimeBase.h:120
DeviceType
The device that this layer/network will execute on.
Definition: NvInferRuntime.h:599
@ kSCALE
Scale layer.
@ kCONSTANT
Constant layer.
@ kDEFAULT
Similar to ONNX Gather.
TempfileControlFlag
Flags used to control TensorRT's behavior when creating executable temporary files.
Definition: NvInferRuntime.h:622
@ kALLOW_IN_MEMORY_FILES
Allow creating and loading files in-memory (or unnamed files).
constexpr int32_t EnumMax< OptProfileSelector >() noexcept
Number of different values of OptProfileSelector enum.
Definition: NvInferRuntime.h:1265
WeightsRole
How a layer uses particular Weights.
Definition: NvInferRuntime.h:577
@ kSHIFT
shift part of IScaleLayer
@ kANY
Any other weights role.
@ kBIAS
bias for IConvolutionLayer, IDeconvolutionLayer, or IFullyConnectedLayer
@ kKERNEL
kernel for IConvolutionLayer, IDeconvolutionLayer, or IFullyConnectedLayer
constexpr int32_t EnumMax< ProfilingVerbosity >() noexcept
Maximum number of profile verbosity levels in ProfilingVerbosity enum.
Definition: NvInferRuntime.h:1530
ProfilingVerbosity
List of verbosity levels of layer information exposed in NVTX annotations and in IEngineInspector.
Definition: NvInferRuntime.h:1517
@ kLAYER_NAMES_ONLY
Print only the layer names. This is the default setting.
@ kDETAILED
Print detailed layer information including layer names and layer parameters.
TacticSource
List of tactic sources for TensorRT.
Definition: NvInferRuntime.h:1472
TensorFormat PluginFormat
PluginFormat is reserved for backward compatibility.
Definition: NvInferRuntimePlugin.h:41
@ kMIN
Minimum of the two elements.
constexpr int32_t EnumMax< TempfileControlFlag >() noexcept
Maximum number of elements in TempfileControlFlag enum.
Definition: NvInferRuntime.h:634
TensorFormat
Format of the input/output tensors.
Definition: NvInferRuntimeBase.h:209
constexpr int32_t EnumMax< TacticSource >() noexcept
Maximum number of tactic sources in TacticSource enum.
Definition: NvInferRuntime.h:1494
LayerInformationFormat
The format in which the IEngineInspector prints the layer information.
Definition: NvInferRuntime.h:3467
@ kJSON
Print layer information in JSON format.
@ kONELINE
Print layer information in one line per layer.
constexpr int32_t EnumMax< DeviceType >() noexcept
Maximum number of elements in DeviceType enum.
Definition: NvInferRuntime.h:606
constexpr int32_t EnumMax< DimensionOperation >() noexcept
Maximum number of elements in DimensionOperation enum.
Definition: NvInferRuntime.h:206
TensorLocation
The location for tensor data storage, device or host.
Definition: NvInferRuntime.h:216
@ kHOST
Data stored on host.
@ kDEVICE
Data stored on device.
OptProfileSelector
When setting or querying optimization profile parameters (such as shape tensor inputs or dynamic dime...
Definition: NvInferRuntime.h:1253
@ kOPT
This is used to set or get the value that is used in the optimization (kernel selection).
Definition: NvInferRuntime.h:321
Dims min
Lower bounds on tensor’s dimensions.
Definition: NvInferRuntime.h:326
Dims max
Upper bounds on tensor’s dimensions.
Definition: NvInferRuntime.h:329
PluginTensorDesc desc
Information required to interpret a pointer to tensor data, except that desc.dims has -1 in place of ...
Definition: NvInferRuntime.h:323
Fields that a plugin might see for an input or output.
Definition: NvInferRuntimePlugin.h:54
Declaration of EnumMaxImpl struct to store maximum number of elements in an enumeration type.
Definition: NvInferRuntimeBase.h:105