TensorRT 8.4.3
NvInferRuntime.h
Go to the documentation of this file.
1/*
2 * SPDX-FileCopyrightText: Copyright (c) 1993-2022 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
3 * SPDX-License-Identifier: LicenseRef-NvidiaProprietary
4 *
5 * NVIDIA CORPORATION, its affiliates and licensors retain all intellectual
6 * property and proprietary rights in and to this material, related
7 * documentation and any modifications thereto. Any use, reproduction,
8 * disclosure or distribution of this material and related documentation
9 * without an express license agreement from NVIDIA CORPORATION or
10 * its affiliates is strictly prohibited.
11 */
12
13#ifndef NV_INFER_RUNTIME_H
14#define NV_INFER_RUNTIME_H
15
21
22#include "NvInferImpl.h"
24
25namespace nvinfer1
26{
27
28class IExecutionContext;
29class ICudaEngine;
30class IPluginFactory;
31class IEngineInspector;
32
41
// Body of INoCopy (the `class INoCopy` declaration line itself is not present in this listing).
// Construction and destruction are protected and every copy/move operation is deleted,
// so objects of this type can only exist as base-class sub-objects and can never be copied or moved.
43{
44protected:
45 INoCopy() = default;
46 virtual ~INoCopy() = default;
 // Copy and move construction/assignment are all explicitly deleted.
47 INoCopy(INoCopy const& other) = delete;
48 INoCopy& operator=(INoCopy const& other) = delete;
49 INoCopy(INoCopy&& other) = delete;
50 INoCopy& operator=(INoCopy&& other) = delete;
51};
52
67
// Execution capability of an engine; this is the type returned by ICudaEngine::getEngineCapability().
// NOTE(review): only kSTANDARD and kSAFETY are visible here; the remaining numbered lines are
// elided in this listing, so further enumerators may exist in the real header.
68enum class EngineCapability : int32_t
69{
74 kSTANDARD = 0,
75
78
85 kSAFETY = 1,
86
89
96
99};
100
101namespace impl
102{
104template <>
106{
107 static constexpr int32_t kVALUE = 3;
108};
109} // namespace impl
110
126{
127public:
129 void const* values;
130 int64_t count;
131};
132
// Class to handle library allocated memory that is accessible to the user.
// Every accessor is a thin inline wrapper that forwards to the implementation object mImpl.
143class IHostMemory : public INoCopy
144{
145public:
146 virtual ~IHostMemory() noexcept = default;
147
 // A pointer to the raw data that is owned by the library.
149 void* data() const noexcept
150 {
151 return mImpl->data();
152 }
153
 // Size reported by the implementation object.
 // NOTE(review): presumably the size of data() in bytes -- confirm against the TensorRT docs.
155 std::size_t size() const noexcept
156 {
157 return mImpl->size();
158 }
159
 // The type of the memory that was allocated.
161 DataType type() const noexcept
162 {
163 return mImpl->type();
164 }
 // Deprecated: destroys this object by executing `delete this`.
 // The object must not be used after this call.
172 TRT_DEPRECATED void destroy() noexcept
173 {
174 delete this;
175 }
176
177protected:
 // Implementation object that all public methods delegate to.
178 apiv::VHostMemory* mImpl;
179};
180
// Operation selector passed as the `op` argument of IExprBuilder::operation().
// Values are contiguous from 0 to 8; EnumMax<DimensionOperation>() returns the count (9).
191enum class DimensionOperation : int32_t
192{
193 kSUM = 0,
194 kPROD = 1,
195 kMAX = 2,
196 kMIN = 3,
197 kSUB = 4,
198 kEQUAL = 5,
199 kLESS = 6,
200 kFLOOR_DIV = 7,
201 kCEIL_DIV = 8
202};
203
// Specialization returning the number of DimensionOperation enumerators (kSUM..kCEIL_DIV = 0..8).
// Must be kept in sync with the enum when enumerators are added.
205template <>
206constexpr inline int32_t EnumMax<DimensionOperation>() noexcept
207{
208 return 9;
209}
210
// Location of a binding, as returned by ICudaEngine::getLocation().
// NOTE(review): kDEVICE / kHOST presumably denote GPU memory and host memory -- confirm in the docs.
215enum class TensorLocation : int32_t
216{
217 kDEVICE = 0,
218 kHOST = 1,
219};
220
221namespace impl
222{
224template <>
226{
227 static constexpr int32_t kVALUE = 2;
228};
229} // namespace impl
230
// Body of IDimensionExpr (its class declaration line is not present in this listing).
// Instances are handed out as `IDimensionExpr const*` by IExprBuilder; the destructor is
// protected, so code outside the class hierarchy cannot delete them.
244{
245public:
 // Return true if expression is a build-time constant.
247 bool isConstant() const noexcept
248 {
249 return mImpl->isConstant();
250 }
251
 // Returns the constant value reported by the implementation object.
 // NOTE(review): the result for a non-constant expression is not visible here -- check the docs.
254 int32_t getConstantValue() const noexcept
255 {
256 return mImpl->getConstantValue();
257 }
258
259protected:
 // Implementation object that the public methods delegate to.
260 apiv::VDimensionExpr* mImpl;
261 virtual ~IDimensionExpr() noexcept = default;
262};
263
// Factory for IDimensionExpr objects. Both methods forward to the implementation object mImpl.
// The destructor is protected, so code outside the class hierarchy cannot delete a builder.
281class IExprBuilder : public INoCopy
282{
283public:
 // Return a pointer to an IDimensionExpr for the given value.
285 IDimensionExpr const* constant(int32_t value) noexcept
286 {
287 return mImpl->constant(value);
288 }
289
 // The first declaration line of this method is elided in this listing; per the listing's index
 // the full signature is:
 //   IDimensionExpr const* operation(DimensionOperation op, IDimensionExpr const& first,
 //                                   IDimensionExpr const& second) noexcept
 // It forwards `op` and both operands to the implementation and returns the resulting expression.
293 DimensionOperation op, IDimensionExpr const& first, IDimensionExpr const& second) noexcept
294 {
295 return mImpl->operation(op, first, second);
296 }
297
298protected:
 // Implementation object that the public methods delegate to.
299 apiv::VExprBuilder* mImpl;
300 virtual ~IExprBuilder() noexcept = default;
301};
302
309{
310public:
311 int32_t nbDims;
313};
314
321{
324
327
330};
331
// Body of IPluginV2DynamicExt (its class declaration line, including the base class, is not
// present in this listing). The public section declares the pure-virtual interface a plugin must
// implement; the private section overrides the obsolete base-class methods as `final` stubs so
// that derived plugins can neither implement nor call them.
351{
352public:
 // Covariant override: a clone is returned as IPluginV2DynamicExt*.
353 IPluginV2DynamicExt* clone() const noexcept override = 0;
354
 // Computes the dimensions of output `outputIndex` from the `nbInputs` input dimension
 // expressions in `inputs`; `exprBuilder` is supplied for constructing new expressions.
379 virtual DimsExprs getOutputDimensions(
380 int32_t outputIndex, DimsExprs const* inputs, int32_t nbInputs, IExprBuilder& exprBuilder) noexcept = 0;
381
 // NOTE(review): presumably an upper bound on the number of format combinations a plugin may
 // support -- the usage is not visible in this listing; confirm against the TensorRT docs.
385 static constexpr int32_t kFORMAT_COMBINATION_LIMIT = 100;
386
 // NOTE(review): `inOut` presumably holds nbInputs + nbOutputs descriptors with `pos` indexing
 // into it -- confirm against the TensorRT docs.
419 virtual bool supportsFormatCombination(
420 int32_t pos, PluginTensorDesc const* inOut, int32_t nbInputs, int32_t nbOutputs) noexcept = 0;
421
460 virtual void configurePlugin(DynamicPluginTensorDesc const* in, int32_t nbInputs,
461 DynamicPluginTensorDesc const* out, int32_t nbOutputs) noexcept = 0;
462
472 virtual size_t getWorkspaceSize(PluginTensorDesc const* inputs, int32_t nbInputs, PluginTensorDesc const* outputs,
473 int32_t nbOutputs) const noexcept = 0;
474
 // Runs the plugin on `stream` using the given input/output descriptors, buffers and workspace.
 // NOTE(review): the obsolete stub below returns 1, which suggests 0 means success -- confirm.
487 virtual int32_t enqueue(PluginTensorDesc const* inputDesc, PluginTensorDesc const* outputDesc,
488 void const* const* inputs, void* const* outputs, void* workspace, cudaStream_t stream) noexcept = 0;
489
490protected:
 // Packs the plugin interface version into the top 8 bits and the low 24 bits of
 // NV_TENSORRT_VERSION into the rest. `<<` binds tighter than `|`, so the expression is
 // (kV2_DYNAMICEXT << 24) | (NV_TENSORRT_VERSION & 0xFFFFFF).
498 int32_t getTensorRTVersion() const noexcept override
499 {
500 return (static_cast<int32_t>(PluginVersion::kV2_DYNAMICEXT) << 24 | (NV_TENSORRT_VERSION & 0xFFFFFF));
501 }
502
503 virtual ~IPluginV2DynamicExt() noexcept {}
504
505private:
506 // Following are obsolete base class methods, and must not be implemented or used.
507
 // Obsolete overload: intentionally a no-op.
508 void configurePlugin(Dims const*, int32_t, Dims const*, int32_t, DataType const*, DataType const*, bool const*,
509 bool const*, PluginFormat, int32_t) noexcept override final
510 {
511 }
512
 // Obsolete: always reports "not supported".
513 bool supportsFormat(DataType, PluginFormat) const noexcept override final
514 {
515 return false;
516 }
517
 // Obsolete overload: returns a Dims whose first member is -1 and whose second is empty-initialized.
518 Dims getOutputDimensions(int32_t, Dims const*, int32_t) noexcept override final
519 {
520 return Dims{-1, {}};
521 }
522
 // Obsolete: always false.
523 bool isOutputBroadcastAcrossBatch(int32_t, bool const*, int32_t) const noexcept override final
524 {
525 return false;
526 }
527
 // Obsolete: always true.
528 bool canBroadcastInputAcrossBatch(int32_t) const noexcept override final
529 {
530 return true;
531 }
532
 // Obsolete overload: always reports a workspace size of 0.
533 size_t getWorkspaceSize(int32_t) const noexcept override final
534 {
535 return 0;
536 }
537
 // Obsolete overload: does no work and always returns 1.
538 int32_t enqueue(int32_t, void const* const*, void* const*, void*, cudaStream_t) noexcept override final
539 {
540 return 1;
541 }
542};
543
// Body of IProfiler (its class declaration line is not present in this listing).
// Callback interface: implementers override reportLayerTime(). An instance is attached to an
// execution context through IExecutionContext::setProfiler().
556{
557public:
 // Called with a layer name and its time.
 // NOTE(review): `ms` is presumably milliseconds -- confirm against the TensorRT docs.
564 virtual void reportLayerTime(char const* layerName, float ms) noexcept = 0;
565
566 virtual ~IProfiler() noexcept {}
567};
568
// Role of a set of weights within a layer; used by IRefitter::setWeights(), getMissing() and getAll().
// Values are contiguous from 0 to 5; EnumMax<WeightsRole>() returns the count (6).
575enum class WeightsRole : int32_t
576{
577 kKERNEL = 0,
578 kBIAS = 1,
579 kSHIFT = 2,
580 kSCALE = 3,
581 kCONSTANT = 4,
582 kANY = 5,
583};
584
// Specialization returning the number of WeightsRole enumerators (kKERNEL..kANY = 0..5).
586template <>
587constexpr inline int32_t EnumMax<WeightsRole>() noexcept
588{
589 return 6;
590}
591
// Device kinds. No explicit values are given, so kGPU is 0 and kDLA is 1.
597enum class DeviceType : int32_t
598{
599 kGPU,
600 kDLA,
601};
602
// Specialization returning the number of DeviceType enumerators (kGPU, kDLA).
604template <>
605constexpr inline int32_t EnumMax<DeviceType>() noexcept
606{
607 return 2;
608}
609
// Runtime object used to deserialize engines. Instances are obtained from createInferRuntime().
// Every method is a thin inline wrapper around the implementation object mImpl.
617class IRuntime : public INoCopy
618{
619public:
620 virtual ~IRuntime() noexcept = default;
621
 // Deprecated three-argument overload. The `pluginFactory` argument is NOT forwarded:
 // nullptr is always passed to the implementation, so this behaves exactly like the
 // two-argument deserializeCudaEngine(blob, size) below.
637 TRT_DEPRECATED nvinfer1::ICudaEngine* deserializeCudaEngine(
638 void const* blob, std::size_t size, IPluginFactory* pluginFactory) noexcept
639 {
640 return mImpl->deserializeCudaEngine(blob, size, nullptr);
641 }
642
 // Forwards the DLA core index to the implementation.
653 void setDLACore(int32_t dlaCore) noexcept
654 {
655 mImpl->setDLACore(dlaCore);
656 }
657
 // Returns the DLA core index reported by the implementation.
662 int32_t getDLACore() const noexcept
663 {
664 return mImpl->getDLACore();
665 }
666
 // Returns the number of DLA cores reported by the implementation.
670 int32_t getNbDLACores() const noexcept
671 {
672 return mImpl->getNbDLACores();
673 }
674
 // Deprecated: destroys this object by executing `delete this`.
682 TRT_DEPRECATED void destroy() noexcept
683 {
684 delete this;
685 }
686
 // Sets the allocator; IGpuAllocator is the application-implemented class for controlling
 // allocation on the GPU.
696 void setGpuAllocator(IGpuAllocator* allocator) noexcept
697 {
698 mImpl->setGpuAllocator(allocator);
699 }
700
712 //
 // Set the ErrorRecorder for this interface.
715 void setErrorRecorder(IErrorRecorder* recorder) noexcept
716 {
717 mImpl->setErrorRecorder(recorder);
718 }
719
 // The declaration line of this getter is elided in this listing; the body returns the
 // ErrorRecorder currently held by the implementation (mImpl->getErrorRecorder()).
731 {
732 return mImpl->getErrorRecorder();
733 }
734
 // Deserializes an engine from `size` bytes at `blob`. Returns whatever the implementation
 // returns. NOTE(review): presumably nullptr on failure -- confirm against the TensorRT docs.
745 ICudaEngine* deserializeCudaEngine(void const* blob, std::size_t size) noexcept
746 {
747 return mImpl->deserializeCudaEngine(blob, size, nullptr);
748 }
749
 // Returns the logger held by the implementation.
755 ILogger* getLogger() const noexcept
756 {
757 return mImpl->getLogger();
758 }
759
 // Forwards the requested maximum thread count; returns the implementation's success flag.
769 bool setMaxThreads(int32_t maxThreads) noexcept
770 {
771 return mImpl->setMaxThreads(maxThreads);
772 }
773
 // Returns the maximum thread count reported by the implementation.
783 int32_t getMaxThreads() const noexcept
784 {
785 return mImpl->getMaxThreads();
786 }
787
788protected:
 // Implementation object that all public methods delegate to.
789 apiv::VRuntime* mImpl;
790};
791
// Refitter object for updating the weights of an engine. Instances are obtained from
// createInferRefitter(). Every method is a thin inline wrapper around the implementation mImpl.
799class IRefitter : public INoCopy
800{
801public:
802 virtual ~IRefitter() noexcept = default;
803
 // Supplies `weights` for the given layer name and role; returns the implementation's result flag.
814 bool setWeights(char const* layerName, WeightsRole role, Weights weights) noexcept
815 {
816 return mImpl->setWeights(layerName, role, weights);
817 }
818
 // Asks the implementation to refit the engine; returns its success flag.
829 bool refitCudaEngine() noexcept
830 {
831 return mImpl->refitCudaEngine();
832 }
833
 // Queries (layerName, role) pairs into caller-provided arrays.
 // NOTE(review): `size` is presumably the capacity of both arrays and the return value the number
 // of missing entries -- confirm against the TensorRT docs.
850 int32_t getMissing(int32_t size, char const** layerNames, WeightsRole* roles) noexcept
851 {
852 return mImpl->getMissing(size, layerNames, roles);
853 }
854
 // Same calling convention as getMissing(), forwarded to mImpl->getAll().
867 int32_t getAll(int32_t size, char const** layerNames, WeightsRole* roles) noexcept
868 {
869 return mImpl->getAll(size, layerNames, roles);
870 }
871
 // Deprecated: destroys this object by executing `delete this`.
877 TRT_DEPRECATED void destroy() noexcept
878 {
879 delete this;
880 }
881
 // Sets the dynamic range [min, max] for the named tensor; returns the implementation's result flag.
894 bool setDynamicRange(char const* tensorName, float min, float max) noexcept
895 {
896 return mImpl->setDynamicRange(tensorName, min, max);
897 }
898
 // Returns the dynamic-range minimum the implementation reports for the named tensor.
906 float getDynamicRangeMin(char const* tensorName) const noexcept
907 {
908 return mImpl->getDynamicRangeMin(tensorName);
909 }
910
 // Returns the dynamic-range maximum the implementation reports for the named tensor.
918 float getDynamicRangeMax(char const* tensorName) const noexcept
919 {
920 return mImpl->getDynamicRangeMax(tensorName);
921 }
922
 // Queries tensor names into the caller-provided array `tensorNames`.
 // NOTE(review): `size` is presumably the array capacity -- confirm against the TensorRT docs.
934 int32_t getTensorsWithDynamicRange(int32_t size, char const** tensorNames) const noexcept
935 {
936 return mImpl->getTensorsWithDynamicRange(size, tensorNames);
937 }
938
950 //
 // Set the ErrorRecorder for this interface.
953 void setErrorRecorder(IErrorRecorder* recorder) noexcept
954 {
955 mImpl->setErrorRecorder(recorder);
956 }
957
 // The declaration line of this getter is elided in this listing; the body returns the
 // ErrorRecorder currently held by the implementation (mImpl->getErrorRecorder()).
969 {
970 return mImpl->getErrorRecorder();
971 }
972
 // Supplies `weights` addressed by weights name instead of (layer, role).
986 bool setNamedWeights(char const* name, Weights weights) noexcept
987 {
988 return mImpl->setNamedWeights(name, weights);
989 }
990
 // Name-based counterpart of getMissing(); forwarded to mImpl->getMissingWeights().
1006 int32_t getMissingWeights(int32_t size, char const** weightsNames) noexcept
1007 {
1008 return mImpl->getMissingWeights(size, weightsNames);
1009 }
1010
 // Name-based counterpart of getAll(); forwarded to mImpl->getAllWeights().
1022 int32_t getAllWeights(int32_t size, char const** weightsNames) noexcept
1023 {
1024 return mImpl->getAllWeights(size, weightsNames);
1025 }
1026
 // Returns the logger held by the implementation.
1032 ILogger* getLogger() const noexcept
1033 {
1034 return mImpl->getLogger();
1035 }
1036
 // Forwards the requested maximum thread count; returns the implementation's success flag.
1046 bool setMaxThreads(int32_t maxThreads) noexcept
1047 {
1048 return mImpl->setMaxThreads(maxThreads);
1049 }
1050
 // Returns the maximum thread count reported by the implementation.
1060 int32_t getMaxThreads() const noexcept
1061 {
1062 return mImpl->getMaxThreads();
1063 }
1064
1065protected:
 // Implementation object that all public methods delegate to.
1066 apiv::VRefitter* mImpl;
1067};
1068
// Selects which value of an optimization-profile entry is addressed: minimum, optimum or maximum.
// Used as the `select` argument of IOptimizationProfile and ICudaEngine::getProfile* methods.
1079enum class OptProfileSelector : int32_t
1080{
1081 kMIN = 0,
1082 kOPT = 1,
1083 kMAX = 2
1084};
1085
// Specialization returning the number of OptProfileSelector enumerators (kMIN, kOPT, kMAX).
1091template <>
1092constexpr inline int32_t EnumMax<OptProfileSelector>() noexcept
1093{
1094 return 3;
1095}
1096
// Body of IOptimizationProfile (its class declaration line is not present in this listing).
// Every method forwards to the implementation object mImpl. The destructor is protected, so code
// outside the class hierarchy cannot delete a profile.
1120{
1121public:
 // Sets the dimensions selected by `select` (min / opt / max) for the named input.
1147 bool setDimensions(char const* inputName, OptProfileSelector select, Dims dims) noexcept
1148 {
1149 return mImpl->setDimensions(inputName, select, dims);
1150 }
1151
 // Returns the dimensions selected by `select` for the named input.
1157 Dims getDimensions(char const* inputName, OptProfileSelector select) const noexcept
1158 {
1159 return mImpl->getDimensions(inputName, select);
1160 }
1161
 // The first declaration line of this method (return type and name) is elided in this listing;
 // the body shows it forwards `nbValues` values from `values` to mImpl->setShapeValues().
1201 char const* inputName, OptProfileSelector select, int32_t const* values, int32_t nbValues) noexcept
1202 {
1203 return mImpl->setShapeValues(inputName, select, values, nbValues);
1204 }
1205
 // Returns the number of shape values the implementation reports for the named input.
1212 int32_t getNbShapeValues(char const* inputName) const noexcept
1213 {
1214 return mImpl->getNbShapeValues(inputName);
1215 }
1216
 // Returns a pointer to the shape values selected by `select` for the named input.
 // NOTE(review): ownership and lifetime of the returned array are not visible here -- check the docs.
1222 int32_t const* getShapeValues(char const* inputName, OptProfileSelector select) const noexcept
1223 {
1224 return mImpl->getShapeValues(inputName, select);
1225 }
1226
 // Forwards the extra-memory target; returns the implementation's result flag.
1240 bool setExtraMemoryTarget(float target) noexcept
1241 {
1242 return mImpl->setExtraMemoryTarget(target);
1243 }
1244
 // Returns the extra-memory target reported by the implementation.
1248 float getExtraMemoryTarget() const noexcept
1249 {
1250 return mImpl->getExtraMemoryTarget();
1251 }
1252
 // Returns the implementation's validity check for this profile.
1264 bool isValid() const noexcept
1265 {
1266 return mImpl->isValid();
1267 }
1268
1269protected:
 // Implementation object that the public methods delegate to.
1270 apiv::VOptimizationProfile* mImpl;
1271 virtual ~IOptimizationProfile() noexcept = default;
1272};
1273
// Identifiers of tactic sources. Sets of them are carried in a TacticSources value (uint32_t).
// NOTE(review): three enumerators are visible, but EnumMax<TacticSource>() returns 4; the numbered
// lines between kCUDNN and the closing brace are elided here, so a fourth enumerator likely exists.
1281enum class TacticSource : int32_t
1282{
1284 kCUBLAS = 0,
1285 kCUBLAS_LT = 1,
1286 kCUDNN = 2,
1287
1291};
1292
// Specialization returning 4 as the number of TacticSource enumerators.
// NOTE(review): only values 0..2 are visible in the TacticSource enum of this listing; the fourth
// enumerator is presumably on the elided lines -- verify against the real header.
1293template <>
1294constexpr inline int32_t EnumMax<TacticSource>() noexcept
1295{
1296 return 4;
1297}
1298
1305using TacticSources = uint32_t;
1306
// Profiling verbosity levels; ICudaEngine::getProfilingVerbosity() returns the value the builder
// config was set to when the engine was built.
// NOTE(review): the numbered lines after kDETAILED are elided in this listing; additional
// (possibly alias) enumerators may exist in the real header.
1316enum class ProfilingVerbosity : int32_t
1317{
1318 kLAYER_NAMES_ONLY = 0,
1319 kNONE = 1,
1320 kDETAILED = 2,
1321
1326};
1327
// Specialization returning the number of distinct ProfilingVerbosity values (0..2).
1329template <>
1330constexpr inline int32_t EnumMax<ProfilingVerbosity>() noexcept
1331{
1332 return 3;
1333}
1334
// An engine for executing inference on a built network, with functionally unsafe features.
// Every method is a thin inline wrapper around the implementation object mImpl. Several
// declaration lines are elided in this listing; where that happens the signature given in the
// comment is taken from the listing's own index.
1342class ICudaEngine : public INoCopy
1343{
1344public:
1345 virtual ~ICudaEngine() noexcept = default;
1346
 // Returns the number of bindings reported by the implementation.
1357 int32_t getNbBindings() const noexcept
1358 {
1359 return mImpl->getNbBindings();
1360 }
1361
 // Retrieve the binding index for a named tensor.
1379 int32_t getBindingIndex(char const* name) const noexcept
1380 {
1381 return mImpl->getBindingIndex(name);
1382 }
1383
 // Retrieve the name corresponding to a binding index.
1399 char const* getBindingName(int32_t bindingIndex) const noexcept
1400 {
1401 return mImpl->getBindingName(bindingIndex);
1402 }
1403
 // Determine whether a binding is an input binding.
1412 bool bindingIsInput(int32_t bindingIndex) const noexcept
1413 {
1414 return mImpl->bindingIsInput(bindingIndex);
1415 }
1416
 // Get the dimensions of a binding.
1437 Dims getBindingDimensions(int32_t bindingIndex) const noexcept
1438 {
1439 return mImpl->getBindingDimensions(bindingIndex);
1440 }
1441
 // Determine the required data type for a buffer from its binding index.
1450 DataType getBindingDataType(int32_t bindingIndex) const noexcept
1451 {
1452 return mImpl->getBindingDataType(bindingIndex);
1453 }
1454
 // Deprecated. Get the maximum batch size which can be used for inference.
1466 TRT_DEPRECATED int32_t getMaxBatchSize() const noexcept
1467 {
1468 return mImpl->getMaxBatchSize();
1469 }
1470
 // Get the number of layers in the network.
1480 int32_t getNbLayers() const noexcept
1481 {
1482 return mImpl->getNbLayers();
1483 }
1484
 // Serialize the network to a stream; the result is returned as an IHostMemory object.
1494 IHostMemory* serialize() const noexcept
1495 {
1496 return mImpl->serialize();
1497 }
1498
 // Declaration elided: IExecutionContext* createExecutionContext() noexcept
 // Create an execution context.
1511 {
1512 return mImpl->createExecutionContext();
1513 }
1514
 // Deprecated: destroys this object by executing `delete this`.
1522 TRT_DEPRECATED void destroy() noexcept
1523 {
1524 delete this;
1525 }
1526
 // Get location of binding.
1537 TensorLocation getLocation(int32_t bindingIndex) const noexcept
1538 {
1539 return mImpl->getLocation(bindingIndex);
1540 }
1541
 // Declaration elided: IExecutionContext* createExecutionContextWithoutDeviceMemory() noexcept
 // Create an execution context without any device memory allocated.
1547 {
1548 return mImpl->createExecutionContextWithoutDeviceMemory();
1549 }
1550
 // Return the amount of device memory required by an execution context.
1556 size_t getDeviceMemorySize() const noexcept
1557 {
1558 return mImpl->getDeviceMemorySize();
1559 }
1560
 // Return true if an engine can be refit.
1566 bool isRefittable() const noexcept
1567 {
1568 return mImpl->isRefittable();
1569 }
1570
 // Return the number of bytes per component of an element.
1580 int32_t getBindingBytesPerComponent(int32_t bindingIndex) const noexcept
1581 {
1582 return mImpl->getBindingBytesPerComponent(bindingIndex);
1583 }
1584
 // Return the number of components included in one element.
1594 int32_t getBindingComponentsPerElement(int32_t bindingIndex) const noexcept
1595 {
1596 return mImpl->getBindingComponentsPerElement(bindingIndex);
1597 }
1598
 // Return the binding format.
1604 TensorFormat getBindingFormat(int32_t bindingIndex) const noexcept
1605 {
1606 return mImpl->getBindingFormat(bindingIndex);
1607 }
1608
 // Return the human readable description of the tensor format.
1623 char const* getBindingFormatDesc(int32_t bindingIndex) const noexcept
1624 {
1625 return mImpl->getBindingFormatDesc(bindingIndex);
1626 }
1627
 // Return the index of the dimension along which the buffer is vectorized.
1635 int32_t getBindingVectorizedDim(int32_t bindingIndex) const noexcept
1636 {
1637 return mImpl->getBindingVectorizedDim(bindingIndex);
1638 }
1639
 // Returns the name of the network associated with the engine.
1650 char const* getName() const noexcept
1651 {
1652 return mImpl->getName();
1653 }
1654
 // Get the number of optimization profiles defined for this engine.
1661 int32_t getNbOptimizationProfiles() const noexcept
1662 {
1663 return mImpl->getNbOptimizationProfiles();
1664 }
1665
 // Get the minimum / optimum / maximum dimensions for a particular binding under an
 // optimization profile.
1688 Dims getProfileDimensions(int32_t bindingIndex, int32_t profileIndex, OptProfileSelector select) const noexcept
1689 {
1690 return mImpl->getProfileDimensions(bindingIndex, profileIndex, select);
1691 }
1692
 // First declaration line elided: int32_t const* getProfileShapeValues(...)
 // Get minimum / optimum / maximum values for an input shape binding under an optimization profile.
 // Note the argument order (profileIndex first), which differs from getProfileDimensions().
1715 int32_t profileIndex, int32_t inputIndex, OptProfileSelector select) const noexcept
1716 {
1717 return mImpl->getProfileShapeValues(profileIndex, inputIndex, select);
1718 }
1719
 // True if tensor is required as input for shape calculations or output from them.
1751 bool isShapeBinding(int32_t bindingIndex) const noexcept
1752 {
1753 return mImpl->isShapeBinding(bindingIndex);
1754 }
1755
 // True if pointer to tensor data is required for execution phase, false if nullptr can be supplied.
1765 bool isExecutionBinding(int32_t bindingIndex) const noexcept
1766 {
1767 return mImpl->isExecutionBinding(bindingIndex);
1768 }
1769
 // Declaration elided: EngineCapability getEngineCapability() const noexcept
 // Determine what execution capability this engine has.
1781 {
1782 return mImpl->getEngineCapability();
1783 }
1784
1795 //
 // Set the ErrorRecorder for this interface.
 // The `return` of a void expression is legal C++; the other setErrorRecorder wrappers in this
 // file simply call the implementation without `return`.
1798 void setErrorRecorder(IErrorRecorder* recorder) noexcept
1799 {
1800 return mImpl->setErrorRecorder(recorder);
1801 }
1802
 // Declaration elided: IErrorRecorder* getErrorRecorder() const noexcept
 // Get the ErrorRecorder assigned to this interface.
1814 {
1815 return mImpl->getErrorRecorder();
1816 }
1817
 // Query whether the engine was built with an implicit batch dimension.
1832 bool hasImplicitBatchDimension() const noexcept
1833 {
1834 return mImpl->hasImplicitBatchDimension();
1835 }
1836
 // Declaration elided: TacticSources getTacticSources() const noexcept
 // Return the tactic sources required by this engine.
1848 {
1849 return mImpl->getTacticSources();
1850 }
1851
 // Declaration elided: ProfilingVerbosity getProfilingVerbosity() const noexcept
 // Return the ProfilingVerbosity the builder config was set to when the engine was built.
1859 {
1860 return mImpl->getProfilingVerbosity();
1861 }
1862
 // Declaration elided: IEngineInspector* createEngineInspector() const noexcept
 // Create a new engine inspector which prints the layer information in an engine or an
 // execution context.
1869 {
1870 return mImpl->createEngineInspector();
1871 }
1872
1873protected:
 // Implementation object that all public methods delegate to.
1874 apiv::VCudaEngine* mImpl;
1875};
1876
// Body of IExecutionContext (its class declaration line is not present in this listing).
// Context for executing inference using an engine, with functionally unsafe features.
// Every method is a thin inline wrapper around the implementation object mImpl.
1888{
1889public:
1890 virtual ~IExecutionContext() noexcept = default;
1891
 // Deprecated batch-size based synchronous execution; forwards to mImpl->execute().
1914 TRT_DEPRECATED bool execute(int32_t batchSize, void* const* bindings) noexcept
1915 {
1916 return mImpl->execute(batchSize, bindings);
1917 }
1918
 // First declaration line elided: TRT_DEPRECATED bool enqueue(...)
 // Asynchronously execute inference on a batch.
1949 int32_t batchSize, void* const* bindings, cudaStream_t stream, cudaEvent_t* inputConsumed) noexcept
1950 {
1951 return mImpl->enqueue(batchSize, bindings, stream, inputConsumed);
1952 }
1953
 // Set the debug sync flag.
1962 void setDebugSync(bool sync) noexcept
1963 {
1964 mImpl->setDebugSync(sync);
1965 }
1966
 // Get the debug sync flag.
1972 bool getDebugSync() const noexcept
1973 {
1974 return mImpl->getDebugSync();
1975 }
1976
 // Set the profiler.
1982 void setProfiler(IProfiler* profiler) noexcept
1983 {
1984 mImpl->setProfiler(profiler);
1985 }
1986
 // Get the profiler.
1992 IProfiler* getProfiler() const noexcept
1993 {
1994 return mImpl->getProfiler();
1995 }
1996
 // Get the associated engine.
2002 ICudaEngine const& getEngine() const noexcept
2003 {
2004 return mImpl->getEngine();
2005 }
2006
 // Deprecated: destroys this object by executing `delete this`.
2014 TRT_DEPRECATED void destroy() noexcept
2015 {
2016 delete this;
2017 }
2018
 // Set the name of the execution context.
2026 void setName(char const* name) noexcept
2027 {
2028 mImpl->setName(name);
2029 }
2030
 // Return the name of the execution context.
2036 char const* getName() const noexcept
2037 {
2038 return mImpl->getName();
2039 }
2040
 // Set the device memory for use by this execution context.
2052 void setDeviceMemory(void* memory) noexcept
2053 {
2054 mImpl->setDeviceMemory(memory);
2055 }
2056
 // Return the strides of the buffer for the given binding.
2073 Dims getStrides(int32_t bindingIndex) const noexcept
2074 {
2075 return mImpl->getStrides(bindingIndex);
2076 }
2077
2078public:
 // Select an optimization profile for the current context.
 // The listing's index shows this method as TRT_DEPRECATED (the attribute line is elided here);
 // setOptimizationProfileAsync() below is the variant with async semantics.
2116 bool setOptimizationProfile(int32_t profileIndex) noexcept
2117 {
2118 return mImpl->setOptimizationProfile(profileIndex);
2119 }
2120
 // Get the index of the currently selected optimization profile.
2128 int32_t getOptimizationProfile() const noexcept
2129 {
2130 return mImpl->getOptimizationProfile();
2131 }
2132
 // Set the dynamic dimensions of a binding.
2165 bool setBindingDimensions(int32_t bindingIndex, Dims dimensions) noexcept
2166 {
2167 return mImpl->setBindingDimensions(bindingIndex, dimensions);
2168 }
2169
 // Get the dynamic dimensions of a binding.
2195 Dims getBindingDimensions(int32_t bindingIndex) const noexcept
2196 {
2197 return mImpl->getBindingDimensions(bindingIndex);
2198 }
2199
 // Set values of input tensor required by shape calculations.
2225 bool setInputShapeBinding(int32_t bindingIndex, int32_t const* data) noexcept
2226 {
2227 return mImpl->setInputShapeBinding(bindingIndex, data);
2228 }
2229
 // Get values of an input tensor required for shape calculations, or of an output tensor
 // produced by them; the values are written through `data`.
2247 bool getShapeBinding(int32_t bindingIndex, int32_t* data) const noexcept
2248 {
2249 return mImpl->getShapeBinding(bindingIndex, data);
2250 }
2251
 // Whether all dynamic dimensions of input tensors have been specified.
2262 bool allInputDimensionsSpecified() const noexcept
2263 {
2264 return mImpl->allInputDimensionsSpecified();
2265 }
2266
 // Whether all input shape bindings have been specified.
2276 bool allInputShapesSpecified() const noexcept
2277
2278 {
2279 return mImpl->allInputShapesSpecified();
2280 }
2281
2293 //
 // Set the ErrorRecorder for this interface.
2296 void setErrorRecorder(IErrorRecorder* recorder) noexcept
2297 {
2298 mImpl->setErrorRecorder(recorder);
2299 }
2300
 // Declaration elided: IErrorRecorder* getErrorRecorder() const noexcept
 // Get the ErrorRecorder assigned to this interface.
2312 {
2313 return mImpl->getErrorRecorder();
2314 }
2315
 // Synchronously execute inference on a network.
2328 bool executeV2(void* const* bindings) noexcept
2329 {
2330 return mImpl->executeV2(bindings);
2331 }
2332
 // Asynchronously execute inference on `stream`.
 // NOTE(review): `inputConsumed` is presumably an optional event signalled once the input buffers
 // may be reused -- confirm against the TensorRT docs.
2356 bool enqueueV2(void* const* bindings, cudaStream_t stream, cudaEvent_t* inputConsumed) noexcept
2357 {
2358 return mImpl->enqueueV2(bindings, stream, inputConsumed);
2359 }
2360
 // Select an optimization profile for the current context with async semantics.
2404 bool setOptimizationProfileAsync(int32_t profileIndex, cudaStream_t stream) noexcept
2405 {
2406 return mImpl->setOptimizationProfileAsync(profileIndex, stream);
2407 }
2408
 // Set whether enqueue emits layer timing to the profiler.
2419 void setEnqueueEmitsProfile(bool enqueueEmitsProfile) noexcept
2420 {
2421 mImpl->setEnqueueEmitsProfile(enqueueEmitsProfile);
2422 }
2423
 // Get the enqueueEmitsProfile state.
2430 bool getEnqueueEmitsProfile() const noexcept
2431 {
2432 return mImpl->getEnqueueEmitsProfile();
2433 }
2434
 // Calculate layer timing info for the current optimization profile and report it to the
 // attached profiler; returns the implementation's result flag.
2459 bool reportToProfiler() const noexcept
2460 {
2461 return mImpl->reportToProfiler();
2462 }
2463
2464protected:
 // Implementation object that all public methods delegate to.
2465 apiv::VExecutionContext* mImpl;
2466}; // class IExecutionContext
2467
// Output format selector for IEngineInspector::getLayerInformation() and getEngineInformation().
2475enum class LayerInformationFormat : int32_t
2476{
2477 kONELINE = 0,
2478 kJSON = 1,
2479};
2480
// Specialization returning the number of LayerInformationFormat enumerators (kONELINE, kJSON).
2483template <>
2484constexpr inline int32_t EnumMax<LayerInformationFormat>() noexcept
2485{
2486 return 2;
2487}
2488
// Body of IEngineInspector (its class declaration line is not present in this listing).
// An engine inspector which prints out the layer information of an engine or an execution context.
// Instances are created by ICudaEngine::createEngineInspector(); all methods forward to mImpl.
2505{
2506public:
2507 virtual ~IEngineInspector() noexcept = default;
2508
 // Sets the execution context to inspect; returns the implementation's result flag.
2521 bool setExecutionContext(IExecutionContext const* context) noexcept
2522 {
2523 return mImpl->setExecutionContext(context);
2524 }
2525
 // Declaration elided: IExecutionContext const* getExecutionContext() const noexcept
 // Get the context currently being inspected.
2534 {
2535 return mImpl->getExecutionContext();
2536 }
2537
 // Get a string describing the information about a specific layer in the current engine or
 // the execution context, rendered in the requested `format`.
2558 AsciiChar const* getLayerInformation(int32_t layerIndex, LayerInformationFormat format) const noexcept
2559 {
2560 return mImpl->getLayerInformation(layerIndex, format);
2561 }
2562
 // Declaration elided:
 //   AsciiChar const* getEngineInformation(LayerInformationFormat format) const noexcept
 // Get a string describing the information about all the layers in the current engine or the
 // execution context.
2584 {
2585 return mImpl->getEngineInformation(format);
2586 }
2587
2599 //
 // Set the ErrorRecorder for this interface.
2602 void setErrorRecorder(IErrorRecorder* recorder) noexcept
2603 {
2604 mImpl->setErrorRecorder(recorder);
2605 }
2606
 // Declaration elided: IErrorRecorder* getErrorRecorder() const noexcept
 // Get the ErrorRecorder assigned to this interface.
2618 {
2619 return mImpl->getErrorRecorder();
2620 }
2621
2622protected:
 // Implementation object that all public methods delegate to.
2623 apiv::VEngineInspector* mImpl;
2624}; // class IEngineInspector
2625
2626} // namespace nvinfer1
2627
2632extern "C" TENSORRTAPI void* createInferRuntime_INTERNAL(void* logger, int32_t version) noexcept;
2633
2638extern "C" TENSORRTAPI void* createInferRefitter_INTERNAL(void* engine, void* logger, int32_t version) noexcept;
2639
2644
2650extern "C" TENSORRTAPI nvinfer1::ILogger* getLogger() noexcept;
2651
2652namespace nvinfer1
2653{
2654namespace // unnamed namespace avoids linkage surprises when linking objects built with different versions of this
2655 // header.
2656{
// Creates an IRuntime. Passes the address of `logger` and the NV_TENSORRT_VERSION this header was
// compiled with to the extern "C" entry point createInferRuntime_INTERNAL, then casts the returned
// void* to IRuntime*.
// NOTE(review): the result is presumably nullptr on failure -- callers should check it.
2662inline IRuntime* createInferRuntime(ILogger& logger) noexcept
2663{
2664 return static_cast<IRuntime*>(createInferRuntime_INTERNAL(&logger, NV_TENSORRT_VERSION));
2665}
2666
// Creates an IRefitter for `engine`. Passes the addresses of `engine` and `logger` plus the
// compile-time NV_TENSORRT_VERSION to the extern "C" entry point createInferRefitter_INTERNAL,
// then casts the returned void* to IRefitter*.
// NOTE(review): the result is presumably nullptr on failure -- callers should check it.
2672inline IRefitter* createInferRefitter(ICudaEngine& engine, ILogger& logger) noexcept
2673{
2674 return static_cast<IRefitter*>(createInferRefitter_INTERNAL(&engine, &logger, NV_TENSORRT_VERSION));
2675}
2676
2677} // namespace
2678
// Class template used by REGISTER_TENSORRT_PLUGIN (the `class PluginRegistrar` declaration line
// and the constructor's declaration line are elided in this listing). It owns one
// value-initialized T and registers it with the plugin registry.
2690template <typename T>
2692{
2693public:
 // Constructor body: registers `instance` with the registry returned by getPluginRegistry(),
 // passing an empty string as the second argument.
 // NOTE(review): the empty string is presumably the plugin namespace -- confirm in the docs.
2695 {
2696 getPluginRegistry()->registerCreator(instance, "");
2697 }
2698
2699private:
 // The object handed to registerCreator(); it lives as long as this registrar.
2701 T instance{};
2702};
2703
2704} // namespace nvinfer1
2705
// Defines a static nvinfer1::PluginRegistrar<name> object called pluginRegistrar<name>.
// Because the variable name is formed by token pasting, `name` must be a single identifier
// (no `::` qualification). The statement's terminating semicolon is supplied by the user.
2706#define REGISTER_TENSORRT_PLUGIN(name) \
2707 static nvinfer1::PluginRegistrar<name> pluginRegistrar##name {}
2708#endif // NV_INFER_RUNTIME_H
nvinfer1::ILogger * getLogger() noexcept
Return the logger object.
nvinfer1::IPluginRegistry * getPluginRegistry() noexcept
Return the plugin registry.
#define TENSORRTAPI
Definition: NvInferRuntimeCommon.h:54
#define NV_TENSORRT_VERSION
Definition: NvInferRuntimeCommon.h:73
#define TRT_DEPRECATED
Definition: NvInferRuntimeCommon.h:40
#define TRT_DEPRECATED_ENUM
Definition: NvInferRuntimeCommon.h:41
Structure to define the dimensions of a tensor.
Definition: NvInferRuntimeCommon.h:153
static constexpr int32_t MAX_DIMS
The maximum rank (number of dimensions) supported for a tensor.
Definition: NvInferRuntimeCommon.h:156
Definition: NvInferRuntime.h:309
int32_t nbDims
The number of dimensions.
Definition: NvInferRuntime.h:311
An engine for executing inference on a built network, with functionally unsafe features.
Definition: NvInferRuntime.h:1343
int32_t getBindingBytesPerComponent(int32_t bindingIndex) const noexcept
Return the number of bytes per component of an element.
Definition: NvInferRuntime.h:1580
int32_t getBindingComponentsPerElement(int32_t bindingIndex) const noexcept
Return the number of components included in one element.
Definition: NvInferRuntime.h:1594
bool isShapeBinding(int32_t bindingIndex) const noexcept
True if tensor is required as input for shape calculations or output from them.
Definition: NvInferRuntime.h:1751
bool hasImplicitBatchDimension() const noexcept
Query whether the engine was built with an implicit batch dimension.
Definition: NvInferRuntime.h:1832
int32_t const * getProfileShapeValues(int32_t profileIndex, int32_t inputIndex, OptProfileSelector select) const noexcept
Get minimum / optimum / maximum values for an input shape binding under an optimization profile.
Definition: NvInferRuntime.h:1714
EngineCapability getEngineCapability() const noexcept
Determine what execution capability this engine has.
Definition: NvInferRuntime.h:1780
TRT_DEPRECATED void destroy() noexcept
Destroy this object.
Definition: NvInferRuntime.h:1522
IErrorRecorder * getErrorRecorder() const noexcept
Get the ErrorRecorder assigned to this interface.
Definition: NvInferRuntime.h:1813
TensorFormat getBindingFormat(int32_t bindingIndex) const noexcept
Return the binding format.
Definition: NvInferRuntime.h:1604
apiv::VCudaEngine * mImpl
Definition: NvInferRuntime.h:1874
TensorLocation getLocation(int32_t bindingIndex) const noexcept
Get location of binding.
Definition: NvInferRuntime.h:1537
Dims getProfileDimensions(int32_t bindingIndex, int32_t profileIndex, OptProfileSelector select) const noexcept
Get the minimum / optimum / maximum dimensions for a particular binding under an optimization profile...
Definition: NvInferRuntime.h:1688
Dims getBindingDimensions(int32_t bindingIndex) const noexcept
Get the dimensions of a binding.
Definition: NvInferRuntime.h:1437
int32_t getBindingVectorizedDim(int32_t bindingIndex) const noexcept
Return the index of the dimension along which the buffer is vectorized.
Definition: NvInferRuntime.h:1635
void setErrorRecorder(IErrorRecorder *recorder) noexcept
Set the ErrorRecorder for this interface.
Definition: NvInferRuntime.h:1798
char const * getBindingName(int32_t bindingIndex) const noexcept
Retrieve the name corresponding to a binding index.
Definition: NvInferRuntime.h:1399
size_t getDeviceMemorySize() const noexcept
Return the amount of device memory required by an execution context.
Definition: NvInferRuntime.h:1556
TacticSources getTacticSources() const noexcept
Return the tactic sources required by this engine.
Definition: NvInferRuntime.h:1847
virtual ~ICudaEngine() noexcept=default
char const * getName() const noexcept
Returns the name of the network associated with the engine.
Definition: NvInferRuntime.h:1650
ProfilingVerbosity getProfilingVerbosity() const noexcept
Return the ProfilingVerbosity the builder config was set to when the engine was built.
Definition: NvInferRuntime.h:1858
int32_t getBindingIndex(char const *name) const noexcept
Retrieve the binding index for a named tensor.
Definition: NvInferRuntime.h:1379
char const * getBindingFormatDesc(int32_t bindingIndex) const noexcept
Return the human readable description of the tensor format.
Definition: NvInferRuntime.h:1623
IHostMemory * serialize() const noexcept
Serialize the network to a stream.
Definition: NvInferRuntime.h:1494
IExecutionContext * createExecutionContextWithoutDeviceMemory() noexcept
Create an execution context without any device memory allocated.
Definition: NvInferRuntime.h:1546
DataType getBindingDataType(int32_t bindingIndex) const noexcept
Determine the required data type for a buffer from its binding index.
Definition: NvInferRuntime.h:1450
IExecutionContext * createExecutionContext() noexcept
Create an execution context.
Definition: NvInferRuntime.h:1510
IEngineInspector * createEngineInspector() const noexcept
Create a new engine inspector which prints the layer information in an engine or an execution context...
Definition: NvInferRuntime.h:1868
TRT_DEPRECATED int32_t getMaxBatchSize() const noexcept
Get the maximum batch size which can be used for inference. Should only be called if the engine is bu...
Definition: NvInferRuntime.h:1466
int32_t getNbOptimizationProfiles() const noexcept
Get the number of optimization profiles defined for this engine.
Definition: NvInferRuntime.h:1661
int32_t getNbLayers() const noexcept
Get the number of layers in the network.
Definition: NvInferRuntime.h:1480
bool bindingIsInput(int32_t bindingIndex) const noexcept
Determine whether a binding is an input binding.
Definition: NvInferRuntime.h:1412
bool isExecutionBinding(int32_t bindingIndex) const noexcept
True if pointer to tensor data is required for execution phase, false if nullptr can be supplied.
Definition: NvInferRuntime.h:1765
bool isRefittable() const noexcept
Return true if an engine can be refit.
Definition: NvInferRuntime.h:1566
Definition: NvInferRuntime.h:244
bool isConstant() const noexcept
Return true if expression is a build-time constant.
Definition: NvInferRuntime.h:247
virtual ~IDimensionExpr() noexcept=default
apiv::VDimensionExpr * mImpl
Definition: NvInferRuntime.h:260
int32_t getConstantValue() const noexcept
Definition: NvInferRuntime.h:254
An engine inspector which prints out the layer information of an engine or an execution context.
Definition: NvInferRuntime.h:2505
IErrorRecorder * getErrorRecorder() const noexcept
Get the ErrorRecorder assigned to this interface.
Definition: NvInferRuntime.h:2617
void setErrorRecorder(IErrorRecorder *recorder) noexcept
Set the ErrorRecorder for this interface.
Definition: NvInferRuntime.h:2602
AsciiChar const * getEngineInformation(LayerInformationFormat format) const noexcept
Get a string describing the information about all the layers in the current engine or the execution c...
Definition: NvInferRuntime.h:2583
IExecutionContext const * getExecutionContext() const noexcept
Get the context currently being inspected.
Definition: NvInferRuntime.h:2533
apiv::VEngineInspector * mImpl
Definition: NvInferRuntime.h:2623
virtual ~IEngineInspector() noexcept=default
AsciiChar const * getLayerInformation(int32_t layerIndex, LayerInformationFormat format) const noexcept
Get a string describing the information about a specific layer in the current engine or the execution...
Definition: NvInferRuntime.h:2558
Reference counted application-implemented error reporting interface for TensorRT objects.
Definition: NvInferRuntimeCommon.h:1665
Context for executing inference using an engine, with functionally unsafe features.
Definition: NvInferRuntime.h:1888
TRT_DEPRECATED bool enqueue(int32_t batchSize, void *const *bindings, cudaStream_t stream, cudaEvent_t *inputConsumed) noexcept
Asynchronously execute inference on a batch.
Definition: NvInferRuntime.h:1948
IErrorRecorder * getErrorRecorder() const noexcept
Get the ErrorRecorder assigned to this interface.
Definition: NvInferRuntime.h:2311
bool reportToProfiler() const noexcept
Calculate layer timing info for the current optimization profile in IExecutionContext and update the ...
Definition: NvInferRuntime.h:2459
virtual ~IExecutionContext() noexcept=default
void setDeviceMemory(void *memory) noexcept
Set the device memory for use by this execution context.
Definition: NvInferRuntime.h:2052
Dims getBindingDimensions(int32_t bindingIndex) const noexcept
Get the dynamic dimensions of a binding.
Definition: NvInferRuntime.h:2195
bool getShapeBinding(int32_t bindingIndex, int32_t *data) const noexcept
Get values of an input tensor required for shape calculations or an output tensor produced by shape c...
Definition: NvInferRuntime.h:2247
char const * getName() const noexcept
Return the name of the execution context.
Definition: NvInferRuntime.h:2036
void setDebugSync(bool sync) noexcept
Set the debug sync flag.
Definition: NvInferRuntime.h:1962
bool enqueueV2(void *const *bindings, cudaStream_t stream, cudaEvent_t *inputConsumed) noexcept
Asynchronously execute inference.
Definition: NvInferRuntime.h:2356
void setEnqueueEmitsProfile(bool enqueueEmitsProfile) noexcept
Set whether enqueue emits layer timing to the profiler.
Definition: NvInferRuntime.h:2419
bool setBindingDimensions(int32_t bindingIndex, Dims dimensions) noexcept
Set the dynamic dimensions of a binding.
Definition: NvInferRuntime.h:2165
bool setInputShapeBinding(int32_t bindingIndex, int32_t const *data) noexcept
Set values of input tensor required by shape calculations.
Definition: NvInferRuntime.h:2225
bool executeV2(void *const *bindings) noexcept
Synchronously execute inference on a network.
Definition: NvInferRuntime.h:2328
bool getEnqueueEmitsProfile() const noexcept
Get the enqueueEmitsProfile state.
Definition: NvInferRuntime.h:2430
bool setOptimizationProfileAsync(int32_t profileIndex, cudaStream_t stream) noexcept
Select an optimization profile for the current context with async semantics.
Definition: NvInferRuntime.h:2404
apiv::VExecutionContext * mImpl
Definition: NvInferRuntime.h:2465
ICudaEngine const & getEngine() const noexcept
Get the associated engine.
Definition: NvInferRuntime.h:2002
int32_t getOptimizationProfile() const noexcept
Get the index of the currently selected optimization profile.
Definition: NvInferRuntime.h:2128
TRT_DEPRECATED void destroy() noexcept
Destroy this object.
Definition: NvInferRuntime.h:2014
bool getDebugSync() const noexcept
Get the debug sync flag.
Definition: NvInferRuntime.h:1972
Dims getStrides(int32_t bindingIndex) const noexcept
Return the strides of the buffer for the given binding.
Definition: NvInferRuntime.h:2073
IProfiler * getProfiler() const noexcept
Get the profiler.
Definition: NvInferRuntime.h:1992
void setErrorRecorder(IErrorRecorder *recorder) noexcept
Set the ErrorRecorder for this interface.
Definition: NvInferRuntime.h:2296
TRT_DEPRECATED bool setOptimizationProfile(int32_t profileIndex) noexcept
Select an optimization profile for the current context.
Definition: NvInferRuntime.h:2116
bool allInputDimensionsSpecified() const noexcept
Whether all dynamic dimensions of input tensors have been specified.
Definition: NvInferRuntime.h:2262
void setProfiler(IProfiler *profiler) noexcept
Set the profiler.
Definition: NvInferRuntime.h:1982
void setName(char const *name) noexcept
Set the name of the execution context.
Definition: NvInferRuntime.h:2026
bool allInputShapesSpecified() const noexcept
Whether all input shape bindings have been specified.
Definition: NvInferRuntime.h:2276
Definition: NvInferRuntime.h:282
IDimensionExpr const * operation(DimensionOperation op, IDimensionExpr const &first, IDimensionExpr const &second) noexcept
Definition: NvInferRuntime.h:292
virtual ~IExprBuilder() noexcept=default
apiv::VExprBuilder * mImpl
Definition: NvInferRuntime.h:299
IDimensionExpr const * constant(int32_t value) noexcept
Return pointer to IDimensionExpr for given value.
Definition: NvInferRuntime.h:285
Application-implemented class for controlling allocation on the GPU.
Definition: NvInferRuntimeCommon.h:1338
Class to handle library allocated memory that is accessible to the user.
Definition: NvInferRuntime.h:144
void * data() const noexcept
A pointer to the raw data that is owned by the library.
Definition: NvInferRuntime.h:149
DataType type() const noexcept
The type of the memory that was allocated.
Definition: NvInferRuntime.h:161
TRT_DEPRECATED void destroy() noexcept
Definition: NvInferRuntime.h:172
std::size_t size() const noexcept
The size in bytes of the data that was allocated.
Definition: NvInferRuntime.h:155
apiv::VHostMemory * mImpl
Definition: NvInferRuntime.h:178
virtual ~IHostMemory() noexcept=default
Application-implemented logging interface for the builder, refitter and runtime.
Definition: NvInferRuntimeCommon.h:1476
Forward declaration of IEngineInspector for use by other interfaces.
Definition: NvInferRuntime.h:43
INoCopy & operator=(INoCopy &&other)=delete
INoCopy(INoCopy const &other)=delete
INoCopy(INoCopy &&other)=delete
virtual ~INoCopy()=default
INoCopy & operator=(INoCopy const &other)=delete
Optimization profile for dynamic input dimensions and shape tensors.
Definition: NvInferRuntime.h:1120
int32_t const * getShapeValues(char const *inputName, OptProfileSelector select) const noexcept
Get the minimum / optimum / maximum values for an input shape tensor.
Definition: NvInferRuntime.h:1222
apiv::VOptimizationProfile * mImpl
Definition: NvInferRuntime.h:1270
Dims getDimensions(char const *inputName, OptProfileSelector select) const noexcept
Get the minimum / optimum / maximum dimensions for a dynamic input tensor.
Definition: NvInferRuntime.h:1157
virtual ~IOptimizationProfile() noexcept=default
float getExtraMemoryTarget() const noexcept
Get the extra memory target that has been defined for this profile.
Definition: NvInferRuntime.h:1248
bool setDimensions(char const *inputName, OptProfileSelector select, Dims dims) noexcept
Set the minimum / optimum / maximum dimensions for a dynamic input tensor.
Definition: NvInferRuntime.h:1147
bool setExtraMemoryTarget(float target) noexcept
Set a target for extra GPU memory that may be used by this profile.
Definition: NvInferRuntime.h:1240
bool setShapeValues(char const *inputName, OptProfileSelector select, int32_t const *values, int32_t nbValues) noexcept
Set the minimum / optimum / maximum values for an input shape tensor.
Definition: NvInferRuntime.h:1200
bool isValid() const noexcept
Check whether the optimization profile can be passed to an IBuilderConfig object.
Definition: NvInferRuntime.h:1264
int32_t getNbShapeValues(char const *inputName) const noexcept
Get the number of values for an input shape tensor.
Definition: NvInferRuntime.h:1212
Single registration point for all plugins in an application. It is used to find plugin implementation...
Definition: NvInferRuntimeCommon.h:1210
virtual bool registerCreator(IPluginCreator &creator, AsciiChar const *const pluginNamespace) noexcept=0
Register a plugin creator. Returns false if one with same type is already registered.
Definition: NvInferRuntime.h:351
IPluginV2DynamicExt * clone() const noexcept override=0
Clone the plugin object. This copies over internal plugin parameters as well and returns a new plugin...
virtual ~IPluginV2DynamicExt() noexcept
Definition: NvInferRuntime.h:503
Plugin class for user-implemented layers.
Definition: NvInferRuntimeCommon.h:680
Application-implemented interface for profiling.
Definition: NvInferRuntime.h:556
virtual void reportLayerTime(char const *layerName, float ms) noexcept=0
Layer time reporting callback.
virtual ~IProfiler() noexcept
Definition: NvInferRuntime.h:566
Updates weights in an engine.
Definition: NvInferRuntime.h:800
int32_t getMaxThreads() const noexcept
Get the maximum number of threads that can be used by the refitter.
Definition: NvInferRuntime.h:1060
bool setNamedWeights(char const *name, Weights weights) noexcept
Specify new weights of given name.
Definition: NvInferRuntime.h:986
int32_t getAllWeights(int32_t size, char const **weightsNames) noexcept
Get names of all weights that could be refit.
Definition: NvInferRuntime.h:1022
bool setDynamicRange(char const *tensorName, float min, float max) noexcept
Definition: NvInferRuntime.h:894
ILogger * getLogger() const noexcept
Get the logger with which the refitter was created.
Definition: NvInferRuntime.h:1032
int32_t getTensorsWithDynamicRange(int32_t size, char const **tensorNames) const noexcept
Get names of all tensors that have refittable dynamic ranges.
Definition: NvInferRuntime.h:934
float getDynamicRangeMin(char const *tensorName) const noexcept
Get minimum of dynamic range.
Definition: NvInferRuntime.h:906
bool refitCudaEngine() noexcept
Updates associated engine. Return true if successful.
Definition: NvInferRuntime.h:829
int32_t getMissingWeights(int32_t size, char const **weightsNames) noexcept
Get names of missing weights.
Definition: NvInferRuntime.h:1006
int32_t getMissing(int32_t size, char const **layerNames, WeightsRole *roles) noexcept
Get description of missing weights.
Definition: NvInferRuntime.h:850
TRT_DEPRECATED void destroy() noexcept
Definition: NvInferRuntime.h:877
bool setMaxThreads(int32_t maxThreads) noexcept
Set the maximum number of threads.
Definition: NvInferRuntime.h:1046
float getDynamicRangeMax(char const *tensorName) const noexcept
Get maximum of dynamic range.
Definition: NvInferRuntime.h:918
apiv::VRefitter * mImpl
Definition: NvInferRuntime.h:1066
int32_t getAll(int32_t size, char const **layerNames, WeightsRole *roles) noexcept
Get description of all weights that could be refit.
Definition: NvInferRuntime.h:867
virtual ~IRefitter() noexcept=default
void setErrorRecorder(IErrorRecorder *recorder) noexcept
Set the ErrorRecorder for this interface.
Definition: NvInferRuntime.h:953
IErrorRecorder * getErrorRecorder() const noexcept
Get the ErrorRecorder assigned to this interface.
Definition: NvInferRuntime.h:968
Allows a serialized functionally unsafe engine to be deserialized.
Definition: NvInferRuntime.h:618
bool setMaxThreads(int32_t maxThreads) noexcept
Set the maximum number of threads.
Definition: NvInferRuntime.h:769
virtual ~IRuntime() noexcept=default
TRT_DEPRECATED void destroy() noexcept
Destroy this object.
Definition: NvInferRuntime.h:682
apiv::VRuntime * mImpl
Definition: NvInferRuntime.h:789
void setDLACore(int32_t dlaCore) noexcept
Sets the DLA core used by the network. Defaults to -1.
Definition: NvInferRuntime.h:653
int32_t getNbDLACores() const noexcept
Returns number of DLA hardware cores accessible or 0 if DLA is unavailable.
Definition: NvInferRuntime.h:670
ICudaEngine * deserializeCudaEngine(void const *blob, std::size_t size) noexcept
Deserialize an engine from a stream.
Definition: NvInferRuntime.h:745
int32_t getDLACore() const noexcept
Get the DLA core that the engine executes on.
Definition: NvInferRuntime.h:662
void setGpuAllocator(IGpuAllocator *allocator) noexcept
Set the GPU allocator.
Definition: NvInferRuntime.h:696
IErrorRecorder * getErrorRecorder() const noexcept
Get the ErrorRecorder assigned to this interface.
Definition: NvInferRuntime.h:730
ILogger * getLogger() const noexcept
Get the logger with which the runtime was created.
Definition: NvInferRuntime.h:755
int32_t getMaxThreads() const noexcept
Get the maximum number of threads that can be used by the runtime.
Definition: NvInferRuntime.h:783
void setErrorRecorder(IErrorRecorder *recorder) noexcept
Set the ErrorRecorder for this interface.
Definition: NvInferRuntime.h:715
Register the plugin creator to the registry The static registry object will be instantiated when the ...
Definition: NvInferRuntime.h:2692
PluginRegistrar()
Definition: NvInferRuntime.h:2694
An array of weights used as a layer parameter.
Definition: NvInferRuntime.h:126
DataType type
The type of the weights.
Definition: NvInferRuntime.h:128
int64_t count
The number of weights in the array.
Definition: NvInferRuntime.h:130
void const * values
The weight values, in a contiguous array.
Definition: NvInferRuntime.h:129
IRuntime * createInferRuntime(ILogger &logger) noexcept
Create an instance of a safe::IRuntime class.
The TensorRT API version 1 namespace.
uint32_t TacticSources
Represents a collection of one or more TacticSource values combined using bitwise-OR operations.
Definition: NvInferRuntime.h:1305
EngineCapability
List of supported engine capability flows.
Definition: NvInferRuntime.h:69
DimensionOperation
An operation on two IDimensionExpr, which represent integer expressions used in dimension computation...
Definition: NvInferRuntime.h:192
@ kSUB
Subtract the second element from the first.
@ kSUM
Sum of the two operands.
@ kPROD
Product of the two operands.
@ kFLOOR_DIV
Floor division of the first element by the second.
@ kEQUAL
1 if operands are equal, 0 otherwise.
@ kMIN
Minimum of the two operands.
@ kLESS
1 if first operand is less than second operand, 0 otherwise.
@ kMAX
Maximum of the two operands.
@ kCEIL_DIV
Division rounding up.
constexpr int32_t EnumMax< WeightsRole >() noexcept
Maximum number of elements in WeightsRole enum.
Definition: NvInferRuntime.h:587
char_t AsciiChar
AsciiChar is the type used by TensorRT to represent valid ASCII characters.
Definition: NvInferRuntimeCommon.h:88
@ kV2_DYNAMICEXT
IPluginV2DynamicExt.
constexpr int32_t EnumMax< LayerInformationFormat >() noexcept
Definition: NvInferRuntime.h:2484
DataType
The type of weights and tensors.
Definition: NvInferRuntimeCommon.h:114
DeviceType
The device that this layer/network will execute on.
Definition: NvInferRuntime.h:598
@ kSCALE
Scale layer.
@ kCONSTANT
Constant layer.
@ kDEFAULT
Similar to ONNX Gather.
constexpr int32_t EnumMax< OptProfileSelector >() noexcept
Number of different values of OptProfileSelector enum.
Definition: NvInferRuntime.h:1092
WeightsRole
How a layer uses particular Weights.
Definition: NvInferRuntime.h:576
@ kSHIFT
shift part of IScaleLayer
@ kANY
Any other weights role.
@ kBIAS
bias for IConvolutionLayer, IDeconvolutionLayer, or IFullyConnectedLayer
@ kKERNEL
kernel for IConvolutionLayer, IDeconvolutionLayer, or IFullyConnectedLayer
constexpr int32_t EnumMax< ProfilingVerbosity >() noexcept
Maximum number of profile verbosity levels in ProfilingVerbosity enum.
Definition: NvInferRuntime.h:1330
ProfilingVerbosity
List of verbosity levels of layer information exposed in NVTX annotations and in IEngineInspector.
Definition: NvInferRuntime.h:1317
@ kLAYER_NAMES_ONLY
Print only the layer names. This is the default setting.
@ kDETAILED
Print detailed layer information including layer names and layer parameters.
TacticSource
List of tactic sources for TensorRT.
Definition: NvInferRuntime.h:1282
@ kCUBLAS_LT
cuBLAS LT tactics
@ kCUDNN
cuDNN tactics
@ kCUBLAS
cuBLAS tactics.
TensorFormat PluginFormat
PluginFormat is reserved for backward compatibility.
Definition: NvInferRuntimeCommon.h:308
@ kMIN
Minimum of the two elements.
TensorFormat
Format of the input/output tensors.
Definition: NvInferRuntimeCommon.h:183
constexpr int32_t EnumMax< TacticSource >() noexcept
Maximum number of tactic sources in TacticSource enum.
Definition: NvInferRuntime.h:1294
LayerInformationFormat
The format in which the IEngineInspector prints the layer information.
Definition: NvInferRuntime.h:2476
@ kJSON
Print layer information in JSON format.
@ kONELINE
Print layer information in one line per layer.
constexpr int32_t EnumMax< DeviceType >() noexcept
Maximum number of elements in DeviceType enum.
Definition: NvInferRuntime.h:605
constexpr int32_t EnumMax< DimensionOperation >() noexcept
Maximum number of elements in DimensionOperation enum.
Definition: NvInferRuntime.h:206
TensorLocation
The location for tensor data storage, device or host.
Definition: NvInferRuntime.h:216
@ kHOST
Data stored on host.
@ kDEVICE
Data stored on device.
OptProfileSelector
When setting or querying optimization profile parameters (such as shape tensor inputs or dynamic dime...
Definition: NvInferRuntime.h:1080
@ kOPT
This is used to set or get the value that is used in the optimization (kernel selection).
Definition: NvInferRuntime.h:321
Dims min
Lower bounds on tensor’s dimensions.
Definition: NvInferRuntime.h:326
Dims max
Upper bounds on tensor’s dimensions.
Definition: NvInferRuntime.h:329
PluginTensorDesc desc
Information required to interpret a pointer to tensor data, except that desc.dims has -1 in place of ...
Definition: NvInferRuntime.h:323
Fields that a plugin might see for an input or output.
Definition: NvInferRuntimeCommon.h:332
Declaration of EnumMaxImpl struct to store maximum number of elements in an enumeration type.
Definition: NvInferRuntimeCommon.h:99

  Copyright © 2024 NVIDIA Corporation
  Privacy Policy | Manage My Privacy | Do Not Sell or Share My Data | Terms of Service | Accessibility | Corporate Policies | Product Security | Contact