TensorRT 8.2.1
NvInferRuntime.h
1/*
2 * Copyright 1993-2021 NVIDIA Corporation. All rights reserved.
3 *
4 * NOTICE TO LICENSEE:
5 *
6 * This source code and/or documentation ("Licensed Deliverables") are
7 * subject to NVIDIA intellectual property rights under U.S. and
8 * international Copyright laws.
9 *
10 * These Licensed Deliverables contained herein is PROPRIETARY and
11 * CONFIDENTIAL to NVIDIA and is being provided under the terms and
12 * conditions of a form of NVIDIA software license agreement by and
13 * between NVIDIA and Licensee ("License Agreement") or electronically
14 * accepted by Licensee. Notwithstanding any terms or conditions to
15 * the contrary in the License Agreement, reproduction or disclosure
16 * of the Licensed Deliverables to any third party without the express
17 * written consent of NVIDIA is prohibited.
18 *
19 * NOTWITHSTANDING ANY TERMS OR CONDITIONS TO THE CONTRARY IN THE
20 * LICENSE AGREEMENT, NVIDIA MAKES NO REPRESENTATION ABOUT THE
21 * SUITABILITY OF THESE LICENSED DELIVERABLES FOR ANY PURPOSE. IT IS
22 * PROVIDED "AS IS" WITHOUT EXPRESS OR IMPLIED WARRANTY OF ANY KIND.
23 * NVIDIA DISCLAIMS ALL WARRANTIES WITH REGARD TO THESE LICENSED
24 * DELIVERABLES, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY,
25 * NONINFRINGEMENT, AND FITNESS FOR A PARTICULAR PURPOSE.
26 * NOTWITHSTANDING ANY TERMS OR CONDITIONS TO THE CONTRARY IN THE
27 * LICENSE AGREEMENT, IN NO EVENT SHALL NVIDIA BE LIABLE FOR ANY
28 * SPECIAL, INDIRECT, INCIDENTAL, OR CONSEQUENTIAL DAMAGES, OR ANY
29 * DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS,
30 * WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS
31 * ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE
32 * OF THESE LICENSED DELIVERABLES.
33 *
34 * U.S. Government End Users. These Licensed Deliverables are a
35 * "commercial item" as that term is defined at 48 C.F.R. 2.101 (OCT
36 * 1995), consisting of "commercial computer software" and "commercial
37 * computer software documentation" as such terms are used in 48
38 * C.F.R. 12.212 (SEPT 1995) and is provided to the U.S. Government
39 * only as a commercial end item. Consistent with 48 C.F.R.12.212 and
40 * 48 C.F.R. 227.7202-1 through 227.7202-4 (JUNE 1995), all
41 * U.S. Government End Users acquire the Licensed Deliverables with
42 * only those rights set forth herein.
43 *
44 * Any use of the Licensed Deliverables in individual and commercial
45 * software must include, in the user documentation and internal
46 * comments to the code, the above Disclaimer and U.S. Government End
47 * Users Notice.
48 */
49
50#ifndef NV_INFER_RUNTIME_H
51#define NV_INFER_RUNTIME_H
52
58
59#include "NvInferImpl.h"
60#include "NvInferRuntimeCommon.h"
61
62namespace nvinfer1
63{
64
65class IExecutionContext;
66class ICudaEngine;
67class IPluginFactory;
68class IEngineInspector;
69
78
79class INoCopy
80{
81protected:
82 INoCopy() = default;
83 virtual ~INoCopy() = default;
84 INoCopy(const INoCopy& other) = delete;
85 INoCopy& operator=(const INoCopy& other) = delete;
86 INoCopy(INoCopy&& other) = delete;
87 INoCopy& operator=(INoCopy&& other) = delete;
88};
89
104
105enum class EngineCapability : int32_t
106{
111 kSTANDARD = 0,
112 kDEFAULT TRT_DEPRECATED_ENUM = kSTANDARD,
113
120 kSAFETY = 1,
121 kSAFE_GPU TRT_DEPRECATED_ENUM = kSAFETY,
122
128 kDLA_STANDALONE = 2,
129 kSAFE_DLA TRT_DEPRECATED_ENUM = kDLA_STANDALONE,
130};
131
132namespace impl
133{
135template <>
136struct EnumMaxImpl<EngineCapability>
137{
138 static constexpr int32_t kVALUE = 3;
139};
140} // namespace impl
141
156class Weights
157{
158public:
159    DataType type;
160    const void* values;
161 int64_t count;
162};
163
174class IHostMemory : public INoCopy
175{
176public:
177 virtual ~IHostMemory() noexcept = default;
178
180 void* data() const noexcept
181 {
182 return mImpl->data();
183 }
184
186 std::size_t size() const noexcept
187 {
188 return mImpl->size();
189 }
190
192 DataType type() const noexcept
193 {
194 return mImpl->type();
195 }
203 TRT_DEPRECATED void destroy() noexcept
204 {
205 delete this;
206 }
207
208protected:
209 apiv::VHostMemory* mImpl;
210};
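//!
//! A minimal usage sketch: serialize a built engine and write the IHostMemory blob to disk.
//! It assumes an existing ICudaEngine* `engine` (see ICudaEngine::serialize() below) and <fstream>.
//!
//! \code
//!     nvinfer1::IHostMemory* plan = engine->serialize();
//!     std::ofstream planFile("engine.plan", std::ios::binary);
//!     planFile.write(static_cast<char const*>(plan->data()), plan->size());
//!     delete plan; // destroy() is deprecated; IHostMemory has a public virtual destructor
//! \endcode
//!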
211
222enum class DimensionOperation : int32_t
223{
224 kSUM = 0,
225 kPROD = 1,
226 kMAX = 2,
227 kMIN = 3,
228 kSUB = 4,
229 kEQUAL = 5,
230 kLESS = 6,
231 kFLOOR_DIV = 7,
232 kCEIL_DIV = 8
233};
234
236template <>
237constexpr inline int32_t EnumMax<DimensionOperation>() noexcept
238{
239 return 9;
240}
241
246enum class TensorLocation : int32_t
247{
248 kDEVICE = 0,
249 kHOST = 1,
250};
251
252namespace impl
253{
255template <>
256struct EnumMaxImpl<TensorLocation>
257{
258 static constexpr int32_t kVALUE = 2;
259};
260} // namespace impl
261
274class IDimensionExpr : public INoCopy
275{
276public:
278 bool isConstant() const noexcept
279 {
280 return mImpl->isConstant();
281 }
282
285 int32_t getConstantValue() const noexcept
286 {
287 return mImpl->getConstantValue();
288 }
289
290protected:
291 apiv::VDimensionExpr* mImpl;
292 virtual ~IDimensionExpr() noexcept = default;
293};
294
312class IExprBuilder : public INoCopy
313{
314public:
316 const IDimensionExpr* constant(int32_t value) noexcept
317 {
318 return mImpl->constant(value);
319 }
320
323    const IDimensionExpr* operation(
324        DimensionOperation op, const IDimensionExpr& first, const IDimensionExpr& second) noexcept
325 {
326 return mImpl->operation(op, first, second);
327 }
328
329protected:
330 apiv::VExprBuilder* mImpl;
331 virtual ~IExprBuilder() noexcept = default;
332};
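//!
//! A sketch of how a hypothetical IPluginV2DynamicExt::getOutputDimensions() might use the builder:
//! the output copies the rank of input 0 and halves dimension 1, rounding up. `inputs` is the
//! DimsExprs array passed to getOutputDimensions().
//!
//! \code
//!     nvinfer1::DimsExprs out = inputs[0];
//!     out.d[1] = exprBuilder.operation(
//!         nvinfer1::DimensionOperation::kCEIL_DIV, *inputs[0].d[1], *exprBuilder.constant(2));
//!     return out;
//! \endcode
//!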
333
339class DimsExprs
340{
341public:
342 int32_t nbDims;
343    const IDimensionExpr* d[Dims::MAX_DIMS];
344};
345
351struct DynamicPluginTensorDesc
352{
354    PluginTensorDesc desc;
355
357    Dims min;
358
360    Dims max;
361};
362
381class IPluginV2DynamicExt : public nvinfer1::IPluginV2Ext
382{
383public:
384 IPluginV2DynamicExt* clone() const noexcept override = 0;
385
410 virtual DimsExprs getOutputDimensions(
411 int32_t outputIndex, const DimsExprs* inputs, int32_t nbInputs, IExprBuilder& exprBuilder) noexcept
412 = 0;
413
417 static constexpr int32_t kFORMAT_COMBINATION_LIMIT = 100;
418
451 virtual bool supportsFormatCombination(
452 int32_t pos, const PluginTensorDesc* inOut, int32_t nbInputs, int32_t nbOutputs) noexcept
453 = 0;
454
493 virtual void configurePlugin(const DynamicPluginTensorDesc* in, int32_t nbInputs,
494 const DynamicPluginTensorDesc* out, int32_t nbOutputs) noexcept
495 = 0;
496
506 virtual size_t getWorkspaceSize(const PluginTensorDesc* inputs, int32_t nbInputs, const PluginTensorDesc* outputs,
507 int32_t nbOutputs) const noexcept
508 = 0;
509
522 virtual int32_t enqueue(const PluginTensorDesc* inputDesc, const PluginTensorDesc* outputDesc,
523 const void* const* inputs, void* const* outputs, void* workspace, cudaStream_t stream) noexcept
524 = 0;
525
526protected:
534 int32_t getTensorRTVersion() const noexcept override
535 {
536 return (static_cast<int32_t>(PluginVersion::kV2_DYNAMICEXT) << 24 | (NV_TENSORRT_VERSION & 0xFFFFFF));
537 }
538
539 virtual ~IPluginV2DynamicExt() noexcept {}
540
541private:
542 // Following are obsolete base class methods, and must not be implemented or used.
543
544 void configurePlugin(Dims const*, int32_t, Dims const*, int32_t, DataType const*, DataType const*, bool const*,
545 bool const*, PluginFormat, int32_t) noexcept override final
546 {
547 }
548
549 bool supportsFormat(DataType, PluginFormat) const noexcept override final
550 {
551 return false;
552 }
553
554 Dims getOutputDimensions(int32_t, Dims const*, int32_t) noexcept override final
555 {
556 return Dims{-1, {}};
557 }
558
559 bool isOutputBroadcastAcrossBatch(int32_t, bool const*, int32_t) const noexcept override final
560 {
561 return false;
562 }
563
564 bool canBroadcastInputAcrossBatch(int32_t) const noexcept override final
565 {
566 return true;
567 }
568
569 size_t getWorkspaceSize(int32_t) const noexcept override final
570 {
571 return 0;
572 }
573
574 int32_t enqueue(int32_t, const void* const*, void* const*, void*, cudaStream_t) noexcept override final
575 {
576 return 1;
577 }
578};
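//!
//! A sketch of supportsFormatCombination() for a hypothetical elementwise plugin that accepts
//! linear FP32 or FP16 and requires all tensors to share the type of input 0:
//!
//! \code
//!     bool supportsFormatCombination(int32_t pos, nvinfer1::PluginTensorDesc const* inOut,
//!         int32_t nbInputs, int32_t nbOutputs) noexcept override
//!     {
//!         nvinfer1::PluginTensorDesc const& desc = inOut[pos];
//!         bool const typeOk = desc.type == nvinfer1::DataType::kFLOAT || desc.type == nvinfer1::DataType::kHALF;
//!         return typeOk && desc.format == nvinfer1::TensorFormat::kLINEAR && desc.type == inOut[0].type;
//!     }
//! \endcode
//!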
579
590class IProfiler
591{
592public:
599 virtual void reportLayerTime(const char* layerName, float ms) noexcept = 0;
600
601 virtual ~IProfiler() noexcept {}
602};
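//!
//! A sketch of a simple IProfiler implementation that accumulates per-layer times; `SimpleProfiler`
//! is an illustrative name, and the attachment point is IExecutionContext::setProfiler() (below).
//!
//! \code
//!     struct SimpleProfiler : public nvinfer1::IProfiler
//!     {
//!         std::map<std::string, float> timesMs; // requires <map> and <string>
//!         void reportLayerTime(char const* layerName, float ms) noexcept override
//!         {
//!             timesMs[layerName] += ms;
//!         }
//!     };
//!
//!     SimpleProfiler profiler;
//!     context->setProfiler(&profiler); // `context` is an existing IExecutionContext*
//! \endcode
//!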
603
610enum class WeightsRole : int32_t
611{
612 kKERNEL = 0,
613 kBIAS = 1,
614 kSHIFT = 2,
615 kSCALE = 3,
616 kCONSTANT = 4,
617 kANY = 5,
618};
619
621template <>
622constexpr inline int32_t EnumMax<WeightsRole>() noexcept
623{
624 return 6;
625}
626
632enum class DeviceType : int32_t
633{
634 kGPU,
635 kDLA,
636};
637
639template <>
640constexpr inline int32_t EnumMax<DeviceType>() noexcept
641{
642 return 2;
643}
644
652class IRuntime : public INoCopy
653{
654public:
655 virtual ~IRuntime() noexcept = default;
656
672    TRT_DEPRECATED nvinfer1::ICudaEngine* deserializeCudaEngine(
673        const void* blob, std::size_t size, IPluginFactory* pluginFactory) noexcept
674 {
675 return mImpl->deserializeCudaEngine(blob, size, nullptr);
676 }
677
685 void setDLACore(int32_t dlaCore) noexcept
686 {
687 mImpl->setDLACore(dlaCore);
688 }
689
696 int32_t getDLACore() const noexcept
697 {
698 return mImpl->getDLACore();
699 }
700
704 int32_t getNbDLACores() const noexcept
705 {
706 return mImpl->getNbDLACores();
707 }
708
716 TRT_DEPRECATED void destroy() noexcept
717 {
718 delete this;
719 }
720
730 void setGpuAllocator(IGpuAllocator* allocator) noexcept
731 {
732 mImpl->setGpuAllocator(allocator);
733 }
734
746 //
749 void setErrorRecorder(IErrorRecorder* recorder) noexcept
750 {
751 mImpl->setErrorRecorder(recorder);
752 }
753
764    IErrorRecorder* getErrorRecorder() const noexcept
765    {
766 return mImpl->getErrorRecorder();
767 }
768
779 ICudaEngine* deserializeCudaEngine(const void* blob, std::size_t size) noexcept
780 {
781 return mImpl->deserializeCudaEngine(blob, size, nullptr);
782 }
783
789 ILogger* getLogger() const noexcept
790 {
791 return mImpl->getLogger();
792 }
793
794protected:
795 apiv::VRuntime* mImpl;
796};
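//!
//! A minimal deserialization sketch; `gLogger` is an application-provided ILogger implementation
//! and `planData`/`planSize` hold a serialized engine already loaded by the caller.
//!
//! \code
//!     nvinfer1::IRuntime* runtime = nvinfer1::createInferRuntime(gLogger);
//!     nvinfer1::ICudaEngine* engine = runtime->deserializeCudaEngine(planData, planSize);
//!     if (engine == nullptr)
//!     {
//!         // Deserialization failed; consult the logger or the attached IErrorRecorder.
//!     }
//! \endcode
//!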
797
805class IRefitter : public INoCopy
806{
807public:
808 virtual ~IRefitter() noexcept = default;
809
820 bool setWeights(const char* layerName, WeightsRole role, Weights weights) noexcept
821 {
822 return mImpl->setWeights(layerName, role, weights);
823 }
824
835 bool refitCudaEngine() noexcept
836 {
837 return mImpl->refitCudaEngine();
838 }
839
856 int32_t getMissing(int32_t size, const char** layerNames, WeightsRole* roles) noexcept
857 {
858 return mImpl->getMissing(size, layerNames, roles);
859 }
860
873 int32_t getAll(int32_t size, const char** layerNames, WeightsRole* roles) noexcept
874 {
875 return mImpl->getAll(size, layerNames, roles);
876 }
877
883 TRT_DEPRECATED void destroy() noexcept
884 {
885 delete this;
886 }
887
900 bool setDynamicRange(const char* tensorName, float min, float max) noexcept
901 {
902 return mImpl->setDynamicRange(tensorName, min, max);
903 }
904
912 float getDynamicRangeMin(const char* tensorName) const noexcept
913 {
914 return mImpl->getDynamicRangeMin(tensorName);
915 }
916
924 float getDynamicRangeMax(const char* tensorName) const noexcept
925 {
926 return mImpl->getDynamicRangeMax(tensorName);
927 }
928
940 int32_t getTensorsWithDynamicRange(int32_t size, const char** tensorNames) const noexcept
941 {
942 return mImpl->getTensorsWithDynamicRange(size, tensorNames);
943 }
944
956 //
959 void setErrorRecorder(IErrorRecorder* recorder) noexcept
960 {
961 mImpl->setErrorRecorder(recorder);
962 }
963
974    IErrorRecorder* getErrorRecorder() const noexcept
975    {
976 return mImpl->getErrorRecorder();
977 }
978
992 bool setNamedWeights(const char* name, Weights weights) noexcept
993 {
994 return mImpl->setNamedWeights(name, weights);
995 }
996
1012 int32_t getMissingWeights(int32_t size, const char** weightsNames) noexcept
1013 {
1014 return mImpl->getMissingWeights(size, weightsNames);
1015 }
1016
1028 int32_t getAllWeights(int32_t size, const char** weightsNames) noexcept
1029 {
1030 return mImpl->getAllWeights(size, weightsNames);
1031 }
1032
1038 ILogger* getLogger() const noexcept
1039 {
1040 return mImpl->getLogger();
1041 }
1042
1043protected:
1044 apiv::VRefitter* mImpl;
1045};
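//!
//! A refit sketch; it assumes `engine` was built with the refit flag enabled, that "conv1" names a
//! refittable layer, and that `newKernel` is a Weights instance prepared by the caller.
//!
//! \code
//!     nvinfer1::IRefitter* refitter = nvinfer1::createInferRefitter(*engine, gLogger);
//!     refitter->setWeights("conv1", nvinfer1::WeightsRole::kKERNEL, newKernel);
//!     if (refitter->getMissing(0, nullptr, nullptr) == 0 && refitter->refitCudaEngine())
//!     {
//!         // The engine now uses the updated weights.
//!     }
//!     delete refitter;
//! \endcode
//!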
1046
1057enum class OptProfileSelector : int32_t
1058{
1059 kMIN = 0,
1060 kOPT = 1,
1061 kMAX = 2
1062};
1063
1065template <>
1066constexpr inline int32_t EnumMax<OptProfileSelector>() noexcept
1067{
1068 return 3;
1069}
1070
1093class IOptimizationProfile : public INoCopy
1094{
1095public:
1121 bool setDimensions(const char* inputName, OptProfileSelector select, Dims dims) noexcept
1122 {
1123 return mImpl->setDimensions(inputName, select, dims);
1124 }
1125
1131 Dims getDimensions(const char* inputName, OptProfileSelector select) const noexcept
1132 {
1133 return mImpl->getDimensions(inputName, select);
1134 }
1135
1174    bool setShapeValues(
1175        const char* inputName, OptProfileSelector select, const int32_t* values, int32_t nbValues) noexcept
1176 {
1177 return mImpl->setShapeValues(inputName, select, values, nbValues);
1178 }
1179
1186 int32_t getNbShapeValues(const char* inputName) const noexcept
1187 {
1188 return mImpl->getNbShapeValues(inputName);
1189 }
1190
1196 int32_t const* getShapeValues(const char* inputName, OptProfileSelector select) const noexcept
1197 {
1198 return mImpl->getShapeValues(inputName, select);
1199 }
1200
1214 bool setExtraMemoryTarget(float target) noexcept
1215 {
1216 return mImpl->setExtraMemoryTarget(target);
1217 }
1218
1222 float getExtraMemoryTarget() const noexcept
1223 {
1224 return mImpl->getExtraMemoryTarget();
1225 }
1226
1238 bool isValid() const noexcept
1239 {
1240 return mImpl->isValid();
1241 }
1242
1243protected:
1244 apiv::VOptimizationProfile* mImpl;
1245 virtual ~IOptimizationProfile() noexcept = default;
1246};
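//!
//! A sketch of configuring a profile for a dynamic input named "input" with shape (-1, 3, 224, 224);
//! `profile` is assumed to come from IBuilder::createOptimizationProfile() (declared in NvInfer.h),
//! and Dims4 is the helper from NvInferLegacyDims.h.
//!
//! \code
//!     profile->setDimensions("input", nvinfer1::OptProfileSelector::kMIN, nvinfer1::Dims4(1, 3, 224, 224));
//!     profile->setDimensions("input", nvinfer1::OptProfileSelector::kOPT, nvinfer1::Dims4(8, 3, 224, 224));
//!     profile->setDimensions("input", nvinfer1::OptProfileSelector::kMAX, nvinfer1::Dims4(32, 3, 224, 224));
//!     bool const ok = profile->isValid(); // requires kMIN <= kOPT <= kMAX for every dimension
//! \endcode
//!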
1247
1255enum class TacticSource : int32_t
1256{
1258 kCUBLAS = 0,
1259 kCUBLAS_LT = 1,
1260 kCUDNN = 2
1261};
1262
1263template <>
1264constexpr inline int32_t EnumMax<TacticSource>() noexcept
1265{
1266 return 3;
1267}
1268
1275using TacticSources = uint32_t;
1276
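//!
//! A sketch of building a TacticSources bitmask from TacticSource values, where each source
//! contributes the bit 1U << static_cast<uint32_t>(source):
//!
//! \code
//!     nvinfer1::TacticSources sources = 1U << static_cast<uint32_t>(nvinfer1::TacticSource::kCUBLAS)
//!         | 1U << static_cast<uint32_t>(nvinfer1::TacticSource::kCUDNN);
//! \endcode
//!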
1286enum class ProfilingVerbosity : int32_t
1287{
1288 kLAYER_NAMES_ONLY = 0,
1289 kNONE = 1,
1290 kDETAILED = 2,
1291 kDEFAULT TRT_DEPRECATED_ENUM = kLAYER_NAMES_ONLY,
1292 kVERBOSE TRT_DEPRECATED_ENUM = kDETAILED
1293};
1294
1296template <>
1297constexpr inline int32_t EnumMax<ProfilingVerbosity>() noexcept
1298{
1299 return 3;
1300}
1301
1309class ICudaEngine : public INoCopy
1310{
1311public:
1312 virtual ~ICudaEngine() noexcept = default;
1313
1324 int32_t getNbBindings() const noexcept
1325 {
1326 return mImpl->getNbBindings();
1327 }
1328
1346 int32_t getBindingIndex(const char* name) const noexcept
1347 {
1348 return mImpl->getBindingIndex(name);
1349 }
1350
1366 const char* getBindingName(int32_t bindingIndex) const noexcept
1367 {
1368 return mImpl->getBindingName(bindingIndex);
1369 }
1370
1379 bool bindingIsInput(int32_t bindingIndex) const noexcept
1380 {
1381 return mImpl->bindingIsInput(bindingIndex);
1382 }
1383
1404 Dims getBindingDimensions(int32_t bindingIndex) const noexcept
1405 {
1406 return mImpl->getBindingDimensions(bindingIndex);
1407 }
1408
1417 DataType getBindingDataType(int32_t bindingIndex) const noexcept
1418 {
1419 return mImpl->getBindingDataType(bindingIndex);
1420 }
1421
1429 int32_t getMaxBatchSize() const noexcept
1430 {
1431 return mImpl->getMaxBatchSize();
1432 }
1433
1443 int32_t getNbLayers() const noexcept
1444 {
1445 return mImpl->getNbLayers();
1446 }
1447
1457 IHostMemory* serialize() const noexcept
1458 {
1459 return mImpl->serialize();
1460 }
1461
1473    IExecutionContext* createExecutionContext() noexcept
1474    {
1475 return mImpl->createExecutionContext();
1476 }
1477
1485 TRT_DEPRECATED void destroy() noexcept
1486 {
1487 delete this;
1488 }
1489
1500 TensorLocation getLocation(int32_t bindingIndex) const noexcept
1501 {
1502 return mImpl->getLocation(bindingIndex);
1503 }
1504
1509    IExecutionContext* createExecutionContextWithoutDeviceMemory() noexcept
1510    {
1511 return mImpl->createExecutionContextWithoutDeviceMemory();
1512 }
1513
1519 size_t getDeviceMemorySize() const noexcept
1520 {
1521 return mImpl->getDeviceMemorySize();
1522 }
1523
1529 bool isRefittable() const noexcept
1530 {
1531 return mImpl->isRefittable();
1532 }
1533
1543 int32_t getBindingBytesPerComponent(int32_t bindingIndex) const noexcept
1544 {
1545 return mImpl->getBindingBytesPerComponent(bindingIndex);
1546 }
1547
1557 int32_t getBindingComponentsPerElement(int32_t bindingIndex) const noexcept
1558 {
1559 return mImpl->getBindingComponentsPerElement(bindingIndex);
1560 }
1561
1567 TensorFormat getBindingFormat(int32_t bindingIndex) const noexcept
1568 {
1569 return mImpl->getBindingFormat(bindingIndex);
1570 }
1571
1586 const char* getBindingFormatDesc(int32_t bindingIndex) const noexcept
1587 {
1588 return mImpl->getBindingFormatDesc(bindingIndex);
1589 }
1590
1598 int32_t getBindingVectorizedDim(int32_t bindingIndex) const noexcept
1599 {
1600 return mImpl->getBindingVectorizedDim(bindingIndex);
1601 }
1602
1613 const char* getName() const noexcept
1614 {
1615 return mImpl->getName();
1616 }
1617
1624 int32_t getNbOptimizationProfiles() const noexcept
1625 {
1626 return mImpl->getNbOptimizationProfiles();
1627 }
1628
1651 Dims getProfileDimensions(int32_t bindingIndex, int32_t profileIndex, OptProfileSelector select) const noexcept
1652 {
1653 return mImpl->getProfileDimensions(bindingIndex, profileIndex, select);
1654 }
1655
1677 const int32_t* getProfileShapeValues(int32_t profileIndex, int32_t inputIndex, OptProfileSelector select) const
1678 noexcept
1679 {
1680 return mImpl->getProfileShapeValues(profileIndex, inputIndex, select);
1681 }
1682
1714 bool isShapeBinding(int32_t bindingIndex) const noexcept
1715 {
1716 return mImpl->isShapeBinding(bindingIndex);
1717 }
1718
1728 bool isExecutionBinding(int32_t bindingIndex) const noexcept
1729 {
1730 return mImpl->isExecutionBinding(bindingIndex);
1731 }
1732
1743    EngineCapability getEngineCapability() const noexcept
1744    {
1745 return mImpl->getEngineCapability();
1746 }
1747
1758 //
1761 void setErrorRecorder(IErrorRecorder* recorder) noexcept
1762 {
1763 return mImpl->setErrorRecorder(recorder);
1764 }
1765
1776    IErrorRecorder* getErrorRecorder() const noexcept
1777    {
1778 return mImpl->getErrorRecorder();
1779 }
1780
1795 bool hasImplicitBatchDimension() const noexcept
1796 {
1797 return mImpl->hasImplicitBatchDimension();
1798 }
1799
1804    TacticSources getTacticSources() const noexcept
1805    {
1806 return mImpl->getTacticSources();
1807 }
1808
1815    ProfilingVerbosity getProfilingVerbosity() const noexcept
1816    {
1817 return mImpl->getProfilingVerbosity();
1818 }
1819
1825    IEngineInspector* createEngineInspector() const noexcept
1826    {
1827 return mImpl->createEngineInspector();
1828 }
1829
1830protected:
1831 apiv::VCudaEngine* mImpl;
1832};
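//!
//! A sketch of inspecting bindings on a deserialized engine; "input" is an assumed tensor name.
//!
//! \code
//!     int32_t const index = engine->getBindingIndex("input");          // -1 if the name is unknown
//!     nvinfer1::Dims const dims = engine->getBindingDimensions(index); // -1 marks dynamic dimensions
//!     nvinfer1::DataType const type = engine->getBindingDataType(index);
//!     nvinfer1::IExecutionContext* context = engine->createExecutionContext();
//! \endcode
//!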
1833
1844class IExecutionContext : public INoCopy
1845{
1846public:
1847 virtual ~IExecutionContext() noexcept = default;
1848
1866 bool execute(int32_t batchSize, void* const* bindings) noexcept
1867 {
1868 return mImpl->execute(batchSize, bindings);
1869 }
1870
1895 bool enqueue(int32_t batchSize, void* const* bindings, cudaStream_t stream, cudaEvent_t* inputConsumed) noexcept
1896 {
1897 return mImpl->enqueue(batchSize, bindings, stream, inputConsumed);
1898 }
1899
1908 void setDebugSync(bool sync) noexcept
1909 {
1910 mImpl->setDebugSync(sync);
1911 }
1912
1918 bool getDebugSync() const noexcept
1919 {
1920 return mImpl->getDebugSync();
1921 }
1922
1928 void setProfiler(IProfiler* profiler) noexcept
1929 {
1930 mImpl->setProfiler(profiler);
1931 }
1932
1938 IProfiler* getProfiler() const noexcept
1939 {
1940 return mImpl->getProfiler();
1941 }
1942
1948 const ICudaEngine& getEngine() const noexcept
1949 {
1950 return mImpl->getEngine();
1951 }
1952
1960 TRT_DEPRECATED void destroy() noexcept
1961 {
1962 delete this;
1963 }
1964
1972 void setName(const char* name) noexcept
1973 {
1974 mImpl->setName(name);
1975 }
1976
1982 const char* getName() const noexcept
1983 {
1984 return mImpl->getName();
1985 }
1986
1998 void setDeviceMemory(void* memory) noexcept
1999 {
2000 mImpl->setDeviceMemory(memory);
2001 }
2002
2019 Dims getStrides(int32_t bindingIndex) const noexcept
2020 {
2021 return mImpl->getStrides(bindingIndex);
2022 }
2023
2024public:
2061 bool setOptimizationProfile(int32_t profileIndex) noexcept
2062 {
2063 return mImpl->setOptimizationProfile(profileIndex);
2064 }
2065
2073 int32_t getOptimizationProfile() const noexcept
2074 {
2075 return mImpl->getOptimizationProfile();
2076 }
2077
2110 bool setBindingDimensions(int32_t bindingIndex, Dims dimensions) noexcept
2111 {
2112 return mImpl->setBindingDimensions(bindingIndex, dimensions);
2113 }
2114
2140 Dims getBindingDimensions(int32_t bindingIndex) const noexcept
2141 {
2142 return mImpl->getBindingDimensions(bindingIndex);
2143 }
2144
2170 bool setInputShapeBinding(int32_t bindingIndex, int32_t const* data) noexcept
2171 {
2172 return mImpl->setInputShapeBinding(bindingIndex, data);
2173 }
2174
2192 bool getShapeBinding(int32_t bindingIndex, int32_t* data) const noexcept
2193 {
2194 return mImpl->getShapeBinding(bindingIndex, data);
2195 }
2196
2207 bool allInputDimensionsSpecified() const noexcept
2208 {
2209 return mImpl->allInputDimensionsSpecified();
2210 }
2211
2221 bool allInputShapesSpecified() const noexcept
2222
2223 {
2224 return mImpl->allInputShapesSpecified();
2225 }
2226
2238 //
2241 void setErrorRecorder(IErrorRecorder* recorder) noexcept
2242 {
2243 mImpl->setErrorRecorder(recorder);
2244 }
2245
2256    IErrorRecorder* getErrorRecorder() const noexcept
2257    {
2258 return mImpl->getErrorRecorder();
2259 }
2260
2273 bool executeV2(void* const* bindings) noexcept
2274 {
2275 return mImpl->executeV2(bindings);
2276 }
2277
2301 bool enqueueV2(void* const* bindings, cudaStream_t stream, cudaEvent_t* inputConsumed) noexcept
2302 {
2303 return mImpl->enqueueV2(bindings, stream, inputConsumed);
2304 }
2305
2349 bool setOptimizationProfileAsync(int32_t profileIndex, cudaStream_t stream) noexcept
2350 {
2351 return mImpl->setOptimizationProfileAsync(profileIndex, stream);
2352 }
2353
2364 void setEnqueueEmitsProfile(bool enqueueEmitsProfile) noexcept
2365 {
2366 mImpl->setEnqueueEmitsProfile(enqueueEmitsProfile);
2367 }
2368
2375 bool getEnqueueEmitsProfile() const noexcept
2376 {
2377 return mImpl->getEnqueueEmitsProfile();
2378 }
2379
2402 bool reportToProfiler() const noexcept
2403 {
2404 return mImpl->reportToProfiler();
2405 }
2406
2407protected:
2408 apiv::VExecutionContext* mImpl;
2409}; // class IExecutionContext
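//!
//! A sketch of explicit-batch inference with one dynamic input; it assumes a deserialized `engine`,
//! a cudaStream_t `stream`, and device buffers already placed in `bindings[]` in binding-index order.
//!
//! \code
//!     nvinfer1::IExecutionContext* context = engine->createExecutionContext();
//!     int32_t const inputIndex = engine->getBindingIndex("input");
//!     context->setBindingDimensions(inputIndex, nvinfer1::Dims4(4, 3, 224, 224));
//!     if (context->allInputDimensionsSpecified())
//!     {
//!         context->enqueueV2(bindings, stream, nullptr);
//!     }
//! \endcode
//!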
2410
2418enum class LayerInformationFormat : int32_t
2419{
2420 kONELINE = 0,
2421 kJSON = 1,
2422};
2423
2426template <>
2427constexpr inline int32_t EnumMax<LayerInformationFormat>() noexcept
2428{
2429 return 2;
2430}
2431
2447class IEngineInspector : public INoCopy
2448{
2449public:
2450 virtual ~IEngineInspector() noexcept = default;
2451
2464 bool setExecutionContext(IExecutionContext const* context) noexcept
2465 {
2466 return mImpl->setExecutionContext(context);
2467 }
2468
2476    IExecutionContext const* getExecutionContext() const noexcept
2477    {
2478 return mImpl->getExecutionContext();
2479 }
2480
2501 AsciiChar const* getLayerInformation(int32_t layerIndex, LayerInformationFormat format) const noexcept
2502 {
2503 return mImpl->getLayerInformation(layerIndex, format);
2504 }
2505
2526    AsciiChar const* getEngineInformation(LayerInformationFormat format) const noexcept
2527    {
2528 return mImpl->getEngineInformation(format);
2529 }
2530
2542 //
2545 void setErrorRecorder(IErrorRecorder* recorder) noexcept
2546 {
2547 mImpl->setErrorRecorder(recorder);
2548 }
2549
2560    IErrorRecorder* getErrorRecorder() const noexcept
2561    {
2562 return mImpl->getErrorRecorder();
2563 }
2564
2565protected:
2566 apiv::VEngineInspector* mImpl;
2567}; // class IEngineInspector
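//!
//! A sketch of dumping per-layer information; it assumes `engine` was built with
//! ProfilingVerbosity::kDETAILED so that layer details are available.
//!
//! \code
//!     nvinfer1::IEngineInspector* inspector = engine->createEngineInspector();
//!     char const* json = inspector->getEngineInformation(nvinfer1::LayerInformationFormat::kJSON);
//!     // ... consume `json` before releasing the inspector ...
//!     delete inspector;
//! \endcode
//!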
2568
2569} // namespace nvinfer1
2570
2575extern "C" TENSORRTAPI void* createInferRuntime_INTERNAL(void* logger, int32_t version) noexcept;
2576
2581extern "C" TENSORRTAPI void* createInferRefitter_INTERNAL(void* engine, void* logger, int32_t version) noexcept;
2582
2586extern "C" TENSORRTAPI nvinfer1::IPluginRegistry* getPluginRegistry() noexcept;
2587
2593extern "C" TENSORRTAPI nvinfer1::ILogger* getLogger() noexcept;
2594
2595namespace nvinfer1
2596{
2597namespace // unnamed namespace avoids linkage surprises when linking objects built with different versions of this
2598 // header.
2599{
2605inline IRuntime* createInferRuntime(ILogger& logger) noexcept
2606{
2607 return static_cast<IRuntime*>(createInferRuntime_INTERNAL(&logger, NV_TENSORRT_VERSION));
2608}
2609
2615inline IRefitter* createInferRefitter(ICudaEngine& engine, ILogger& logger) noexcept
2616{
2617 return static_cast<IRefitter*>(createInferRefitter_INTERNAL(&engine, &logger, NV_TENSORRT_VERSION));
2618}
2619
2620} // namespace
2621
2633template <typename T>
2634class PluginRegistrar
2635{
2636public:
2637    PluginRegistrar()
2638    {
2639 getPluginRegistry()->registerCreator(instance, "");
2640 }
2641
2642private:
2644 T instance{};
2645};
2646
2647} // namespace nvinfer1
2648
2649#define REGISTER_TENSORRT_PLUGIN(name) \
2650 static nvinfer1::PluginRegistrar<name> pluginRegistrar##name {}
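//!
//! Typical use: register a hypothetical creator class at static-initialization time.
//! `MyPluginCreator` is assumed to implement nvinfer1::IPluginCreator.
//!
//! \code
//!     REGISTER_TENSORRT_PLUGIN(MyPluginCreator);
//! \endcode
//!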
2651#endif // NV_INFER_RUNTIME_H
nvinfer1::ILogger * getLogger() noexcept
Return the logger object.
nvinfer1::IPluginRegistry * getPluginRegistry() noexcept
Return the plugin registry.
#define TRT_DEPRECATED
Items that are marked as deprecated will be removed in a future release.
Definition: NvInferRuntimeCommon.h:77
Structure to define the dimensions of a tensor.
Definition: NvInferRuntimeCommon.h:190
static constexpr int32_t MAX_DIMS
The maximum rank (number of dimensions) supported for a tensor.
Definition: NvInferRuntimeCommon.h:193
Definition: NvInferRuntime.h:340
int32_t nbDims
The number of dimensions.
Definition: NvInferRuntime.h:342
An engine for executing inference on a built network, with functionally unsafe features.
Definition: NvInferRuntime.h:1310
int32_t getBindingBytesPerComponent(int32_t bindingIndex) const noexcept
Return the number of bytes per component of an element.
Definition: NvInferRuntime.h:1543
int32_t getBindingComponentsPerElement(int32_t bindingIndex) const noexcept
Return the number of components included in one element.
Definition: NvInferRuntime.h:1557
bool isShapeBinding(int32_t bindingIndex) const noexcept
True if tensor is required as input for shape calculations or output from them.
Definition: NvInferRuntime.h:1714
bool hasImplicitBatchDimension() const noexcept
Query whether the engine was built with an implicit batch dimension.
Definition: NvInferRuntime.h:1795
const char * getName() const noexcept
Returns the name of the network associated with the engine.
Definition: NvInferRuntime.h:1613
EngineCapability getEngineCapability() const noexcept
Determine what execution capability this engine has.
Definition: NvInferRuntime.h:1743
TRT_DEPRECATED void destroy() noexcept
Destroy this object.
Definition: NvInferRuntime.h:1485
IErrorRecorder * getErrorRecorder() const noexcept
Get the ErrorRecorder assigned to this interface.
Definition: NvInferRuntime.h:1776
TensorFormat getBindingFormat(int32_t bindingIndex) const noexcept
Return the binding format.
Definition: NvInferRuntime.h:1567
const int32_t * getProfileShapeValues(int32_t profileIndex, int32_t inputIndex, OptProfileSelector select) const noexcept
Get minimum / optimum / maximum values for an input shape binding under an optimization profile.
Definition: NvInferRuntime.h:1677
int32_t getNbBindings() const noexcept
Get the number of binding indices.
Definition: NvInferRuntime.h:1324
TensorLocation getLocation(int32_t bindingIndex) const noexcept
Get location of binding.
Definition: NvInferRuntime.h:1500
Dims getProfileDimensions(int32_t bindingIndex, int32_t profileIndex, OptProfileSelector select) const noexcept
Get the minimum / optimum / maximum dimensions for a particular binding under an optimization profile...
Definition: NvInferRuntime.h:1651
Dims getBindingDimensions(int32_t bindingIndex) const noexcept
Get the dimensions of a binding.
Definition: NvInferRuntime.h:1404
int32_t getBindingVectorizedDim(int32_t bindingIndex) const noexcept
Return the index of the dimension along which the buffer is vectorized.
Definition: NvInferRuntime.h:1598
int32_t getMaxBatchSize() const noexcept
Get the maximum batch size which can be used for inference.
Definition: NvInferRuntime.h:1429
void setErrorRecorder(IErrorRecorder *recorder) noexcept
Set the ErrorRecorder for this interface.
Definition: NvInferRuntime.h:1761
size_t getDeviceMemorySize() const noexcept
Return the amount of device memory required by an execution context.
Definition: NvInferRuntime.h:1519
TacticSources getTacticSources() const noexcept
Return the tactic sources required by this engine.
Definition: NvInferRuntime.h:1804
ProfilingVerbosity getProfilingVerbosity() const noexcept
Return the ProfilingVerbosity the builder config was set to when the engine was built.
Definition: NvInferRuntime.h:1815
IHostMemory * serialize() const noexcept
Serialize the network to a stream.
Definition: NvInferRuntime.h:1457
IExecutionContext * createExecutionContextWithoutDeviceMemory() noexcept
Create an execution context without any device memory allocated.
Definition: NvInferRuntime.h:1509
int32_t getBindingIndex(const char *name) const noexcept
Retrieve the binding index for a named tensor.
Definition: NvInferRuntime.h:1346
DataType getBindingDataType(int32_t bindingIndex) const noexcept
Determine the required data type for a buffer from its binding index.
Definition: NvInferRuntime.h:1417
IExecutionContext * createExecutionContext() noexcept
Create an execution context.
Definition: NvInferRuntime.h:1473
IEngineInspector * createEngineInspector() const noexcept
Create a new engine inspector which prints the layer information in an engine or an execution context...
Definition: NvInferRuntime.h:1825
const char * getBindingName(int32_t bindingIndex) const noexcept
Retrieve the name corresponding to a binding index.
Definition: NvInferRuntime.h:1366
int32_t getNbOptimizationProfiles() const noexcept
Get the number of optimization profiles defined for this engine.
Definition: NvInferRuntime.h:1624
int32_t getNbLayers() const noexcept
Get the number of layers in the network.
Definition: NvInferRuntime.h:1443
const char * getBindingFormatDesc(int32_t bindingIndex) const noexcept
Return the human readable description of the tensor format.
Definition: NvInferRuntime.h:1586
bool bindingIsInput(int32_t bindingIndex) const noexcept
Determine whether a binding is an input binding.
Definition: NvInferRuntime.h:1379
bool isExecutionBinding(int32_t bindingIndex) const noexcept
True if pointer to tensor data is required for execution phase, false if nullptr can be supplied.
Definition: NvInferRuntime.h:1728
bool isRefittable() const noexcept
Return true if an engine can be refit.
Definition: NvInferRuntime.h:1529
Definition: NvInferRuntime.h:275
bool isConstant() const noexcept
Return true if expression is a build-time constant.
Definition: NvInferRuntime.h:278
int32_t getConstantValue() const noexcept
Definition: NvInferRuntime.h:285
An engine inspector which prints out the layer information of an engine or an execution context.
Definition: NvInferRuntime.h:2448
IErrorRecorder * getErrorRecorder() const noexcept
Get the ErrorRecorder assigned to this interface.
Definition: NvInferRuntime.h:2560
void setErrorRecorder(IErrorRecorder *recorder) noexcept
Set the ErrorRecorder for this interface.
Definition: NvInferRuntime.h:2545
AsciiChar const * getEngineInformation(LayerInformationFormat format) const noexcept
Get a string describing the information about all the layers in the current engine or the execution c...
Definition: NvInferRuntime.h:2526
IExecutionContext const * getExecutionContext() const noexcept
Get the context currently being inspected.
Definition: NvInferRuntime.h:2476
AsciiChar const * getLayerInformation(int32_t layerIndex, LayerInformationFormat format) const noexcept
Get a string describing the information about a specific layer in the current engine or the execution...
Definition: NvInferRuntime.h:2501
bool setExecutionContext(IExecutionContext const *context) noexcept
Set an execution context as the inspection source.
Definition: NvInferRuntime.h:2464
Reference counted application-implemented error reporting interface for TensorRT objects.
Definition: NvInferRuntimeCommon.h:1693
Context for executing inference using an engine, with functionally unsafe features.
Definition: NvInferRuntime.h:1845
IErrorRecorder * getErrorRecorder() const noexcept
Get the ErrorRecorder assigned to this interface.
Definition: NvInferRuntime.h:2256
bool reportToProfiler() const noexcept
Calculate layer timing info for the current optimization profile in IExecutionContext and update the ...
Definition: NvInferRuntime.h:2402
void setDeviceMemory(void *memory) noexcept
Set the device memory for use by this execution context.
Definition: NvInferRuntime.h:1998
Dims getBindingDimensions(int32_t bindingIndex) const noexcept
Get the dynamic dimensions of a binding.
Definition: NvInferRuntime.h:2140
bool getShapeBinding(int32_t bindingIndex, int32_t *data) const noexcept
Get values of an input tensor required for shape calculations or an output tensor produced by shape c...
Definition: NvInferRuntime.h:2192
void setDebugSync(bool sync) noexcept
Set the debug sync flag.
Definition: NvInferRuntime.h:1908
bool enqueueV2(void *const *bindings, cudaStream_t stream, cudaEvent_t *inputConsumed) noexcept
Asynchronously execute inference.
Definition: NvInferRuntime.h:2301
void setEnqueueEmitsProfile(bool enqueueEmitsProfile) noexcept
Set whether enqueue emits layer timing to the profiler.
Definition: NvInferRuntime.h:2364
bool setBindingDimensions(int32_t bindingIndex, Dims dimensions) noexcept
Set the dynamic dimensions of a binding.
Definition: NvInferRuntime.h:2110
bool setInputShapeBinding(int32_t bindingIndex, int32_t const *data) noexcept
Set values of input tensor required by shape calculations.
Definition: NvInferRuntime.h:2170
bool executeV2(void *const *bindings) noexcept
Synchronously execute inference on a network.
Definition: NvInferRuntime.h:2273
bool getEnqueueEmitsProfile() const noexcept
Get the enqueueEmitsProfile state.
Definition: NvInferRuntime.h:2375
bool setOptimizationProfileAsync(int32_t profileIndex, cudaStream_t stream) noexcept
Select an optimization profile for the current context with async semantics.
Definition: NvInferRuntime.h:2349
int32_t getOptimizationProfile() const noexcept
Get the index of the currently selected optimization profile.
Definition: NvInferRuntime.h:2073
bool enqueue(int32_t batchSize, void *const *bindings, cudaStream_t stream, cudaEvent_t *inputConsumed) noexcept
Asynchronously execute inference on a batch.
Definition: NvInferRuntime.h:1895
TRT_DEPRECATED void destroy() noexcept
Destroy this object.
Definition: NvInferRuntime.h:1960
bool getDebugSync() const noexcept
Get the debug sync flag.
Definition: NvInferRuntime.h:1918
bool execute(int32_t batchSize, void *const *bindings) noexcept
Synchronously execute inference on a batch.
Definition: NvInferRuntime.h:1866
const char * getName() const noexcept
Return the name of the execution context.
Definition: NvInferRuntime.h:1982
Dims getStrides(int32_t bindingIndex) const noexcept
Return the strides of the buffer for the given binding.
Definition: NvInferRuntime.h:2019
IProfiler * getProfiler() const noexcept
Get the profiler.
Definition: NvInferRuntime.h:1938
void setErrorRecorder(IErrorRecorder *recorder) noexcept
Set the ErrorRecorder for this interface.
Definition: NvInferRuntime.h:2241
TRT_DEPRECATED bool setOptimizationProfile(int32_t profileIndex) noexcept
Select an optimization profile for the current context.
Definition: NvInferRuntime.h:2061
bool allInputDimensionsSpecified() const noexcept
Whether all dynamic dimensions of input tensors have been specified.
Definition: NvInferRuntime.h:2207
const ICudaEngine & getEngine() const noexcept
Get the associated engine.
Definition: NvInferRuntime.h:1948
void setProfiler(IProfiler *profiler) noexcept
Set the profiler.
Definition: NvInferRuntime.h:1928
bool allInputShapesSpecified() const noexcept
Whether all input shape bindings have been specified.
Definition: NvInferRuntime.h:2221
void setName(const char *name) noexcept
Set the name of the execution context.
Definition: NvInferRuntime.h:1972
Definition: NvInferRuntime.h:313
const IDimensionExpr * constant(int32_t value) noexcept
Return pointer to IDimensionExp for given value.
Definition: NvInferRuntime.h:316
const IDimensionExpr * operation(DimensionOperation op, const IDimensionExpr &first, const IDimensionExpr &second) noexcept
Definition: NvInferRuntime.h:323
Application-implemented class for controlling allocation on the GPU.
Definition: NvInferRuntimeCommon.h:1372
Class to handle library allocated memory that is accessible to the user.
Definition: NvInferRuntime.h:175
void * data() const noexcept
A pointer to the raw data that is owned by the library.
Definition: NvInferRuntime.h:180
DataType type() const noexcept
The type of the memory that was allocated.
Definition: NvInferRuntime.h:192
TRT_DEPRECATED void destroy() noexcept
Definition: NvInferRuntime.h:203
std::size_t size() const noexcept
The size in bytes of the data that was allocated.
Definition: NvInferRuntime.h:186
Application-implemented logging interface for the builder, refitter and runtime.
Definition: NvInferRuntimeCommon.h:1506
Base class for all TensorRT interfaces that are implemented by the TensorRT libraries; objects of these classes are neither copyable nor movable.
Definition: NvInferRuntime.h:80
Optimization profile for dynamic input dimensions and shape tensors.
Definition: NvInferRuntime.h:1094
bool setDimensions(const char *inputName, OptProfileSelector select, Dims dims) noexcept
Set the minimum / optimum / maximum dimensions for a dynamic input tensor.
Definition: NvInferRuntime.h:1121
int32_t getNbShapeValues(const char *inputName) const noexcept
Get the number of values for an input shape tensor.
Definition: NvInferRuntime.h:1186
float getExtraMemoryTarget() const noexcept
Get the extra memory target that has been defined for this profile.
Definition: NvInferRuntime.h:1222
Dims getDimensions(const char *inputName, OptProfileSelector select) const noexcept
Get the minimum / optimum / maximum dimensions for a dynamic input tensor.
Definition: NvInferRuntime.h:1131
bool setExtraMemoryTarget(float target) noexcept
Set a target for extra GPU memory that may be used by this profile.
Definition: NvInferRuntime.h:1214
int32_t const * getShapeValues(const char *inputName, OptProfileSelector select) const noexcept
Get the minimum / optimum / maximum values for an input shape tensor.
Definition: NvInferRuntime.h:1196
bool setShapeValues(const char *inputName, OptProfileSelector select, const int32_t *values, int32_t nbValues) noexcept
Set the minimum / optimum / maximum values for an input shape tensor.
Definition: NvInferRuntime.h:1174
bool isValid() const noexcept
Check whether the optimization profile can be passed to an IBuilderConfig object.
Definition: NvInferRuntime.h:1238
Single registration point for all plugins in an application. It is used to find plugin implementation...
Definition: NvInferRuntimeCommon.h:1244
virtual bool registerCreator(IPluginCreator &creator, AsciiChar const *const pluginNamespace) noexcept=0
Register a plugin creator. Returns false if one with same type is already registered.
Definition: NvInferRuntime.h:382
IPluginV2DynamicExt * clone() const noexcept override=0
Clone the plugin object. This copies over internal plugin parameters as well and returns a new plugin...
Plugin class for user-implemented layers.
Definition: NvInferRuntimeCommon.h:714
Application-implemented interface for profiling.
Definition: NvInferRuntime.h:591
virtual void reportLayerTime(const char *layerName, float ms) noexcept=0
Layer time reporting callback.
Updates weights in an engine.
Definition: NvInferRuntime.h:806
float getDynamicRangeMin(const char *tensorName) const noexcept
Get minimum of dynamic range.
Definition: NvInferRuntime.h:912
bool setDynamicRange(const char *tensorName, float min, float max) noexcept
Definition: NvInferRuntime.h:900
bool setWeights(const char *layerName, WeightsRole role, Weights weights) noexcept
Specify new weights for a layer of given name. Returns true on success, or false if new weights are r...
Definition: NvInferRuntime.h:820
ILogger * getLogger() const noexcept
Get the logger with which the refitter was created.
Definition: NvInferRuntime.h:1038
bool refitCudaEngine() noexcept
Updates associated engine. Return true if successful.
Definition: NvInferRuntime.h:835
int32_t getMissing(int32_t size, const char **layerNames, WeightsRole *roles) noexcept
Get description of missing weights.
Definition: NvInferRuntime.h:856
int32_t getTensorsWithDynamicRange(int32_t size, const char **tensorNames) const noexcept
Get names of all tensors that have refittable dynamic ranges.
Definition: NvInferRuntime.h:940
TRT_DEPRECATED void destroy() noexcept
Definition: NvInferRuntime.h:883
int32_t getAll(int32_t size, const char **layerNames, WeightsRole *roles) noexcept
Get description of all weights that could be refit.
Definition: NvInferRuntime.h:873
int32_t getMissingWeights(int32_t size, const char **weightsNames) noexcept
Get names of missing weights.
Definition: NvInferRuntime.h:1012
bool setNamedWeights(const char *name, Weights weights) noexcept
Specify new weights of given name.
Definition: NvInferRuntime.h:992
float getDynamicRangeMax(const char *tensorName) const noexcept
Get maximum of dynamic range.
Definition: NvInferRuntime.h:924
void setErrorRecorder(IErrorRecorder *recorder) noexcept
Set the ErrorRecorder for this interface.
Definition: NvInferRuntime.h:959
int32_t getAllWeights(int32_t size, const char **weightsNames) noexcept
Get names of all weights that could be refit.
Definition: NvInferRuntime.h:1028
IErrorRecorder * getErrorRecorder() const noexcept
Get the ErrorRecorder assigned to this interface.
Definition: NvInferRuntime.h:974
Allows a serialized functionally unsafe engine to be deserialized.
Definition: NvInferRuntime.h:653
ICudaEngine * deserializeCudaEngine(const void *blob, std::size_t size) noexcept
Deserialize an engine from a stream.
Definition: NvInferRuntime.h:779
TRT_DEPRECATED void destroy() noexcept
Destroy this object.
Definition: NvInferRuntime.h:716
void setDLACore(int32_t dlaCore) noexcept
Set the DLA core that the deserialized engine must execute on.
Definition: NvInferRuntime.h:685
int32_t getNbDLACores() const noexcept
Returns the number of DLA hardware cores accessible.
Definition: NvInferRuntime.h:704
int32_t getDLACore() const noexcept
Get the DLA core that the engine executes on.
Definition: NvInferRuntime.h:696
void setGpuAllocator(IGpuAllocator *allocator) noexcept
Set the GPU allocator.
Definition: NvInferRuntime.h:730
IErrorRecorder * getErrorRecorder() const noexcept
Get the ErrorRecorder assigned to this interface.
Definition: NvInferRuntime.h:764
TRT_DEPRECATED nvinfer1::ICudaEngine * deserializeCudaEngine(const void *blob, std::size_t size, IPluginFactory *pluginFactory) noexcept
Deserialize an engine from a stream.
Definition: NvInferRuntime.h:672
ILogger * getLogger() const noexcept
Get the logger with which the runtime was created.
Definition: NvInferRuntime.h:789
void setErrorRecorder(IErrorRecorder *recorder) noexcept
Set the ErrorRecorder for this interface.
Definition: NvInferRuntime.h:749
Register the plugin creator to the registry The static registry object will be instantiated when the ...
Definition: NvInferRuntime.h:2635
An array of weights used as a layer parameter.
Definition: NvInferRuntime.h:157
DataType type
The type of the weights.
Definition: NvInferRuntime.h:159
int64_t count
The number of weights in the array.
Definition: NvInferRuntime.h:161
const void * values
The weight values, in a contiguous array.
Definition: NvInferRuntime.h:160
IRefitter * createInferRefitter(ICudaEngine &engine, ILogger &logger) noexcept
Create an instance of an IRefitter class.
Definition: NvInferRuntime.h:2615
IRuntime * createInferRuntime(ILogger &logger) noexcept
Create an instance of an IRuntime class.
Definition: NvInferRuntime.h:2605
The TensorRT API version 1 namespace.
uint32_t TacticSources
Represents a collection of one or more TacticSource values combined using bitwise-OR operations.
Definition: NvInferRuntime.h:1275
EngineCapability
List of supported engine capability flows.
Definition: NvInferRuntime.h:106
DimensionOperation
An operation on two IDimensionExpr, which represent integer expressions used in dimension computation...
Definition: NvInferRuntime.h:223
@ kSUB
Subtract the second element from the first.
@ kSUM
Sum of the two operands.
@ kPROD
Product of the two operands.
@ kFLOOR_DIV
Floor division of the first element by the second.
@ kEQUAL
1 if operands are equal, 0 otherwise.
@ kMIN
Minimum of the two operands.
@ kLESS
1 if first operand is less than second operand, 0 otherwise.
@ kMAX
Maximum of the two operands.
@ kCEIL_DIV
Division rounding up.
constexpr int32_t EnumMax< WeightsRole >() noexcept
Maximum number of elements in WeightsRole enum.
Definition: NvInferRuntime.h:622
char_t AsciiChar
AsciiChar is the type used by TensorRT to represent valid ASCII characters.
Definition: NvInferRuntimeCommon.h:125
@ kV2_DYNAMICEXT
IPluginV2DynamicExt.
constexpr int32_t EnumMax< LayerInformationFormat >() noexcept
Definition: NvInferRuntime.h:2427
DataType
The type of weights and tensors.
Definition: NvInferRuntimeCommon.h:151
DeviceType
The device that this layer/network will execute on.
Definition: NvInferRuntime.h:633
@ kSCALE
Scale layer.
@ kCONSTANT
Constant layer.
@ kDEFAULT
Deprecated alias retained for backward compatibility (for example, EngineCapability::kDEFAULT is equivalent to kSTANDARD).
WeightsRole
How a layer uses particular Weights.
Definition: NvInferRuntime.h:611
@ kSHIFT
shift part of IScaleLayer
@ kANY
Any other weights role.
@ kBIAS
bias for IConvolutionLayer, IDeconvolutionLayer, or IFullyConnectedLayer
@ kKERNEL
kernel for IConvolutionLayer, IDeconvolutionLayer, or IFullyConnectedLayer
constexpr int32_t EnumMax< ProfilingVerbosity >() noexcept
Maximum number of profile verbosity levels in ProfilingVerbosity enum.
Definition: NvInferRuntime.h:1297
ProfilingVerbosity
List of verbosity levels of layer information exposed in NVTX annotations and in IEngineInspector.
Definition: NvInferRuntime.h:1287
@ kLAYER_NAMES_ONLY
Print only the layer names. This is the default setting.
@ kDETAILED
Print detailed layer information including layer names and layer parameters.
TacticSource
List of tactic sources for TensorRT.
Definition: NvInferRuntime.h:1256
@ kCUBLAS_LT
cuBLAS LT tactics
@ kCUDNN
cuDNN tactics
@ kCUBLAS
cuBLAS tactics.
TensorFormat PluginFormat
PluginFormat is reserved for backward compatibility.
Definition: NvInferRuntimeCommon.h:345
@ kMIN
This is used to set or get the minimum permitted value for dynamic dimensions and shape tensor values.
TensorFormat
Format of the input/output tensors.
Definition: NvInferRuntimeCommon.h:221
constexpr int32_t EnumMax< TacticSource >() noexcept
Maximum number of tactic sources in TacticSource enum.
Definition: NvInferRuntime.h:1264
LayerInformationFormat
The format in which the IEngineInspector prints the layer information.
Definition: NvInferRuntime.h:2419
@ kJSON
Print layer information in JSON format.
@ kONELINE
Print layer information in one line per layer.
constexpr int32_t EnumMax< DeviceType >() noexcept
Maximum number of elements in DeviceType enum.
Definition: NvInferRuntime.h:640
constexpr int32_t EnumMax< DimensionOperation >() noexcept
Maximum number of elements in DimensionOperation enum.
Definition: NvInferRuntime.h:237
TensorLocation
The location for tensor data storage, device or host.
Definition: NvInferRuntime.h:247
@ kHOST
Data stored on host.
@ kDEVICE
Data stored on device.
OptProfileSelector
When setting or querying optimization profile parameters (such as shape tensor inputs or dynamic dime...
Definition: NvInferRuntime.h:1058
@ kOPT
This is used to set or get the value that is used in the optimization (kernel selection).
Definition: NvInferRuntime.h:352
Dims min
Lower bounds on tensor’s dimensions.
Definition: NvInferRuntime.h:357
Dims max
Upper bounds on tensor’s dimensions.
Definition: NvInferRuntime.h:360
PluginTensorDesc desc
Information required to interpret a pointer to tensor data, except that desc.dims has -1 in place of ...
Definition: NvInferRuntime.h:354
Fields that a plugin might see for an input or output.
Definition: NvInferRuntimeCommon.h:370
Declaration of EnumMaxImpl struct to store maximum number of elements in an enumeration type.
Definition: NvInferRuntimeCommon.h:136