TensorRT-RTX 1.0.0
NvInferRuntime.h
Go to the documentation of this file.
/*
 * SPDX-FileCopyrightText: Copyright (c) 1993-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 * SPDX-License-Identifier: Apache-2.0
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
17
18#ifndef NV_INFER_RUNTIME_H
19#define NV_INFER_RUNTIME_H
20
26
27#include "NvInferImpl.h"
28#define NV_INFER_INTERNAL_INCLUDE 1
29#include "NvInferPluginBase.h"
30#undef NV_INFER_INTERNAL_INCLUDE
32
33namespace nvinfer1
34{
35
36class IExecutionContext;
37class ICudaEngine;
38class IPluginFactory;
39class IEngineInspector;
40
49
51{
52protected:
53 INoCopy() = default;
54 virtual ~INoCopy() = default;
55 INoCopy(INoCopy const& other) = delete;
56 INoCopy& operator=(INoCopy const& other) = delete;
57 INoCopy(INoCopy&& other) = delete;
58 INoCopy& operator=(INoCopy&& other) = delete;
59};
60
75enum class EngineCapability : int32_t
76{
81 kSTANDARD = 0,
82
89 kSAFETY = 1,
90
97};
98
99namespace impl
100{
102template <>
104{
105 static constexpr int32_t kVALUE = 3;
106};
107} // namespace impl
108
124{
125public:
127 void const* values;
128 int64_t count;
129};
130
141class IHostMemory : public INoCopy
142{
143public:
144 virtual ~IHostMemory() noexcept = default;
145
147 void* data() const noexcept
148 {
149 return mImpl->data();
150 }
151
153 std::size_t size() const noexcept
154 {
155 return mImpl->size();
156 }
157
159 DataType type() const noexcept
160 {
161 return mImpl->type();
162 }
163
164protected:
165 apiv::VHostMemory* mImpl;
166};
167
//!
//! \enum DimensionOperation
//!
//! \brief An operation applied to two integer dimension expressions.
//!
//! \see IDimensionExpr, IExprBuilder
//!
enum class DimensionOperation : int32_t
{
    kSUM = 0,       //!< Sum of the two operands.
    kPROD = 1,      //!< Product of the two operands.
    kMAX = 2,       //!< Maximum of the two operands.
    kMIN = 3,       //!< Minimum of the two operands.
    kSUB = 4,       //!< Difference of the two operands.
    kEQUAL = 5,     //!< 1 if operands are equal, 0 otherwise.
    kLESS = 6,      //!< 1 if first operand is less than second operand, 0 otherwise.
    kFLOOR_DIV = 7, //!< Floor division of the first operand by the second operand.
    kCEIL_DIV = 8   //!< Division rounding up.
};
190
192template <>
193constexpr inline int32_t EnumMax<DimensionOperation>() noexcept
194{
195 return 9;
196}
197
//!
//! \enum TensorLocation
//!
//! \brief The location of tensor data: device (GPU) or host (CPU) memory.
//!
enum class TensorLocation : int32_t
{
    kDEVICE = 0, //!< Data is stored on the device.
    kHOST = 1,   //!< Data is stored on the host.
};
208
209namespace impl
210{
212template <>
214{
215 static constexpr int32_t kVALUE = 2;
216};
217} // namespace impl
218
232{
233public:
237 bool isConstant() const noexcept
238 {
239 return mImpl->isConstant();
240 }
241
248 int64_t getConstantValue() const noexcept
249 {
250 return mImpl->getConstantValue();
251 }
252
253protected:
254 apiv::VDimensionExpr* mImpl;
255 virtual ~IDimensionExpr() noexcept = default;
256
257public:
263 bool isSizeTensor() const noexcept
264 {
265 return mImpl->isSizeTensor();
266 }
267};
268
286class IExprBuilder : public INoCopy
287{
288public:
292 IDimensionExpr const* constant(int64_t value) noexcept
293 {
294 return mImpl->constant(value);
295 }
296
304 DimensionOperation op, IDimensionExpr const& first, IDimensionExpr const& second) noexcept
305 {
306 return mImpl->operation(op, first, second);
307 }
308
309protected:
310 apiv::VExprBuilder* mImpl;
311 virtual ~IExprBuilder() noexcept = default;
312
313public:
338 IDimensionExpr const* declareSizeTensor(int32_t outputIndex, IDimensionExpr const& opt, IDimensionExpr const& upper)
339 {
340 return mImpl->declareSizeTensor(outputIndex, opt, upper);
341 }
342};
343
350{
351public:
352 int32_t nbDims;
354};
355
362{
365
368
371
374};
375
407{
408public:
409 IPluginV2DynamicExt* clone() const noexcept override = 0;
410
435 virtual DimsExprs getOutputDimensions(
436 int32_t outputIndex, DimsExprs const* inputs, int32_t nbInputs, IExprBuilder& exprBuilder) noexcept = 0;
437
441 static constexpr int32_t kFORMAT_COMBINATION_LIMIT = 100;
442
475 virtual bool supportsFormatCombination(
476 int32_t pos, PluginTensorDesc const* inOut, int32_t nbInputs, int32_t nbOutputs) noexcept = 0;
477
515 virtual void configurePlugin(DynamicPluginTensorDesc const* in, int32_t nbInputs,
516 DynamicPluginTensorDesc const* out, int32_t nbOutputs) noexcept = 0;
517
527 virtual size_t getWorkspaceSize(PluginTensorDesc const* inputs, int32_t nbInputs, PluginTensorDesc const* outputs,
528 int32_t nbOutputs) const noexcept = 0;
529
542 virtual int32_t enqueue(PluginTensorDesc const* inputDesc, PluginTensorDesc const* outputDesc,
543 void const* const* inputs, void* const* outputs, void* workspace, cudaStream_t stream) noexcept = 0;
544
545protected:
553 int32_t getTensorRTVersion() const noexcept override
554 {
555 return (static_cast<int32_t>(PluginVersion::kV2_DYNAMICEXT) << 24 | (NV_TENSORRT_VERSION & 0xFFFFFF));
556 }
557
558 virtual ~IPluginV2DynamicExt() noexcept {}
559
560private:
561 // Following are obsolete base class methods, and must not be implemented or used.
562
566 void configurePlugin(Dims const*, int32_t, Dims const*, int32_t, DataType const*, DataType const*, bool const*,
567 bool const*, PluginFormat, int32_t) noexcept override final
568 {
569 }
570
574 bool supportsFormat(DataType, PluginFormat) const noexcept override final
575 {
576 return false;
577 }
578
582 Dims getOutputDimensions(int32_t, Dims const*, int32_t) noexcept override final
583 {
584 return Dims{-1, {}};
585 }
586
594 TRT_DEPRECATED bool isOutputBroadcastAcrossBatch(int32_t, bool const*, int32_t) const noexcept override final
595 {
596 return false;
597 }
598
606 TRT_DEPRECATED bool canBroadcastInputAcrossBatch(int32_t) const noexcept override final
607 {
608 return true;
609 }
610
614 size_t getWorkspaceSize(int32_t) const noexcept override final
615 {
616 return 0;
617 }
618
622 int32_t enqueue(int32_t, void const* const*, void* const*, void*, cudaStream_t) noexcept override final
623 {
624 return 1;
625 }
626};
627
628namespace v_1_0
629{
631{
632public:
637 ~IStreamReader() override = default;
638 IStreamReader() = default;
639
643 InterfaceInfo getInterfaceInfo() const noexcept override
644 {
645 return InterfaceInfo{"IStreamReader", 1, 0};
646 }
647
656 virtual int64_t read(void* destination, int64_t nbBytes) = 0;
657
658protected:
659 IStreamReader(IStreamReader const&) = default;
663};
664} // namespace v_1_0
665
675
//!
//! \enum SeekPosition
//!
//! \brief Controls the seek mode used with IStreamReaderV2::seek().
//!
enum class SeekPosition : int32_t
{
    kSET = 0, //!< Offset is measured from the beginning of the file.
    kCUR = 1, //!< Offset is measured from the current position in the file.
    kEND = 2, //!< Offset is measured from the end of the file.
};
691
692namespace v_1_0
693{
695{
696public:
701 ~IStreamReaderV2() override = default;
702 IStreamReaderV2() = default;
703
707 InterfaceInfo getInterfaceInfo() const noexcept override
708 {
709 return InterfaceInfo{"IStreamReaderV2", 1, 0};
710 }
711
722 virtual int64_t read(void* destination, int64_t nbBytes, cudaStream_t stream) noexcept = 0;
723
732 virtual bool seek(int64_t offset, SeekPosition where) noexcept = 0;
733
734protected:
739};
740} // namespace v_1_0
741
752
753namespace v_1_0
754{
756{
757public:
765 virtual void reportLayerTime(char const* layerName, float ms) noexcept = 0;
766
767 virtual ~IProfiler() noexcept {}
768};
769} // namespace v_1_0
770
783
//!
//! \enum WeightsRole
//!
//! \brief How a layer uses a particular set of Weights.
//!
enum class WeightsRole : int32_t
{
    kKERNEL = 0,   //!< Kernel weights.
    kBIAS = 1,     //!< Bias weights.
    kSHIFT = 2,    //!< Shift weights.
    kSCALE = 3,    //!< Scale weights.
    kCONSTANT = 4, //!< Weights of a constant.
    kANY = 5,      //!< Any other weights role.
};
800
802template <>
803constexpr inline int32_t EnumMax<WeightsRole>() noexcept
804{
805 return 6;
806}
807
//!
//! \enum DeviceType
//!
//! \brief The device on which a layer executes.
//!
enum class DeviceType : int32_t
{
    kGPU = 0, //!< GPU device.
    kDLA = 1, //!< DLA core.
};
818
820template <>
821constexpr inline int32_t EnumMax<DeviceType>() noexcept
822{
823 return 2;
824}
825
//!
//! \enum TempfileControlFlag
//!
//! \brief Flags used to control TensorRT's behavior when creating executable temporary files.
//!
//! NOTE(review): the enumerator lines were missing from the extracted source and have been
//! reconstructed from the public TensorRT API (EnumMax<TempfileControlFlag>() == 2 below agrees
//! with the count); verify against the shipped header.
//!
enum class TempfileControlFlag : int32_t
{
    //! Allow creating and loading files in-memory (or unnamed files).
    kALLOW_IN_MEMORY_FILES = 0,

    //! Allow creating and loading named files in a temporary directory on the filesystem.
    kALLOW_TEMPORARY_FILES = 1,
};
846
848template <>
849constexpr inline int32_t EnumMax<TempfileControlFlag>() noexcept
850{
851 return 2;
852}
853
860using TempfileControlFlags = uint32_t;
861
//!
//! \enum TensorFormat
//!
//! \brief Format of the input/output tensors.
//!
enum class TensorFormat : int32_t
{
    //! Row-major linear format.
    kLINEAR = 0,

    //! Two-wide channel vectorized row-major format.
    kCHW2 = 1,

    //! Eight-channel HWC format (channel dimension padded to a multiple of 8).
    kHWC8 = 2,

    //! Four-wide channel vectorized row-major format.
    kCHW4 = 3,

    //! Sixteen-wide channel vectorized row-major format.
    kCHW16 = 4,

    //! Thirty-two-wide channel vectorized row-major format.
    kCHW32 = 5,

    //! Eight-channel format for 3-D spatial data (channel dimension padded to a multiple of 8).
    kDHWC8 = 6,

    //! Thirty-two-wide channel vectorized row-major format for 3-D spatial data.
    kCDHW32 = 7,

    //! Non-vectorized channel-last (HWC) format.
    kHWC = 8,

    //! DLA planar format.
    kDLA_LINEAR = 9,

    //! DLA image format (channel dimension padded to 4).
    kDLA_HWC4 = 10,

    //! Sixteen-channel HWC format (channel dimension padded to a multiple of 16).
    kHWC16 = 11,

    //! Non-vectorized channel-last format for 3-D spatial data (DHWC).
    kDHWC = 12
};
1000
1001namespace impl
1002{
1004template <>
1006{
1008 static constexpr int32_t kVALUE = 13;
1009};
1010} // namespace impl
1011
//!
//! \enum AllocatorFlag
//!
//! \brief Allocator behavior flags. \see AllocatorFlags
//!
enum class AllocatorFlag : int32_t
{
    kRESIZABLE = 0, //!< The allocation may later be resized via IGpuAllocator::reallocate().
};
1022
1023namespace impl
1024{
1026template <>
1028{
1030 static constexpr int32_t kVALUE = 1;
1031};
1032} // namespace impl
1033
1034using AllocatorFlags = uint32_t;
1035
1038
1052{
1053public:
1059 enum class Severity : int32_t
1060 {
1062 kINTERNAL_ERROR = 0,
1064 kERROR = 1,
1066 kWARNING = 2,
1068 kINFO = 3,
1070 kVERBOSE = 4,
1071 };
1072
1091 virtual void log(Severity severity, AsciiChar const* msg) noexcept = 0;
1092
1093 ILogger() = default;
1094 virtual ~ILogger() = default;
1095
1096protected:
1097 // @cond SuppressDoxyWarnings
1098 ILogger(ILogger const&) = default;
1099 ILogger(ILogger&&) = default;
1100 ILogger& operator=(ILogger const&) & = default;
1101 ILogger& operator=(ILogger&&) & = default;
1102 // @endcond
1103};
1104
1105namespace impl
1106{
1108template <>
1109struct EnumMaxImpl<ILogger::Severity>
1110{
1112 static constexpr int32_t kVALUE = 5;
1113};
1114} // namespace impl
1115
1116namespace v_1_0
1117{
1118
1120{
1121public:
1147 uint64_t const size, uint64_t const alignment, AllocatorFlags const flags) noexcept = 0;
1148
1149 ~IGpuAllocator() override = default;
1150 IGpuAllocator() = default;
1151
1189 virtual void* reallocate(void* const /*baseAddr*/, uint64_t /*alignment*/, uint64_t /*newSize*/) noexcept
1190 {
1191 return nullptr;
1192 }
1193
1212 TRT_DEPRECATED virtual bool deallocate(void* const memory) noexcept = 0;
1213
1242 virtual void* allocateAsync(
1243 uint64_t const size, uint64_t const alignment, AllocatorFlags const flags, cudaStream_t /*stream*/) noexcept
1244 {
1245 return allocate(size, alignment, flags);
1246 }
1275 virtual bool deallocateAsync(void* const memory, cudaStream_t /*stream*/) noexcept
1276 {
1277 return deallocate(memory);
1278 }
1279
1283 InterfaceInfo getInterfaceInfo() const noexcept override
1284 {
1285 return {"IGpuAllocator", 1, 0};
1286 }
1287
1288protected:
1289 // @cond SuppressDoxyWarnings
1290 IGpuAllocator(IGpuAllocator const&) = default;
1291 IGpuAllocator(IGpuAllocator&&) = default;
1292 IGpuAllocator& operator=(IGpuAllocator const&) & = default;
1293 IGpuAllocator& operator=(IGpuAllocator&&) & = default;
1294 // @endcond
1295};
1296
1297} // namespace v_1_0
1298
1320
1328class IRuntime : public INoCopy
1329{
1330public:
1331 virtual ~IRuntime() noexcept = default;
1332
1344 void setDLACore(int32_t dlaCore) noexcept
1345 {
1346 mImpl->setDLACore(dlaCore);
1347 }
1348
1354 int32_t getDLACore() const noexcept
1355 {
1356 return mImpl->getDLACore();
1357 }
1358
1362 int32_t getNbDLACores() const noexcept
1363 {
1364 return mImpl->getNbDLACores();
1365 }
1366
1378 void setGpuAllocator(IGpuAllocator* allocator) noexcept
1379 {
1380 mImpl->setGpuAllocator(allocator);
1381 }
1382
1394 //
1397 void setErrorRecorder(IErrorRecorder* recorder) noexcept
1398 {
1399 mImpl->setErrorRecorder(recorder);
1400 }
1401
1413 {
1414 return mImpl->getErrorRecorder();
1415 }
1416
1430 ICudaEngine* deserializeCudaEngine(void const* blob, std::size_t size) noexcept
1431 {
1432 return mImpl->deserializeCudaEngine(blob, size);
1433 }
1434
1454 {
1455 return mImpl->deserializeCudaEngineV2(streamReader);
1456 }
1457
1463 ILogger* getLogger() const noexcept
1464 {
1465 return mImpl->getLogger();
1466 }
1467
1478 bool setMaxThreads(int32_t maxThreads) noexcept
1479 {
1480 return mImpl->setMaxThreads(maxThreads);
1481 }
1482
1492 int32_t getMaxThreads() const noexcept
1493 {
1494 return mImpl->getMaxThreads();
1495 }
1496
1527 void setTemporaryDirectory(char const* path) noexcept
1528 {
1529 return mImpl->setTemporaryDirectory(path);
1530 }
1531
1538 char const* getTemporaryDirectory() const noexcept
1539 {
1540 return mImpl->getTemporaryDirectory();
1541 }
1542
1555 {
1556 return mImpl->setTempfileControlFlags(flags);
1557 }
1558
1567 {
1568 return mImpl->getTempfileControlFlags();
1569 }
1570
1577 {
1578 return mImpl->getPluginRegistry();
1579 }
1580
1594 IRuntime* loadRuntime(char const* path) noexcept
1595 {
1596 return mImpl->loadRuntime(path);
1597 }
1598
1606 void setEngineHostCodeAllowed(bool allowed) noexcept
1607 {
1608 return mImpl->setEngineHostCodeAllowed(allowed);
1609 }
1610
1616 bool getEngineHostCodeAllowed() const noexcept
1617 {
1618 return mImpl->getEngineHostCodeAllowed();
1619 }
1620
1621protected:
1622 apiv::VRuntime* mImpl;
1623};
1624
1632class IRefitter : public INoCopy
1633{
1634public:
1635 virtual ~IRefitter() noexcept = default;
1636
1652 bool setWeights(char const* layerName, WeightsRole role, Weights weights) noexcept
1653 {
1654 return mImpl->setWeights(layerName, role, weights);
1655 }
1656
1669 bool refitCudaEngine() noexcept
1670 {
1671 return mImpl->refitCudaEngine();
1672 }
1673
1690 int32_t getMissing(int32_t size, char const** layerNames, WeightsRole* roles) noexcept
1691 {
1692 return mImpl->getMissing(size, layerNames, roles);
1693 }
1694
1707 int32_t getAll(int32_t size, char const** layerNames, WeightsRole* roles) noexcept
1708 {
1709 return mImpl->getAll(size, layerNames, roles);
1710 }
1711
1723 //
1726 void setErrorRecorder(IErrorRecorder* recorder) noexcept
1727 {
1728 mImpl->setErrorRecorder(recorder);
1729 }
1730
1742 {
1743 return mImpl->getErrorRecorder();
1744 }
1745
1766 bool setNamedWeights(char const* name, Weights weights) noexcept
1767 {
1768 return mImpl->setNamedWeights(name, weights);
1769 }
1770
1786 int32_t getMissingWeights(int32_t size, char const** weightsNames) noexcept
1787 {
1788 return mImpl->getMissingWeights(size, weightsNames);
1789 }
1790
1802 int32_t getAllWeights(int32_t size, char const** weightsNames) noexcept
1803 {
1804 return mImpl->getAllWeights(size, weightsNames);
1805 }
1806
1812 ILogger* getLogger() const noexcept
1813 {
1814 return mImpl->getLogger();
1815 }
1816
1828 bool setMaxThreads(int32_t maxThreads) noexcept
1829 {
1830 return mImpl->setMaxThreads(maxThreads);
1831 }
1832
1842 int32_t getMaxThreads() const noexcept
1843 {
1844 return mImpl->getMaxThreads();
1845 }
1846
1869 bool setNamedWeights(char const* name, Weights weights, TensorLocation location) noexcept
1870 {
1871 return mImpl->setNamedWeightsWithLocation(name, weights, location);
1872 }
1873
1885 Weights getNamedWeights(char const* weightsName) const noexcept
1886 {
1887 return mImpl->getNamedWeights(weightsName);
1888 }
1889
1901 TensorLocation getWeightsLocation(char const* weightsName) const noexcept
1902 {
1903 return mImpl->getWeightsLocation(weightsName);
1904 }
1905
1917 bool unsetNamedWeights(char const* weightsName) noexcept
1918 {
1919 return mImpl->unsetNamedWeights(weightsName);
1920 }
1921
1933 void setWeightsValidation(bool weightsValidation) noexcept
1934 {
1935 return mImpl->setWeightsValidation(weightsValidation);
1936 }
1937
1941 bool getWeightsValidation() const noexcept
1942 {
1943 return mImpl->getWeightsValidation();
1944 }
1945
1963 bool refitCudaEngineAsync(cudaStream_t stream) noexcept
1964 {
1965 return mImpl->refitCudaEngineAsync(stream);
1966 }
1967
1981 Weights getWeightsPrototype(char const* weightsName) const noexcept
1982 {
1983 return mImpl->getWeightsPrototype(weightsName);
1984 }
1985
1986protected:
1987 apiv::VRefitter* mImpl;
1988};
1989
//!
//! \enum OptProfileSelector
//!
//! \brief Selects the minimum, optimum, or maximum entry of an optimization profile.
//!
enum class OptProfileSelector : int32_t
{
    kMIN = 0, //!< The minimum permitted value.
    kOPT = 1, //!< The value optimized for.
    kMAX = 2  //!< The maximum permitted value.
};
2006
2012template <>
2013constexpr inline int32_t EnumMax<OptProfileSelector>() noexcept
2014{
2015 return 3;
2016}
2017
2041{
2042public:
2070 bool setDimensions(char const* inputName, OptProfileSelector select, Dims const& dims) noexcept
2071 {
2072 return mImpl->setDimensions(inputName, select, dims);
2073 }
2074
2082 Dims getDimensions(char const* inputName, OptProfileSelector select) const noexcept
2083 {
2084 return mImpl->getDimensions(inputName, select);
2085 }
2086
2135 char const* inputName, OptProfileSelector select, int32_t const* values, int32_t nbValues) noexcept
2136 {
2137 return mImpl->setShapeValues(inputName, select, values, nbValues);
2138 }
2139
2148 int32_t getNbShapeValues(char const* inputName) const noexcept
2149 {
2150 return mImpl->getNbShapeValues(inputName);
2151 }
2152
2162 TRT_DEPRECATED int32_t const* getShapeValues(char const* inputName, OptProfileSelector select) const noexcept
2163 {
2164 return mImpl->getShapeValues(inputName, select);
2165 }
2166
2180 bool setExtraMemoryTarget(float target) noexcept
2181 {
2182 return mImpl->setExtraMemoryTarget(target);
2183 }
2184
2192 float getExtraMemoryTarget() const noexcept
2193 {
2194 return mImpl->getExtraMemoryTarget();
2195 }
2196
2209 bool isValid() const noexcept
2210 {
2211 return mImpl->isValid();
2212 }
2213
2260 char const* inputName, OptProfileSelector select, int64_t const* values, int32_t nbValues) noexcept
2261 {
2262 return mImpl->setShapeValuesV2(inputName, select, values, nbValues);
2263 }
2264
2272 int64_t const* getShapeValuesV2(char const* inputName, OptProfileSelector select) const noexcept
2273 {
2274 return mImpl->getShapeValuesV2(inputName, select);
2275 }
2276
2277protected:
2278 apiv::VOptimizationProfile* mImpl;
2279 virtual ~IOptimizationProfile() noexcept = default;
2280};
2281
2289enum class TacticSource : int32_t
2290{
2295
2299
2304
2309
2313};
2314
2315template <>
2316constexpr inline int32_t EnumMax<TacticSource>() noexcept
2317{
2318 return 5;
2319}
2320
2327using TacticSources = uint32_t;
2328
//!
//! \enum ProfilingVerbosity
//!
//! \brief Verbosity level of layer information exposed in NVTX annotations and by IEngineInspector.
//!
enum class ProfilingVerbosity : int32_t
{
    kLAYER_NAMES_ONLY = 0, //!< Print only the layer names.
    kNONE = 1,             //!< Do not print any layer information.
    kDETAILED = 2,         //!< Print detailed layer information.
};
2344
2346template <>
2347constexpr inline int32_t EnumMax<ProfilingVerbosity>() noexcept
2348{
2349 return 3;
2350}
2351
2358using SerializationFlags = uint32_t;
2359
2367enum class SerializationFlag : int32_t
2368{
2369 kEXCLUDE_WEIGHTS = 0,
2371};
2372
2374template <>
2375constexpr inline int32_t EnumMax<SerializationFlag>() noexcept
2376{
2377 return 2;
2378}
2379
2388{
2389public:
2390 virtual ~ISerializationConfig() noexcept = default;
2391
2403 bool setFlags(SerializationFlags serializationFlags) noexcept
2404 {
2405 return mImpl->setFlags(serializationFlags);
2406 }
2407
2416 {
2417 return mImpl->getFlags();
2418 }
2419
2427 bool clearFlag(SerializationFlag serializationFlag) noexcept
2428 {
2429 return mImpl->clearFlag(serializationFlag);
2430 }
2431
2439 bool setFlag(SerializationFlag serializationFlag) noexcept
2440 {
2441 return mImpl->setFlag(serializationFlag);
2442 }
2443
2451 bool getFlag(SerializationFlag serializationFlag) const noexcept
2452 {
2453 return mImpl->getFlag(serializationFlag);
2454 }
2455
2456protected:
2457 apiv::VSerializationConfig* mImpl;
2458};
2459
//!
//! \enum ExecutionContextAllocationStrategy
//!
//! \brief Memory allocation strategies for an execution context's activation memory.
//!
//! NOTE(review): the enum declaration line was missing from the extracted source and has been
//! reconstructed; the name is established by EnumMax<ExecutionContextAllocationStrategy>() below.
//!
enum class ExecutionContextAllocationStrategy : int32_t
{
    kSTATIC = 0,            //!< Default: pre-allocate based on the maximum size across profiles.
    kON_PROFILE_CHANGE = 1, //!< Reallocate when a profile is selected.
    kUSER_MANAGED = 2,      //!< The user supplies the allocation to the execution context.
};
2477
2483template <>
2484constexpr inline int32_t EnumMax<ExecutionContextAllocationStrategy>() noexcept
2485{
2486 return 3;
2487}
2488
2497{
2498public:
2499 virtual ~IRuntimeCache() noexcept = default;
2500
2506 IHostMemory* serialize() const noexcept
2507 {
2508 return mImpl->serialize();
2509 }
2510
2517 bool deserialize(void const* blob, size_t size) noexcept
2518 {
2519 return mImpl->deserialize(blob, size);
2520 }
2521
2525 bool reset() noexcept
2526 {
2527 return mImpl->reset();
2528 }
2529
2530protected:
2531 apiv::VRuntimeCache* mImpl;
2532};
2533
2546{
2552 kLAZY = 0,
2553
2558 kEAGER = 1,
2559
2563 kNONE = 2,
2564};
2565
2571template <>
2573{
2574 return 3;
2575}
2576
2584{
2585public:
2586 virtual ~IRuntimeConfig() noexcept = default;
2587
2593 void setExecutionContextAllocationStrategy(ExecutionContextAllocationStrategy strategy) noexcept
2594 {
2595 return mImpl->setExecutionContextAllocationStrategy(strategy);
2596 }
2597
2604 {
2605 return mImpl->getExecutionContextAllocationStrategy();
2606 }
2607
2614 {
2615 return mImpl->createRuntimeCache();
2616 }
2617
2623 bool setRuntimeCache(IRuntimeCache const& cache) noexcept
2624 {
2625 return mImpl->setRuntimeCache(cache);
2626 }
2627
2634 {
2635 return mImpl->getRuntimeCache();
2636 }
2637
2644 DynamicShapesKernelSpecializationStrategy dynamicShapesKernelSpecializationStrategy) noexcept
2645 {
2646 return mImpl->setDynamicShapesKernelSpecializationStrategy(dynamicShapesKernelSpecializationStrategy);
2647 }
2648
2655 {
2656 return mImpl->getDynamicShapesKernelSpecializationStrategy();
2657 }
2658
2659
2660protected:
2661 apiv::VRuntimeConfig* mImpl;
2662}; // class IRuntimeConfig
2663
2671class ICudaEngine : public INoCopy
2672{
2673public:
2674 virtual ~ICudaEngine() noexcept = default;
2675
2686 Dims getTensorShape(char const* tensorName) const noexcept
2687 {
2688 return mImpl->getTensorShape(tensorName);
2689 }
2690
2701 DataType getTensorDataType(char const* tensorName) const noexcept
2702 {
2703 return mImpl->getTensorDataType(tensorName);
2704 }
2705
2715 int32_t getNbLayers() const noexcept
2716 {
2717 return mImpl->getNbLayers();
2718 }
2719
2729 IHostMemory* serialize() const noexcept
2730 {
2731 return mImpl->serialize();
2732 }
2733
2748 {
2749 return mImpl->createExecutionContext(strategy);
2750 }
2751
2764 TensorLocation getTensorLocation(char const* tensorName) const noexcept
2765 {
2766 return mImpl->getTensorLocation(tensorName);
2767 }
2768
2784 bool isShapeInferenceIO(char const* tensorName) const noexcept
2785 {
2786 return mImpl->isShapeInferenceIO(tensorName);
2787 }
2788
2798 TensorIOMode getTensorIOMode(char const* tensorName) const noexcept
2799 {
2800 return mImpl->getTensorIOMode(tensorName);
2801 }
2802
2811 {
2812 return mImpl->createExecutionContextWithRuntimeConfig(runtimeConfig);
2813 }
2814
2824 {
2825 return mImpl->createRuntimeConfig();
2826 }
2827
2839 int64_t getDeviceMemorySizeV2() const noexcept
2840 {
2841 return mImpl->getDeviceMemorySizeV2();
2842 }
2843
2855 int64_t getDeviceMemorySizeForProfileV2(int32_t profileIndex) const noexcept
2856 {
2857 return mImpl->getDeviceMemorySizeForProfileV2(profileIndex);
2858 }
2859
2865 bool isRefittable() const noexcept
2866 {
2867 return mImpl->isRefittable();
2868 }
2869
2886 int32_t getTensorBytesPerComponent(char const* tensorName) const noexcept
2887 {
2888 return mImpl->getTensorBytesPerComponent(tensorName);
2889 }
2890
2904 int32_t getTensorBytesPerComponent(char const* tensorName, int32_t profileIndex) const noexcept
2905 {
2906 return mImpl->getTensorBytesPerComponentV2(tensorName, profileIndex);
2907 }
2908
2925 int32_t getTensorComponentsPerElement(char const* tensorName) const noexcept
2926 {
2927 return mImpl->getTensorComponentsPerElement(tensorName);
2928 }
2929
2943 int32_t getTensorComponentsPerElement(char const* tensorName, int32_t profileIndex) const noexcept
2944 {
2945 return mImpl->getTensorComponentsPerElementV2(tensorName, profileIndex);
2946 }
2947
2958 TensorFormat getTensorFormat(char const* tensorName) const noexcept
2959 {
2960 return mImpl->getTensorFormat(tensorName);
2961 }
2962
2972 TensorFormat getTensorFormat(char const* tensorName, int32_t profileIndex) const noexcept
2973 {
2974 return mImpl->getTensorFormatV2(tensorName, profileIndex);
2975 }
2976
2996 char const* getTensorFormatDesc(char const* tensorName) const noexcept
2997 {
2998 return mImpl->getTensorFormatDesc(tensorName);
2999 }
3000
3019 char const* getTensorFormatDesc(char const* tensorName, int32_t profileIndex) const noexcept
3020 {
3021 return mImpl->getTensorFormatDescV2(tensorName, profileIndex);
3022 }
3023
3036 int32_t getTensorVectorizedDim(char const* tensorName) const noexcept
3037 {
3038 return mImpl->getTensorVectorizedDim(tensorName);
3039 }
3040
3052 int32_t getTensorVectorizedDim(char const* tensorName, int32_t profileIndex) const noexcept
3053 {
3054 return mImpl->getTensorVectorizedDimV2(tensorName, profileIndex);
3055 }
3056
3067 char const* getName() const noexcept
3068 {
3069 return mImpl->getName();
3070 }
3071
3078 int32_t getNbOptimizationProfiles() const noexcept
3079 {
3080 return mImpl->getNbOptimizationProfiles();
3081 }
3082
3098 Dims getProfileShape(char const* tensorName, int32_t profileIndex, OptProfileSelector select) const noexcept
3099 {
3100 return mImpl->getProfileShape(tensorName, profileIndex, select);
3101 }
3102
3114 {
3115 return mImpl->getEngineCapability();
3116 }
3117
3132 void setErrorRecorder(IErrorRecorder* recorder) noexcept
3133 {
3134 return mImpl->setErrorRecorder(recorder);
3135 }
3136
3148 {
3149 return mImpl->getErrorRecorder();
3150 }
3151
3162 {
3163 return mImpl->hasImplicitBatchDimension();
3164 }
3165
3178 {
3179 return mImpl->getTacticSources();
3180 }
3181
3190 {
3191 return mImpl->getProfilingVerbosity();
3192 }
3193
3200 {
3201 return mImpl->createEngineInspector();
3202 }
3203
3212 int32_t getNbIOTensors() const noexcept
3213 {
3214 return mImpl->getNbIOTensors();
3215 }
3216
3224 char const* getIOTensorName(int32_t index) const noexcept
3225 {
3226 return mImpl->getIOTensorName(index);
3227 }
3228
3236 {
3237 return mImpl->getHardwareCompatibilityLevel();
3238 }
3239
3250 int32_t getNbAuxStreams() const noexcept
3251 {
3252 return mImpl->getNbAuxStreams();
3253 }
3254
3261 {
3262 return mImpl->createSerializationConfig();
3263 }
3264
3277 {
3278 return mImpl->serializeWithConfig(config);
3279 }
3280
3292 int64_t getStreamableWeightsSize() const noexcept
3293 {
3294 return mImpl->getStreamableWeightsSize();
3295 }
3296
3334 bool setWeightStreamingBudgetV2(int64_t gpuMemoryBudget) noexcept
3335 {
3336 return mImpl->setWeightStreamingBudgetV2(gpuMemoryBudget);
3337 }
3338
3352 int64_t getWeightStreamingBudgetV2() const noexcept
3353 {
3354 return mImpl->getWeightStreamingBudgetV2();
3355 }
3356
3377 int64_t getWeightStreamingAutomaticBudget() const noexcept
3378 {
3379 return mImpl->getWeightStreamingAutomaticBudget();
3380 }
3381
3406 {
3407 return mImpl->getWeightStreamingScratchMemorySize();
3408 }
3409
3419 bool isDebugTensor(char const* name) const noexcept
3420 {
3421 return mImpl->isDebugTensor(name);
3422 }
3423
3444 char const* tensorName, int32_t profileIndex, OptProfileSelector select) const noexcept
3445 {
3446 return mImpl->getProfileTensorValuesV2(tensorName, profileIndex, select);
3447 }
3448
3449protected:
3450 apiv::VCudaEngine* mImpl;
3451};
3452
3453namespace v_1_0
3454{
3456{
3457public:
3461 InterfaceInfo getInterfaceInfo() const noexcept override
3462 {
3463 return {"IOutputAllocator", 1, 0};
3464 }
3465
3489 char const* tensorName, void* currentMemory, uint64_t size, uint64_t alignment, cudaStream_t /*stream*/)
3490 {
3491 return nullptr;
3492 }
3493
3502 virtual void notifyShape(char const* tensorName, Dims const& dims) noexcept = 0;
3503};
3504} // namespace v_1_0
3505
3514
3515namespace v_1_0
3516{
3518{
3519public:
3523 InterfaceInfo getInterfaceInfo() const noexcept override
3524 {
3525 return {"IDebugListener", 1, 0};
3526 }
3527
3541 virtual bool processDebugTensor(void const* addr, TensorLocation location, DataType type, Dims const& shape,
3542 char const* name, cudaStream_t stream)
3543 = 0;
3544
3545 ~IDebugListener() override = default;
3546};
3547} // namespace v_1_0
3548
3555
3567{
3568public:
3569 virtual ~IExecutionContext() noexcept = default;
3570
3579 void setDebugSync(bool sync) noexcept
3580 {
3581 mImpl->setDebugSync(sync);
3582 }
3583
3589 bool getDebugSync() const noexcept
3590 {
3591 return mImpl->getDebugSync();
3592 }
3593
3599 void setProfiler(IProfiler* profiler) noexcept
3600 {
3601 mImpl->setProfiler(profiler);
3602 }
3603
3609 IProfiler* getProfiler() const noexcept
3610 {
3611 return mImpl->getProfiler();
3612 }
3613
3619 ICudaEngine const& getEngine() const noexcept
3620 {
3621 return mImpl->getEngine();
3622 }
3623
3633 void setName(char const* name) noexcept
3634 {
3635 mImpl->setName(name);
3636 }
3637
3643 char const* getName() const noexcept
3644 {
3645 return mImpl->getName();
3646 }
3647
3670 void setDeviceMemory(void* memory) noexcept
3671 {
3672 mImpl->setDeviceMemory(memory);
3673 }
3674
3692 void setDeviceMemoryV2(void* memory, int64_t size) noexcept
3693 {
3694 return mImpl->setDeviceMemoryV2(memory, size);
3695 }
3696
3713 Dims getTensorStrides(char const* tensorName) const noexcept
3714 {
3715 return mImpl->getTensorStrides(tensorName);
3716 }
3717
3718public:
3728 int32_t getOptimizationProfile() const noexcept
3729 {
3730 return mImpl->getOptimizationProfile();
3731 }
3732
3746 bool setInputShape(char const* tensorName, Dims const& dims) noexcept
3747 {
3748 return mImpl->setInputShape(tensorName, dims);
3749 }
3750
3783 Dims getTensorShape(char const* tensorName) const noexcept
3784 {
3785 return mImpl->getTensorShape(tensorName);
3786 }
3787
3799 bool allInputDimensionsSpecified() const noexcept
3800 {
3801 return mImpl->allInputDimensionsSpecified();
3802 }
3803
3818 void setErrorRecorder(IErrorRecorder* recorder) noexcept
3819 {
3820 mImpl->setErrorRecorder(recorder);
3821 }
3822
3834 {
3835 return mImpl->getErrorRecorder();
3836 }
3837
3850 bool executeV2(void* const* bindings) noexcept
3851 {
3852 return mImpl->executeV2(bindings);
3853 }
3854
3894 bool setOptimizationProfileAsync(int32_t profileIndex, cudaStream_t stream) noexcept
3895 {
3896 return mImpl->setOptimizationProfileAsync(profileIndex, stream);
3897 }
3898
3910 void setEnqueueEmitsProfile(bool enqueueEmitsProfile) noexcept
3911 {
3912 mImpl->setEnqueueEmitsProfile(enqueueEmitsProfile);
3913 }
3914
3922 bool getEnqueueEmitsProfile() const noexcept
3923 {
3924 return mImpl->getEnqueueEmitsProfile();
3925 }
3926
3952 bool reportToProfiler() const noexcept
3953 {
3954 return mImpl->reportToProfiler();
3955 }
3956
3996 bool setTensorAddress(char const* tensorName, void* data) noexcept
3997 {
3998 return mImpl->setTensorAddress(tensorName, data);
3999 }
4000
4013 void const* getTensorAddress(char const* tensorName) const noexcept
4014 {
4015 return mImpl->getTensorAddress(tensorName);
4016 }
4017
4036 bool setOutputTensorAddress(char const* tensorName, void* data) noexcept
4037 {
4038 return mImpl->setOutputTensorAddress(tensorName, data);
4039 }
4040
4058 bool setInputTensorAddress(char const* tensorName, void const* data) noexcept
4059 {
4060 return mImpl->setInputTensorAddress(tensorName, data);
4061 }
4062
4077 void* getOutputTensorAddress(char const* tensorName) const noexcept
4078 {
4079 return mImpl->getOutputTensorAddress(tensorName);
4080 }
4081
4110 int32_t inferShapes(int32_t nbMaxNames, char const** tensorNames) noexcept
4111 {
4112 return mImpl->inferShapes(nbMaxNames, tensorNames);
4113 }
4114
4128 {
4129 return mImpl->updateDeviceMemorySizeForShapes();
4130 }
4131
4143 bool setInputConsumedEvent(cudaEvent_t event) noexcept
4144 {
4145 return mImpl->setInputConsumedEvent(event);
4146 }
4147
4153 cudaEvent_t getInputConsumedEvent() const noexcept
4154 {
4155 return mImpl->getInputConsumedEvent();
4156 }
4157
4172 bool setOutputAllocator(char const* tensorName, IOutputAllocator* outputAllocator) noexcept
4173 {
4174 return mImpl->setOutputAllocator(tensorName, outputAllocator);
4175 }
4176
4185 IOutputAllocator* getOutputAllocator(char const* tensorName) const noexcept
4186 {
4187 return mImpl->getOutputAllocator(tensorName);
4188 }
4189
4203 int64_t getMaxOutputSize(char const* tensorName) const noexcept
4204 {
4205 return mImpl->getMaxOutputSize(tensorName);
4206 }
4207
4224 {
4225 return mImpl->setTemporaryStorageAllocator(allocator);
4226 }
4227
4234 {
4235 return mImpl->getTemporaryStorageAllocator();
4236 }
4237
4257 bool enqueueV3(cudaStream_t stream) noexcept
4258 {
4259 return mImpl->enqueueV3(stream);
4260 }
4261
4273 void setPersistentCacheLimit(size_t size) noexcept
4274 {
4275 mImpl->setPersistentCacheLimit(size);
4276 }
4277
4284 size_t getPersistentCacheLimit() const noexcept
4285 {
4286 return mImpl->getPersistentCacheLimit();
4287 }
4288
4308 bool setNvtxVerbosity(ProfilingVerbosity verbosity) noexcept
4309 {
4310 return mImpl->setNvtxVerbosity(verbosity);
4311 }
4312
4321 {
4322 return mImpl->getNvtxVerbosity();
4323 }
4324
4351 void setAuxStreams(cudaStream_t* auxStreams, int32_t nbStreams) noexcept
4352 {
4353 mImpl->setAuxStreams(auxStreams, nbStreams);
4354 }
4355
4363 bool setDebugListener(IDebugListener* listener) noexcept
4364 {
4365 return mImpl->setDebugListener(listener);
4366 }
4367
4374 {
4375 return mImpl->getDebugListener();
4376 }
4377
4392 bool setTensorDebugState(char const* name, bool flag) noexcept
4393 {
4394 return mImpl->setTensorDebugState(name, flag);
4395 }
4396
4404 bool getDebugState(char const* name) const noexcept
4405 {
4406 return mImpl->getDebugState(name);
4407 }
4408
4415 {
4416 return mImpl->getRuntimeConfig();
4417 }
4418
4427 bool setAllTensorsDebugState(bool flag) noexcept
4428 {
4429 return mImpl->setAllTensorsDebugState(flag);
4430 }
4431
4443 bool setUnfusedTensorsDebugState(bool flag) noexcept
4444 {
4445 return mImpl->setUnfusedTensorsDebugState(flag);
4446 }
4447
4453 bool getUnfusedTensorsDebugState() const noexcept
4454 {
4455 return mImpl->getUnfusedTensorsDebugState();
4456 }
4457
4458protected:
4459 apiv::VExecutionContext* mImpl;
4460}; // class IExecutionContext
4461
//!
//! \enum LayerInformationFormat
//!
//! \brief The format in which IEngineInspector prints layer information.
//!
enum class LayerInformationFormat : int32_t
{
    kONELINE = 0, //!< One-line-per-layer text format.
    kJSON = 1,    //!< JSON format.
};
4474
4477template <>
4478constexpr inline int32_t EnumMax<LayerInformationFormat>() noexcept
4479{
4480 return 2;
4481}
4482
4499{
4500public:
4501 virtual ~IEngineInspector() noexcept = default;
4502
4515 bool setExecutionContext(IExecutionContext const* context) noexcept
4516 {
4517 return mImpl->setExecutionContext(context);
4518 }
4519
4528 {
4529 return mImpl->getExecutionContext();
4530 }
4531
4552 char const* getLayerInformation(int32_t layerIndex, LayerInformationFormat format) const noexcept
4553 {
4554 return mImpl->getLayerInformation(layerIndex, format);
4555 }
4556
4575 char const* getEngineInformation(LayerInformationFormat format) const noexcept
4576 {
4577 return mImpl->getEngineInformation(format);
4578 }
4579
4594 void setErrorRecorder(IErrorRecorder* recorder) noexcept
4595 {
4596 mImpl->setErrorRecorder(recorder);
4597 }
4598
4610 {
4611 return mImpl->getErrorRecorder();
4612 }
4613
4614protected:
4615 apiv::VEngineInspector* mImpl;
4616}; // class IEngineInspector
4617
4618} // namespace nvinfer1
4619
4624extern "C" TENSORRTAPI void* createInferRuntime_INTERNAL(void* logger, int32_t version) noexcept;
4625
4630extern "C" TENSORRTAPI void* createInferRefitter_INTERNAL(void* engine, void* logger, int32_t version) noexcept;
4631
4636
4642extern "C" TENSORRTAPI nvinfer1::ILogger* getLogger() noexcept;
4643
4644namespace nvinfer1
4645{
4646namespace // unnamed namespace avoids linkage surprises when linking objects built with different versions of this
4647 // header.
4648{
4654inline IRuntime* createInferRuntime(ILogger& logger) noexcept
4655{
4656 return static_cast<IRuntime*>(createInferRuntime_INTERNAL(&logger, NV_TENSORRT_VERSION));
4657}
4658
4665inline IRefitter* createInferRefitter(ICudaEngine& engine, ILogger& logger) noexcept
4666{
4667 return static_cast<IRefitter*>(createInferRefitter_INTERNAL(&engine, &logger, NV_TENSORRT_VERSION));
4668}
4669
4670} // namespace
4671
4683template <typename T>
4685{
4686public:
4688 {
4689 getPluginRegistry()->registerCreator(instance, "");
4690 }
4691
4692private:
4694 T instance{};
4695};
4696
4697} // namespace nvinfer1
4698
4699namespace nvinfer1
4700{
4710{
4711public:
4719 virtual ILogger* findLogger() = 0;
4720
4721protected:
4722 virtual ~ILoggerFinder() = default;
4723};
4724
4727namespace v_1_0
4728{
4729
4731{
4732public:
4734 ~IGpuAsyncAllocator() override = default;
4735
4765 void* allocateAsync(uint64_t const size, uint64_t const alignment, AllocatorFlags const flags,
4766 cudaStream_t /*stream*/) noexcept override = 0;
4767
4793 bool deallocateAsync(void* const memory, cudaStream_t /*stream*/) noexcept override = 0;
4794
4819 uint64_t const size, uint64_t const alignment, AllocatorFlags const flags) noexcept override
4820 {
4821 return allocateAsync(size, alignment, flags, nullptr);
4822 }
4823
4842 TRT_DEPRECATED bool deallocate(void* const memory) noexcept override
4843 {
4844 return deallocateAsync(memory, nullptr);
4845 }
4846
4850 InterfaceInfo getInterfaceInfo() const noexcept override
4851 {
4852 return {"IGpuAllocator", 1, 0};
4853 }
4854};
4855
4856} // namespace v_1_0
4857
4872
4873} // namespace nvinfer1
4874
4878extern "C" TENSORRTAPI int32_t getInferLibMajorVersion() noexcept;
4882extern "C" TENSORRTAPI int32_t getInferLibMinorVersion() noexcept;
4886extern "C" TENSORRTAPI int32_t getInferLibPatchVersion() noexcept;
4890extern "C" TENSORRTAPI int32_t getInferLibBuildVersion() noexcept;
4891
4892#endif // NV_INFER_RUNTIME_H
nvinfer1::ILogger * getLogger() noexcept
Return the logger object.
int32_t getInferLibMajorVersion() noexcept
Return the library major version number.
nvinfer1::IPluginRegistry * getPluginRegistry() noexcept
Return the plugin registry.
int32_t getInferLibPatchVersion() noexcept
Return the library patch version number.
int32_t getInferLibMinorVersion() noexcept
Return the library minor version number.
int32_t getInferLibBuildVersion() noexcept
Return the library build version number.
#define TENSORRTAPI
Definition: NvInferRuntimeBase.h:69
#define NV_TENSORRT_VERSION
Definition: NvInferRuntimeBase.h:101
#define TRT_DEPRECATED
Definition: NvInferRuntimeBase.h:42
#define TRT_DEPRECATED_ENUM
Definition: NvInferRuntimeBase.h:43
Structure to define the dimensions of a tensor.
Definition: NvInferRuntimeBase.h:216
static constexpr int32_t MAX_DIMS
The maximum rank (number of dimensions) supported for a tensor.
Definition: NvInferRuntimeBase.h:219
Analog of class Dims with expressions instead of constants for the dimensions.
Definition: NvInferRuntime.h:350
IDimensionExpr const * d[Dims::MAX_DIMS]
The extent of each dimension.
Definition: NvInferRuntime.h:353
int32_t nbDims
The number of dimensions.
Definition: NvInferRuntime.h:352
An engine for executing inference on a built network, with functionally unsafe features.
Definition: NvInferRuntime.h:2672
int32_t getTensorBytesPerComponent(char const *tensorName) const noexcept
Return the number of bytes per component of an element, or -1 if the tensor is not vectorized or prov...
Definition: NvInferRuntime.h:2886
ISerializationConfig * createSerializationConfig() noexcept
Create a serialization configuration object.
Definition: NvInferRuntime.h:3260
char const * getIOTensorName(int32_t index) const noexcept
Return name of an IO tensor.
Definition: NvInferRuntime.h:3224
int64_t getWeightStreamingBudgetV2() const noexcept
Returns the current weight streaming device memory budget in bytes.
Definition: NvInferRuntime.h:3352
EngineCapability getEngineCapability() const noexcept
Determine what execution capability this engine has.
Definition: NvInferRuntime.h:3113
IErrorRecorder * getErrorRecorder() const noexcept
Get the ErrorRecorder assigned to this interface.
Definition: NvInferRuntime.h:3147
TensorFormat getTensorFormat(char const *tensorName, int32_t profileIndex) const noexcept
Return the tensor format of given profile, or TensorFormat::kLINEAR if the provided name does not map...
Definition: NvInferRuntime.h:2972
int64_t const * getProfileTensorValuesV2(char const *tensorName, int32_t profileIndex, OptProfileSelector select) const noexcept
Get the minimum / optimum / maximum values (not dimensions) for an input tensor given its name under ...
Definition: NvInferRuntime.h:3443
TRT_DEPRECATED bool hasImplicitBatchDimension() const noexcept
Query whether the engine was built with an implicit batch dimension.
Definition: NvInferRuntime.h:3161
apiv::VCudaEngine * mImpl
Definition: NvInferRuntime.h:3450
IExecutionContext * createExecutionContext(ExecutionContextAllocationStrategy strategy=ExecutionContextAllocationStrategy::kSTATIC) noexcept
Create an execution context and specify the strategy for allocating internal activation memory.
Definition: NvInferRuntime.h:2746
char const * getTensorFormatDesc(char const *tensorName) const noexcept
Return the human readable description of the tensor format, or empty string if the provided name does...
Definition: NvInferRuntime.h:2996
Dims getProfileShape(char const *tensorName, int32_t profileIndex, OptProfileSelector select) const noexcept
Get the minimum / optimum / maximum dimensions for an input tensor given its name under an optimizati...
Definition: NvInferRuntime.h:3098
bool setWeightStreamingBudgetV2(int64_t gpuMemoryBudget) noexcept
Limit the maximum amount of GPU memory usable for network weights in bytes.
Definition: NvInferRuntime.h:3334
IExecutionContext * createExecutionContext(IRuntimeConfig *runtimeConfig) noexcept
Create an execution context with TensorRT JIT runtime config.
Definition: NvInferRuntime.h:2810
int32_t getNbAuxStreams() const noexcept
Return the number of auxiliary streams used by this engine.
Definition: NvInferRuntime.h:3250
int64_t getStreamableWeightsSize() const noexcept
Get the total size in bytes of all streamable weights.
Definition: NvInferRuntime.h:3292
DataType getTensorDataType(char const *tensorName) const noexcept
Determine the required data type for a buffer from its tensor name.
Definition: NvInferRuntime.h:2701
void setErrorRecorder(IErrorRecorder *recorder) noexcept
Set the ErrorRecorder for this interface.
Definition: NvInferRuntime.h:3132
TacticSources getTacticSources() const noexcept
return the tactic sources required by this engine.
Definition: NvInferRuntime.h:3177
IHostMemory * serializeWithConfig(ISerializationConfig &config) const noexcept
Serialize the network to a stream with the provided SerializationConfig.
Definition: NvInferRuntime.h:3276
virtual ~ICudaEngine() noexcept=default
int64_t getWeightStreamingAutomaticBudget() const noexcept
TensorRT automatically determines a device memory budget for the model to run. The budget is close to...
Definition: NvInferRuntime.h:3377
bool isDebugTensor(char const *name) const noexcept
Check if a tensor is marked as a debug tensor.
Definition: NvInferRuntime.h:3419
int32_t getTensorVectorizedDim(char const *tensorName, int32_t profileIndex) const noexcept
Return the dimension index that the buffer is vectorized of given profile, or -1 if the provided name...
Definition: NvInferRuntime.h:3052
char const * getName() const noexcept
Returns the name of the network associated with the engine.
Definition: NvInferRuntime.h:3067
ProfilingVerbosity getProfilingVerbosity() const noexcept
Return the ProfilingVerbosity the builder config was set to when the engine was built.
Definition: NvInferRuntime.h:3189
bool isShapeInferenceIO(char const *tensorName) const noexcept
True if tensor is required as input for shape calculations or is output from shape calculations.
Definition: NvInferRuntime.h:2784
int64_t getWeightStreamingScratchMemorySize() const noexcept
Returns the size of the scratch memory required by the current weight streaming budget.
Definition: NvInferRuntime.h:3405
int64_t getDeviceMemorySizeV2() const noexcept
Return the maximum device memory required by the context over all profiles.
Definition: NvInferRuntime.h:2839
int32_t getTensorVectorizedDim(char const *tensorName) const noexcept
Return the dimension index that the buffer is vectorized, or -1 if the provided name does not map to ...
Definition: NvInferRuntime.h:3036
int32_t getTensorComponentsPerElement(char const *tensorName, int32_t profileIndex) const noexcept
Return the number of components included in one element of given profile, or -1 if tensor is not vect...
Definition: NvInferRuntime.h:2943
int64_t getDeviceMemorySizeForProfileV2(int32_t profileIndex) const noexcept
Return the maximum device memory required by the context for a profile.
Definition: NvInferRuntime.h:2855
IRuntimeConfig * createRuntimeConfig() noexcept
Create a runtime config for TensorRT JIT. The caller is responsible for ownership of the returned IRu...
Definition: NvInferRuntime.h:2823
TensorFormat getTensorFormat(char const *tensorName) const noexcept
Return the tensor format, or TensorFormat::kLINEAR if the provided name does not map to an input or o...
Definition: NvInferRuntime.h:2958
IHostMemory * serialize() const noexcept
Serialize the network to a stream.
Definition: NvInferRuntime.h:2729
TensorLocation getTensorLocation(char const *tensorName) const noexcept
Get whether an input or output tensor must be on GPU or CPU.
Definition: NvInferRuntime.h:2764
IEngineInspector * createEngineInspector() const noexcept
Create a new engine inspector which prints the layer information in an engine or an execution context...
Definition: NvInferRuntime.h:3199
int32_t getTensorBytesPerComponent(char const *tensorName, int32_t profileIndex) const noexcept
Return the number of bytes per component of an element given of given profile, or -1 if the tensor is...
Definition: NvInferRuntime.h:2904
HardwareCompatibilityLevel getHardwareCompatibilityLevel() const noexcept
Return the hardware compatibility level of this engine.
Definition: NvInferRuntime.h:3235
int32_t getNbOptimizationProfiles() const noexcept
Get the number of optimization profiles defined for this engine.
Definition: NvInferRuntime.h:3078
char const * getTensorFormatDesc(char const *tensorName, int32_t profileIndex) const noexcept
Return the human readable description of the tensor format of given profile, or empty string if the p...
Definition: NvInferRuntime.h:3019
TensorIOMode getTensorIOMode(char const *tensorName) const noexcept
Determine whether a tensor is an input or output tensor.
Definition: NvInferRuntime.h:2798
int32_t getNbLayers() const noexcept
Get the number of layers in the network.
Definition: NvInferRuntime.h:2715
int32_t getNbIOTensors() const noexcept
Return number of IO tensors.
Definition: NvInferRuntime.h:3212
int32_t getTensorComponentsPerElement(char const *tensorName) const noexcept
Return the number of components included in one element, or -1 if tensor is not vectorized or if the ...
Definition: NvInferRuntime.h:2925
bool isRefittable() const noexcept
Return true if an engine can be refit.
Definition: NvInferRuntime.h:2865
An IDimensionExpr represents an integer expression constructed from constants, input dimensions,...
Definition: NvInferRuntime.h:232
bool isConstant() const noexcept
Return true if expression is a build-time constant.
Definition: NvInferRuntime.h:237
bool isSizeTensor() const noexcept
Return true if this denotes the value of a size tensor.
Definition: NvInferRuntime.h:263
virtual ~IDimensionExpr() noexcept=default
apiv::VDimensionExpr * mImpl
Definition: NvInferRuntime.h:254
int64_t getConstantValue() const noexcept
Get the value of the constant.
Definition: NvInferRuntime.h:248
An engine inspector which prints out the layer information of an engine or an execution context.
Definition: NvInferRuntime.h:4499
char const * getLayerInformation(int32_t layerIndex, LayerInformationFormat format) const noexcept
Get a string describing the information about a specific layer in the current engine or the execution...
Definition: NvInferRuntime.h:4552
IErrorRecorder * getErrorRecorder() const noexcept
Get the ErrorRecorder assigned to this interface.
Definition: NvInferRuntime.h:4609
void setErrorRecorder(IErrorRecorder *recorder) noexcept
Set the ErrorRecorder for this interface.
Definition: NvInferRuntime.h:4594
IExecutionContext const * getExecutionContext() const noexcept
Get the context currently being inspected.
Definition: NvInferRuntime.h:4527
apiv::VEngineInspector * mImpl
Definition: NvInferRuntime.h:4615
virtual ~IEngineInspector() noexcept=default
char const * getEngineInformation(LayerInformationFormat format) const noexcept
Get a string describing the information about all the layers in the current engine or the execution c...
Definition: NvInferRuntime.h:4575
Context for executing inference using an engine, with functionally unsafe features.
Definition: NvInferRuntime.h:3567
IOutputAllocator * getOutputAllocator(char const *tensorName) const noexcept
Get output allocator associated with output tensor of given name, or nullptr if the provided name doe...
Definition: NvInferRuntime.h:4185
IErrorRecorder * getErrorRecorder() const noexcept
Get the ErrorRecorder assigned to this interface.
Definition: NvInferRuntime.h:3833
bool reportToProfiler() const noexcept
Calculate layer timing info for the current optimization profile in IExecutionContext and update the ...
Definition: NvInferRuntime.h:3952
virtual ~IExecutionContext() noexcept=default
void setDeviceMemory(void *memory) noexcept
Set the device memory for use by this execution context.
Definition: NvInferRuntime.h:3670
bool setTensorDebugState(char const *name, bool flag) noexcept
Set debug state of tensor given the tensor name.
Definition: NvInferRuntime.h:4392
char const * getName() const noexcept
Return the name of the execution context.
Definition: NvInferRuntime.h:3643
IGpuAllocator * getTemporaryStorageAllocator() const noexcept
Get allocator set by setTemporaryStorageAllocator.
Definition: NvInferRuntime.h:4233
void setEnqueueEmitsProfile(bool enqueueEmitsProfile) noexcept
Set whether enqueue emits layer timing to the profiler.
Definition: NvInferRuntime.h:3910
bool setUnfusedTensorsDebugState(bool flag) noexcept
Turn the debug state of unfused tensors on or off.
Definition: NvInferRuntime.h:4443
Dims getTensorShape(char const *tensorName) const noexcept
Return the shape of the given input or output.
Definition: NvInferRuntime.h:3783
bool getDebugState(char const *name) const noexcept
Get the debug state.
Definition: NvInferRuntime.h:4404
bool setInputShape(char const *tensorName, Dims const &dims) noexcept
Set shape of given input.
Definition: NvInferRuntime.h:3746
bool executeV2(void *const *bindings) noexcept
Synchronously execute a network.
Definition: NvInferRuntime.h:3850
bool getEnqueueEmitsProfile() const noexcept
Get the enqueueEmitsProfile state.
Definition: NvInferRuntime.h:3922
void const * getTensorAddress(char const *tensorName) const noexcept
Get memory address bound to given input or output tensor, or nullptr if the provided name does not ma...
Definition: NvInferRuntime.h:4013
bool setOutputAllocator(char const *tensorName, IOutputAllocator *outputAllocator) noexcept
Set output allocator to use for output tensor of given name. Pass nullptr to outputAllocator to unset...
Definition: NvInferRuntime.h:4172
bool setOptimizationProfileAsync(int32_t profileIndex, cudaStream_t stream) noexcept
Select an optimization profile for the current context with async semantics.
Definition: NvInferRuntime.h:3894
apiv::VExecutionContext * mImpl
Definition: NvInferRuntime.h:4459
bool setOutputTensorAddress(char const *tensorName, void *data) noexcept
Set the memory address for a given output tensor.
Definition: NvInferRuntime.h:4036
void setPersistentCacheLimit(size_t size) noexcept
Set the maximum size for persistent cache usage.
Definition: NvInferRuntime.h:4273
size_t getPersistentCacheLimit() const noexcept
Get the maximum size for persistent cache usage.
Definition: NvInferRuntime.h:4284
bool setAllTensorsDebugState(bool flag) noexcept
Turn the debug state of all debug tensors on or off.
Definition: NvInferRuntime.h:4427
ICudaEngine const & getEngine() const noexcept
Get the associated engine.
Definition: NvInferRuntime.h:3619
ProfilingVerbosity getNvtxVerbosity() const noexcept
Get the NVTX verbosity of the execution context.
Definition: NvInferRuntime.h:4320
size_t updateDeviceMemorySizeForShapes() noexcept
Recompute the internal activation buffer sizes based on the current input shapes, and return the tota...
Definition: NvInferRuntime.h:4127
void setAuxStreams(cudaStream_t *auxStreams, int32_t nbStreams) noexcept
Set the auxiliary streams that TensorRT should launch kernels on in the next enqueueV3() call.
Definition: NvInferRuntime.h:4351
int64_t getMaxOutputSize(char const *tensorName) const noexcept
Get upper bound on an output tensor's size, in bytes, based on the current optimization profile and i...
Definition: NvInferRuntime.h:4203
int32_t inferShapes(int32_t nbMaxNames, char const **tensorNames) noexcept
Run shape calculations.
Definition: NvInferRuntime.h:4110
bool setDebugListener(IDebugListener *listener) noexcept
Set DebugListener for this execution context.
Definition: NvInferRuntime.h:4363
bool setTensorAddress(char const *tensorName, void *data) noexcept
Set memory address for given input or output tensor.
Definition: NvInferRuntime.h:3996
bool setTemporaryStorageAllocator(IGpuAllocator *allocator) noexcept
Specify allocator to use for internal temporary storage.
Definition: NvInferRuntime.h:4223
void * getOutputTensorAddress(char const *tensorName) const noexcept
Get memory address for given output.
Definition: NvInferRuntime.h:4077
bool enqueueV3(cudaStream_t stream) noexcept
Enqueue inference on a stream.
Definition: NvInferRuntime.h:4257
IDebugListener * getDebugListener() noexcept
Get the DebugListener of this execution context.
Definition: NvInferRuntime.h:4373
int32_t getOptimizationProfile() const noexcept
Get the index of the currently selected optimization profile.
Definition: NvInferRuntime.h:3728
bool setInputTensorAddress(char const *tensorName, void const *data) noexcept
Set memory address for given input.
Definition: NvInferRuntime.h:4058
bool getDebugSync() const noexcept
Get the debug sync flag.
Definition: NvInferRuntime.h:3589
bool setInputConsumedEvent(cudaEvent_t event) noexcept
Mark input as consumed.
Definition: NvInferRuntime.h:4143
Dims getTensorStrides(char const *tensorName) const noexcept
Return the strides of the buffer for the given tensor name.
Definition: NvInferRuntime.h:3713
bool setNvtxVerbosity(ProfilingVerbosity verbosity) noexcept
Set the verbosity of the NVTX markers in the execution context.
Definition: NvInferRuntime.h:4308
IProfiler * getProfiler() const noexcept
Get the profiler.
Definition: NvInferRuntime.h:3609
void setErrorRecorder(IErrorRecorder *recorder) noexcept
Set the ErrorRecorder for this interface.
Definition: NvInferRuntime.h:3818
void setDeviceMemoryV2(void *memory, int64_t size) noexcept
Set the device memory and its corresponding size for use by this execution context.
Definition: NvInferRuntime.h:3692
bool allInputDimensionsSpecified() const noexcept
Whether all dynamic dimensions of input tensors have been specified.
Definition: NvInferRuntime.h:3799
bool getUnfusedTensorsDebugState() const noexcept
Get the debug state of unfused tensors.
Definition: NvInferRuntime.h:4453
void setProfiler(IProfiler *profiler) noexcept
Set the profiler.
Definition: NvInferRuntime.h:3599
void setName(char const *name) noexcept
Set the name of the execution context.
Definition: NvInferRuntime.h:3633
cudaEvent_t getInputConsumedEvent() const noexcept
The event associated with consuming the input.
Definition: NvInferRuntime.h:4153
IRuntimeConfig * getRuntimeConfig() const noexcept
Get the runtime config object used during execution context creation.
Definition: NvInferRuntime.h:4414
Object for constructing IDimensionExpr.
Definition: NvInferRuntime.h:287
IDimensionExpr const * operation(DimensionOperation op, IDimensionExpr const &first, IDimensionExpr const &second) noexcept
Get the operation.
Definition: NvInferRuntime.h:303
virtual ~IExprBuilder() noexcept=default
IDimensionExpr const * constant(int64_t value) noexcept
Return pointer to IDimensionExpr for given value.
Definition: NvInferRuntime.h:292
apiv::VExprBuilder * mImpl
Definition: NvInferRuntime.h:310
IDimensionExpr const * declareSizeTensor(int32_t outputIndex, IDimensionExpr const &opt, IDimensionExpr const &upper)
Declare a size tensor at the given output index, with the specified auto-tuning formula and upper bou...
Definition: NvInferRuntime.h:338
Class to handle library allocated memory that is accessible to the user.
Definition: NvInferRuntime.h:142
void * data() const noexcept
A pointer to the raw data that is owned by the library.
Definition: NvInferRuntime.h:147
DataType type() const noexcept
The type of the memory that was allocated.
Definition: NvInferRuntime.h:159
std::size_t size() const noexcept
The size in bytes of the data that was allocated.
Definition: NvInferRuntime.h:153
apiv::VHostMemory * mImpl
Definition: NvInferRuntime.h:165
virtual ~IHostMemory() noexcept=default
A virtual base class to find a logger. Allows a plugin to find an instance of a logger if it needs to...
Definition: NvInferRuntime.h:4710
virtual ILogger * findLogger()=0
Get the logger used by the engine or execution context which called the plugin method.
virtual ~ILoggerFinder()=default
Application-implemented logging interface for the builder, refitter and runtime.
Definition: NvInferRuntime.h:1052
virtual ~ILogger()=default
Severity
The severity corresponding to a log message.
Definition: NvInferRuntime.h:1060
@ kWARNING
An application error has been discovered, but TensorRT has recovered or fallen back to a default.
@ kERROR
An application error has occurred.
@ kINFO
Informational messages with instructional information.
@ kINTERNAL_ERROR
An internal error has occurred. Execution is unrecoverable.
@ kVERBOSE
Verbose messages with debugging information.
virtual void log(Severity severity, AsciiChar const *msg) noexcept=0
A callback implemented by the application to handle logging messages;.
Forward declaration of IEngineInspector for use by other interfaces.
Definition: NvInferRuntime.h:51
INoCopy & operator=(INoCopy &&other)=delete
INoCopy(INoCopy const &other)=delete
INoCopy(INoCopy &&other)=delete
virtual ~INoCopy()=default
INoCopy & operator=(INoCopy const &other)=delete
Optimization profile for dynamic input dimensions and shape tensors.
Definition: NvInferRuntime.h:2041
TRT_DEPRECATED int32_t const * getShapeValues(char const *inputName, OptProfileSelector select) const noexcept
Get the minimum / optimum / maximum values for an input shape tensor.
Definition: NvInferRuntime.h:2162
apiv::VOptimizationProfile * mImpl
Definition: NvInferRuntime.h:2278
Dims getDimensions(char const *inputName, OptProfileSelector select) const noexcept
Get the minimum / optimum / maximum dimensions for a dynamic input tensor.
Definition: NvInferRuntime.h:2082
TRT_DEPRECATED bool setShapeValues(char const *inputName, OptProfileSelector select, int32_t const *values, int32_t nbValues) noexcept
Set the minimum / optimum / maximum values for an input shape tensor.
Definition: NvInferRuntime.h:2134
virtual ~IOptimizationProfile() noexcept=default
float getExtraMemoryTarget() const noexcept
Get the extra memory target that has been defined for this profile.
Definition: NvInferRuntime.h:2192
bool setExtraMemoryTarget(float target) noexcept
Set a target for extra GPU memory that may be used by this profile.
Definition: NvInferRuntime.h:2180
bool setDimensions(char const *inputName, OptProfileSelector select, Dims const &dims) noexcept
Set the minimum / optimum / maximum dimensions for a dynamic input tensor.
Definition: NvInferRuntime.h:2070
bool isValid() const noexcept
Check whether the optimization profile can be passed to an IBuilderConfig object.
Definition: NvInferRuntime.h:2209
int64_t const * getShapeValuesV2(char const *inputName, OptProfileSelector select) const noexcept
Get the minimum / optimum / maximum values for an input shape tensor.
Definition: NvInferRuntime.h:2272
bool setShapeValuesV2(char const *inputName, OptProfileSelector select, int64_t const *values, int32_t nbValues) noexcept
Set the minimum / optimum / maximum values for an input shape tensor.
Definition: NvInferRuntime.h:2259
int32_t getNbShapeValues(char const *inputName) const noexcept
Get the number of values for an input shape tensor.
Definition: NvInferRuntime.h:2148
Single registration point for all plugins in an application. It is used to find plugin implementation...
Definition: NvInferRuntimeCommon.h:56
virtual TRT_DEPRECATED bool registerCreator(IPluginCreator &creator, AsciiChar const *const pluginNamespace) noexcept=0
Register a plugin creator implementing IPluginCreator. Returns false if any plugin creator with the s...
Similar to IPluginV2Ext, but with support for dynamic shapes.
Definition: NvInferRuntime.h:407
IPluginV2DynamicExt * clone() const noexcept override=0
Clone the plugin object. This copies over internal plugin parameters as well and returns a new plugin...
virtual ~IPluginV2DynamicExt() noexcept
Definition: NvInferRuntime.h:558
Plugin class for user-implemented layers.
Definition: NvInferRuntimePlugin.h:474
Updates weights in an engine.
Definition: NvInferRuntime.h:1633
bool setWeights(char const *layerName, WeightsRole role, Weights weights) noexcept
Specify new weights for a layer of given name. Returns true on success, or false if new weights are r...
Definition: NvInferRuntime.h:1652
bool refitCudaEngineAsync(cudaStream_t stream) noexcept
Enqueue weights refitting of the associated engine on the given stream.
Definition: NvInferRuntime.h:1963
int32_t getMaxThreads() const noexcept
get the maximum number of threads that can be used by the refitter.
Definition: NvInferRuntime.h:1842
TensorLocation getWeightsLocation(char const *weightsName) const noexcept
Get location for the weights associated with the given name.
Definition: NvInferRuntime.h:1901
bool setNamedWeights(char const *name, Weights weights) noexcept
Specify new weights of given name.
Definition: NvInferRuntime.h:1766
int32_t getAllWeights(int32_t size, char const **weightsNames) noexcept
Get names of all weights that could be refit.
Definition: NvInferRuntime.h:1802
ILogger * getLogger() const noexcept
get the logger with which the refitter was created
Definition: NvInferRuntime.h:1812
bool refitCudaEngine() noexcept
Refits associated engine.
Definition: NvInferRuntime.h:1669
int32_t getMissingWeights(int32_t size, char const **weightsNames) noexcept
Get names of missing weights.
Definition: NvInferRuntime.h:1786
int32_t getMissing(int32_t size, char const **layerNames, WeightsRole *roles) noexcept
Get description of missing weights.
Definition: NvInferRuntime.h:1690
Weights getNamedWeights(char const *weightsName) const noexcept
Get weights associated with the given name.
Definition: NvInferRuntime.h:1885
bool unsetNamedWeights(char const *weightsName) noexcept
Unset weights associated with the given name.
Definition: NvInferRuntime.h:1917
Weights getWeightsPrototype(char const *weightsName) const noexcept
Get the Weights prototype associated with the given name.
Definition: NvInferRuntime.h:1981
bool setMaxThreads(int32_t maxThreads) noexcept
Set the maximum number of threads.
Definition: NvInferRuntime.h:1828
bool setNamedWeights(char const *name, Weights weights, TensorLocation location) noexcept
Specify new weights on a specified device of given name.
Definition: NvInferRuntime.h:1869
void setWeightsValidation(bool weightsValidation) noexcept
Set whether to validate weights during refitting.
Definition: NvInferRuntime.h:1933
apiv::VRefitter * mImpl
Definition: NvInferRuntime.h:1987
int32_t getAll(int32_t size, char const **layerNames, WeightsRole *roles) noexcept
Get description of all weights that could be refit.
Definition: NvInferRuntime.h:1707
virtual ~IRefitter() noexcept=default
bool getWeightsValidation() const noexcept
Get whether to validate weights values during refitting.
Definition: NvInferRuntime.h:1941
void setErrorRecorder(IErrorRecorder *recorder) noexcept
Set the ErrorRecorder for this interface.
Definition: NvInferRuntime.h:1726
IErrorRecorder * getErrorRecorder() const noexcept
Get the ErrorRecorder assigned to this interface.
Definition: NvInferRuntime.h:1741
A class for Runtime cache currently used for TensorRT JIT compilation. This cache can be serialized a...
Definition: NvInferRuntime.h:2497
virtual ~IRuntimeCache() noexcept=default
bool deserialize(void const *blob, size_t size) noexcept
Deserialize the Runtime cache from a stream that contains serialized Runtime cache.
Definition: NvInferRuntime.h:2517
bool reset() noexcept
Reset the Runtime cache. Clears all content within the cache.
Definition: NvInferRuntime.h:2525
apiv::VRuntimeCache * mImpl
Definition: NvInferRuntime.h:2531
A class for runtime configuration. This class is used during execution context creation.
Definition: NvInferRuntime.h:2584
DynamicShapesKernelSpecializationStrategy getDynamicShapesKernelSpecializationStrategy() const noexcept
Return the dynamic shape specialization strategy of this config.
Definition: NvInferRuntime.h:2654
virtual ~IRuntimeConfig() noexcept=default
apiv::VRuntimeConfig * mImpl
Definition: NvInferRuntime.h:2661
IRuntimeCache * createRuntimeCache() const noexcept
Create an empty Runtime cache.
Definition: NvInferRuntime.h:2613
ExecutionContextAllocationStrategy getExecutionContextAllocationStrategy() const noexcept
Get the execution context allocation strategy.
Definition: NvInferRuntime.h:2603
bool setRuntimeCache(IRuntimeCache const &cache) noexcept
Set Runtime cache to the runtime config. Enables Runtime caching.
Definition: NvInferRuntime.h:2623
void setDynamicShapesKernelSpecializationStrategy(DynamicShapesKernelSpecializationStrategy dynamicShapesKernelSpecializationStrategy) noexcept
Set the dynamic shape kernel specialization strategy for this config.
Definition: NvInferRuntime.h:2643
IRuntimeCache * getRuntimeCache() const noexcept
Get the Runtime cache from the runtime config.
Definition: NvInferRuntime.h:2633
Allows a serialized functionally unsafe engine to be deserialized.
Definition: NvInferRuntime.h:1329
bool setMaxThreads(int32_t maxThreads) noexcept
Set the maximum number of threads.
Definition: NvInferRuntime.h:1478
IRuntime * loadRuntime(char const *path) noexcept
Load IRuntime from the file.
Definition: NvInferRuntime.h:1594
bool getEngineHostCodeAllowed() const noexcept
Get whether the runtime is allowed to deserialize engines with host executable code.
Definition: NvInferRuntime.h:1616
TempfileControlFlags getTempfileControlFlags() const noexcept
Get the tempfile control flags for this runtime.
Definition: NvInferRuntime.h:1566
void setEngineHostCodeAllowed(bool allowed) noexcept
Set whether the runtime is allowed to deserialize engines with host executable code.
Definition: NvInferRuntime.h:1606
virtual ~IRuntime() noexcept=default
void setTemporaryDirectory(char const *path) noexcept
Set the directory that will be used by this runtime for temporary files.
Definition: NvInferRuntime.h:1527
IPluginRegistry & getPluginRegistry() noexcept
Get the local plugin registry that can be used by the runtime.
Definition: NvInferRuntime.h:1576
apiv::VRuntime * mImpl
Definition: NvInferRuntime.h:1622
void setDLACore(int32_t dlaCore) noexcept
Sets the DLA core used by the network. Defaults to -1.
Definition: NvInferRuntime.h:1344
int32_t getNbDLACores() const noexcept
Returns number of DLA hardware cores accessible or 0 if DLA is unavailable.
Definition: NvInferRuntime.h:1362
ICudaEngine * deserializeCudaEngine(void const *blob, std::size_t size) noexcept
Deserialize an engine from host memory.
Definition: NvInferRuntime.h:1430
void setTempfileControlFlags(TempfileControlFlags flags) noexcept
Set the tempfile control flags for this runtime.
Definition: NvInferRuntime.h:1554
int32_t getDLACore() const noexcept
Get the DLA core that the engine executes on.
Definition: NvInferRuntime.h:1354
void setGpuAllocator(IGpuAllocator *allocator) noexcept
Set the GPU allocator.
Definition: NvInferRuntime.h:1378
IErrorRecorder * getErrorRecorder() const noexcept
Get the ErrorRecorder assigned to this interface.
Definition: NvInferRuntime.h:1412
ICudaEngine * deserializeCudaEngine(IStreamReaderV2 &streamReader)
Deserialize an engine from a stream. IStreamReaderV2 is expected to support reading to both host and ...
Definition: NvInferRuntime.h:1453
ILogger * getLogger() const noexcept
Get the logger with which the runtime was created.
Definition: NvInferRuntime.h:1463
int32_t getMaxThreads() const noexcept
Get the maximum number of threads that can be used by the runtime.
Definition: NvInferRuntime.h:1492
char const * getTemporaryDirectory() const noexcept
Get the directory that will be used by this runtime for temporary files.
Definition: NvInferRuntime.h:1538
void setErrorRecorder(IErrorRecorder *recorder) noexcept
Set the ErrorRecorder for this interface.
Definition: NvInferRuntime.h:1397
Holds properties for configuring an engine to serialize the binary.
Definition: NvInferRuntime.h:2388
virtual ~ISerializationConfig() noexcept=default
bool clearFlag(SerializationFlag serializationFlag) noexcept
clear a serialization flag.
Definition: NvInferRuntime.h:2427
bool setFlag(SerializationFlag serializationFlag) noexcept
Set a serialization flag.
Definition: NvInferRuntime.h:2439
SerializationFlags getFlags() const noexcept
Get the serialization flags for this config.
Definition: NvInferRuntime.h:2415
bool getFlag(SerializationFlag serializationFlag) const noexcept
Returns true if the serialization flag is set.
Definition: NvInferRuntime.h:2451
apiv::VSerializationConfig * mImpl
Definition: NvInferRuntime.h:2457
An Interface class for version control.
Definition: NvInferRuntimeBase.h:276
IVersionedInterface & operator=(IVersionedInterface const &) &=default
Version information associated with a TRT interface.
Definition: NvInferRuntimeBase.h:241
Register the plugin creator to the registry The static registry object will be instantiated when the ...
Definition: NvInferRuntime.h:4685
PluginRegistrar()
Definition: NvInferRuntime.h:4687
An array of weights used as a layer parameter.
Definition: NvInferRuntime.h:124
DataType type
The type of the weights.
Definition: NvInferRuntime.h:126
int64_t count
The number of weights in the array.
Definition: NvInferRuntime.h:128
void const * values
The weight values, in a contiguous array.
Definition: NvInferRuntime.h:127
Definition: NvInferRuntime.h:3518
virtual bool processDebugTensor(void const *addr, TensorLocation location, DataType type, Dims const &shape, char const *name, cudaStream_t stream)=0
Callback function that is called when a debug tensor’s value is updated and the debug state of the te...
InterfaceInfo getInterfaceInfo() const noexcept override
Return version information associated with this interface. Applications must not override this method...
Definition: NvInferRuntime.h:3523
~IDebugListener() override=default
Definition: NvInferRuntimeBase.h:413
Definition: NvInferRuntime.h:1120
virtual void * allocateAsync(uint64_t const size, uint64_t const alignment, AllocatorFlags const flags, cudaStream_t) noexcept
A thread-safe callback implemented by the application to handle stream-ordered acquisition of GPU mem...
Definition: NvInferRuntime.h:1242
InterfaceInfo getInterfaceInfo() const noexcept override
Return version information associated with this interface. Applications must not override this method...
Definition: NvInferRuntime.h:1283
virtual TRT_DEPRECATED bool deallocate(void *const memory) noexcept=0
A thread-safe callback implemented by the application to handle release of GPU memory.
~IGpuAllocator() override=default
virtual void * reallocate(void *const, uint64_t, uint64_t) noexcept
A thread-safe callback implemented by the application to resize an existing allocation.
Definition: NvInferRuntime.h:1189
virtual TRT_DEPRECATED void * allocate(uint64_t const size, uint64_t const alignment, AllocatorFlags const flags) noexcept=0
A thread-safe callback implemented by the application to handle acquisition of GPU memory.
virtual bool deallocateAsync(void *const memory, cudaStream_t) noexcept
A thread-safe callback implemented by the application to handle stream-ordered release of GPU memory.
Definition: NvInferRuntime.h:1275
Definition: NvInferRuntime.h:4731
bool deallocateAsync(void *const memory, cudaStream_t) noexcept override=0
A thread-safe callback implemented by the application to handle stream-ordered asynchronous release o...
void * allocateAsync(uint64_t const size, uint64_t const alignment, AllocatorFlags const flags, cudaStream_t) noexcept override=0
A thread-safe callback implemented by the application to handle stream-ordered asynchronous acquisiti...
TRT_DEPRECATED void * allocate(uint64_t const size, uint64_t const alignment, AllocatorFlags const flags) noexcept override
A thread-safe callback implemented by the application to handle acquisition of GPU memory.
Definition: NvInferRuntime.h:4818
TRT_DEPRECATED bool deallocate(void *const memory) noexcept override
A thread-safe callback implemented by the application to handle release of GPU memory.
Definition: NvInferRuntime.h:4842
InterfaceInfo getInterfaceInfo() const noexcept override
Return version information associated with this interface. Applications must not override this method...
Definition: NvInferRuntime.h:4850
~IGpuAsyncAllocator() override=default
Definition: NvInferRuntime.h:3456
InterfaceInfo getInterfaceInfo() const noexcept override
Return version information associated with this interface. Applications must not override this method...
Definition: NvInferRuntime.h:3461
virtual void * reallocateOutputAsync(char const *tensorName, void *currentMemory, uint64_t size, uint64_t alignment, cudaStream_t)
Return a pointer to memory for an output tensor, or nullptr if memory cannot be allocated....
Definition: NvInferRuntime.h:3488
virtual void notifyShape(char const *tensorName, Dims const &dims) noexcept=0
Called by TensorRT when the shape of the output tensor is known.
Definition: NvInferRuntime.h:756
virtual ~IProfiler() noexcept
Definition: NvInferRuntime.h:767
virtual void reportLayerTime(char const *layerName, float ms) noexcept=0
Layer time reporting callback.
Definition: NvInferRuntime.h:631
~IStreamReader() override=default
IStreamReader & operator=(IStreamReader const &) &=default
IStreamReader & operator=(IStreamReader &&) &=default
virtual int64_t read(void *destination, int64_t nbBytes)=0
Read the next number of bytes in the stream.
IStreamReader(IStreamReader &&)=default
IStreamReader(IStreamReader const &)=default
InterfaceInfo getInterfaceInfo() const noexcept override
Return version information associated with this interface. Applications must not override this method...
Definition: NvInferRuntime.h:643
Definition: NvInferRuntime.h:695
IStreamReaderV2 & operator=(IStreamReaderV2 const &) &=default
InterfaceInfo getInterfaceInfo() const noexcept override
Return version information associated with this interface. Applications must not override this method...
Definition: NvInferRuntime.h:707
IStreamReaderV2(IStreamReaderV2 &&)=default
~IStreamReaderV2() override=default
virtual int64_t read(void *destination, int64_t nbBytes, cudaStream_t stream) noexcept=0
Read the next number of bytes in the stream asynchronously.
IStreamReaderV2(IStreamReaderV2 const &)=default
virtual bool seek(int64_t offset, SeekPosition where) noexcept=0
Sets the position of the stream to the given offset.
IStreamReaderV2 & operator=(IStreamReaderV2 &&) &=default
IRefitter * createInferRefitter(ICudaEngine &engine, ILogger &logger) noexcept
Create an instance of an IRefitter class.
Definition: NvInferRuntime.h:4665
IRuntime * createInferRuntime(ILogger &logger) noexcept
Create an instance of an IRuntime class.
Definition: NvInferRuntime.h:4654
The TensorRT API version 1 namespace.
uint32_t TacticSources
Represents a collection of one or more TacticSource values combined using bitwise-OR operations.
Definition: NvInferRuntime.h:2327
v_1_0::IOutputAllocator IOutputAllocator
Definition: NvInferRuntime.h:3513
EngineCapability
List of supported engine capability flows.
Definition: NvInferRuntime.h:76
DimensionOperation
An operation on two IDimensionExpr, which represent integer expressions used in dimension computation...
Definition: NvInferRuntime.h:179
@ kSUB
Subtract the second element from the first.
@ kSUM
Sum of the two operands.
@ kPROD
Product of the two operands.
@ kFLOOR_DIV
Floor division of the first element by the second.
@ kEQUAL
1 if operands are equal, 0 otherwise.
@ kMIN
Minimum of the two operands.
@ kLESS
1 if first operand is less than second operand, 0 otherwise.
@ kMAX
Maximum of the two operands.
@ kCEIL_DIV
Division rounding up.
TensorIOMode
Definition of tensor IO Mode.
Definition: NvInferRuntimeBase.h:656
HardwareCompatibilityLevel
Describes requirements of compatibility with GPU architectures other than that of the GPU on which th...
Definition: NvInfer.h:8416
SerializationFlag
List of valid flags that the engine can enable when serializing the bytes.
Definition: NvInferRuntime.h:2368
@ kEXCLUDE_WEIGHTS
Exclude the weights that can be refitted.
constexpr int32_t EnumMax< DynamicShapesKernelSpecializationStrategy >() noexcept
Maximum number of dynamic shape specialization strategies in DynamicShapesKernelSpecializationStrateg...
Definition: NvInferRuntime.h:2572
v_1_0::IProfiler IProfiler
Definition: NvInferRuntime.h:782
DynamicShapesKernelSpecializationStrategy
Different kernel specialization strategies for dynamic shapes.
Definition: NvInferRuntime.h:2546
SeekPosition
Controls the seek mode of IStreamReaderV2.
Definition: NvInferRuntime.h:681
@ kSET
From the beginning of the file.
@ kCUR
From the current position of the file.
@ kEND
From the tail of the file.
v_1_0::IStreamReaderV2 IStreamReaderV2
Definition: NvInferRuntime.h:751
uint32_t TempfileControlFlags
Represents a collection of one or more TempfileControlFlag values combined using bitwise-OR operation...
Definition: NvInferRuntime.h:860
v_1_0::IGpuAllocator IGpuAllocator
Definition: NvInferRuntime.h:1319
constexpr int32_t EnumMax< WeightsRole >() noexcept
Maximum number of elements in WeightsRole enum.
Definition: NvInferRuntime.h:803
char_t AsciiChar
Definition: NvInferRuntimeBase.h:115
@ kV2_DYNAMICEXT
IPluginV2DynamicExt.
constexpr int32_t EnumMax< LayerInformationFormat >() noexcept
Definition: NvInferRuntime.h:4478
DataType
The type of weights and tensors.
Definition: NvInferRuntimeBase.h:143
DeviceType
The device that this layer/network will execute on.
Definition: NvInferRuntime.h:814
v_1_0::IDebugListener IDebugListener
Definition: NvInferRuntime.h:3554
TempfileControlFlag
Flags used to control TensorRT's behavior when creating executable temporary files.
Definition: NvInferRuntime.h:837
@ kALLOW_IN_MEMORY_FILES
Allow creating and loading files in-memory (or unnamed files).
constexpr int32_t EnumMax< OptProfileSelector >() noexcept
Number of different values of OptProfileSelector enum.
Definition: NvInferRuntime.h:2013
WeightsRole
How a layer uses particular Weights.
Definition: NvInferRuntime.h:792
@ kSHIFT
shift part of IScaleLayer
@ kANY
Any other weights role.
@ kBIAS
bias for IConvolutionLayer or IDeconvolutionLayer
@ kSCALE
scale part of IScaleLayer
@ kCONSTANT
weights for IConstantLayer
@ kKERNEL
kernel for IConvolutionLayer or IDeconvolutionLayer
constexpr int32_t EnumMax< ProfilingVerbosity >() noexcept
Maximum number of profile verbosity levels in ProfilingVerbosity enum.
Definition: NvInferRuntime.h:2347
ProfilingVerbosity
List of verbosity levels of layer information exposed in NVTX annotations and in IEngineInspector.
Definition: NvInferRuntime.h:2339
@ kLAYER_NAMES_ONLY
Print only the layer names. This is the default setting.
@ kDETAILED
Print detailed layer information including layer names and layer parameters.
TacticSource
List of tactic sources for TensorRT.
Definition: NvInferRuntime.h:2290
TensorFormat PluginFormat
PluginFormat is reserved for backward compatibility.
Definition: NvInferRuntimePlugin.h:54
constexpr int32_t EnumMax< TempfileControlFlag >() noexcept
Maximum number of elements in TempfileControlFlag enum.
Definition: NvInferRuntime.h:849
uint32_t SerializationFlags
Represents one or more SerializationFlag values using binary OR operations, e.g., 1U << Serialization...
Definition: NvInferRuntime.h:2358
TensorFormat
Format of the input/output tensors.
Definition: NvInferRuntime.h:894
ExecutionContextAllocationStrategy
Different memory allocation behaviors for IExecutionContext.
Definition: NvInferRuntime.h:2472
@ kSTATIC
Default static allocation with the maximum size across all profiles.
@ kUSER_MANAGED
The user supplies custom allocation to the execution context.
@ kON_PROFILE_CHANGE
Reallocate for a profile when it's selected.
constexpr int32_t EnumMax< TacticSource >() noexcept
Maximum number of tactic sources in TacticSource enum.
Definition: NvInferRuntime.h:2316
LayerInformationFormat
The format in which the IEngineInspector prints the layer information.
Definition: NvInferRuntime.h:4470
@ kJSON
Print layer information in JSON format.
@ kONELINE
Print layer information in one line per layer.
v_1_0::IGpuAsyncAllocator IGpuAsyncAllocator
Definition: NvInferRuntime.h:4871
v_1_0::IStreamReader IStreamReader
Definition: NvInferRuntime.h:674
AllocatorFlag
Allowed type of memory allocation.
Definition: NvInferRuntime.h:1018
@ kRESIZABLE
TensorRT may call realloc() on this allocation.
constexpr int32_t EnumMax< DeviceType >() noexcept
Maximum number of elements in DeviceType enum.
Definition: NvInferRuntime.h:821
constexpr int32_t EnumMax< DimensionOperation >() noexcept
Maximum number of elements in DimensionOperation enum.
Definition: NvInferRuntime.h:193
constexpr int32_t EnumMax< ExecutionContextAllocationStrategy >() noexcept
Maximum number of memory allocation strategies in ExecutionContextAllocationStrategy enum.
Definition: NvInferRuntime.h:2484
constexpr int32_t EnumMax< SerializationFlag >() noexcept
Maximum number of serialization flags in SerializationFlag enum.
Definition: NvInferRuntime.h:2375
TensorLocation
The location for tensor data storage, device or host.
Definition: NvInferRuntime.h:204
@ kHOST
Data stored on host.
@ kDEVICE
Data stored on device.
OptProfileSelector
When setting or querying optimization profile parameters (such as shape tensor inputs or dynamic dime...
Definition: NvInferRuntime.h:2001
@ kOPT
This is used to set or get the value that is used in the optimization (kernel selection).
@ kMIN
This is used to set or get the minimum permitted value for dynamic dimensions etc.
@ kMAX
This is used to set or get the maximum permitted value for dynamic dimensions etc.
uint32_t AllocatorFlags
Definition: NvInferRuntime.h:1034
Summarizes tensors that a plugin might see for an input or output.
Definition: NvInferRuntime.h:362
Dims min
Lower bounds on tensor’s dimensions.
Definition: NvInferRuntime.h:367
Dims max
Upper bounds on tensor’s dimensions.
Definition: NvInferRuntime.h:370
Dims opt
Optimum value of tensor’s dimensions specified for auto-tuning.
Definition: NvInferRuntime.h:373
PluginTensorDesc desc
Information required to interpret a pointer to tensor data, except that desc.dims has -1 in place of ...
Definition: NvInferRuntime.h:364
Fields that a plugin might see for an input or output.
Definition: NvInferRuntimePlugin.h:73
Declaration of EnumMaxImpl struct to store maximum number of elements in an enumeration type.
Definition: NvInferRuntimeBase.h:128

  Copyright © 2024 NVIDIA Corporation
  Privacy Policy | Manage My Privacy | Do Not Sell or Share My Data | Terms of Service | Accessibility | Corporate Policies | Product Security | Contact