// TensorRT-RTX 1.1.1 — NvInferRuntime.h
// NOTE(review): this file was recovered from a Doxygen source listing; the original page's
// navigation text and fused line-number prefixes are extraction artifacts, not header content.
/*
 * SPDX-FileCopyrightText: Copyright (c) 1993-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 * SPDX-License-Identifier: Apache-2.0
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
17
18#ifndef NV_INFER_RUNTIME_H
19#define NV_INFER_RUNTIME_H
20
26
27#include "NvInferImpl.h"
28#define NV_INFER_INTERNAL_INCLUDE 1
29#include "NvInferPluginBase.h" // IWYU pragma: exports
30#undef NV_INFER_INTERNAL_INCLUDE
32
33namespace nvinfer1
34{
35
36class IExecutionContext;
37class ICudaEngine;
38class IPluginFactory;
39class IEngineInspector;
40
49
51{
52protected:
53 INoCopy() = default;
54 virtual ~INoCopy() = default;
55 INoCopy(INoCopy const& other) = delete;
56 INoCopy& operator=(INoCopy const& other) = delete;
57 INoCopy(INoCopy&& other) = delete;
58 INoCopy& operator=(INoCopy&& other) = delete;
59};
60
//!
//! \enum EngineCapability
//!
//! \brief List of supported engine capability flows.
//!
//! NOTE(review): the third enumerator was lost in the documentation extraction
//! (embedded source lines 91-96 are missing). It is reconstructed here from the
//! public NvInferRuntime.h header, consistent with
//! impl::EnumMaxImpl&lt;EngineCapability&gt;::kVALUE == 3 declared just below in this file.
//!
enum class EngineCapability : int32_t
{
    //! Standard flow: full TensorRT functionality, not targeting the safety runtime.
    kSTANDARD = 0,

    //! Safety-restricted flow: functionality restricted for use with the safety runtime.
    kSAFETY = 1,

    //! DLA-standalone flow: restricted to building standalone DLA loadables.
    kDLA_STANDALONE = 2,
};
98
99namespace impl
100{
102template <>
104{
105 static constexpr int32_t kVALUE = 3;
106};
107} // namespace impl
108
124{
125public:
127 void const* values;
128 int64_t count;
129};
130
//!
//! \class IHostMemory
//!
//! \brief Handle to a library-allocated block of host memory.
//!
//! Non-copyable wrapper whose accessors all forward to the implementation
//! object (apiv::VHostMemory). The memory itself is owned by the library
//! implementation, not by the caller.
//!
class IHostMemory : public INoCopy
{
public:
    virtual ~IHostMemory() noexcept = default;

    //! \return A pointer to the underlying data.
    void* data() const noexcept
    {
        return mImpl->data();
    }

    //! \return The size of the data (presumably in bytes — confirm against vendor docs).
    std::size_t size() const noexcept
    {
        return mImpl->size();
    }

    //! \return The DataType of the stored elements.
    DataType type() const noexcept
    {
        return mImpl->type();
    }

protected:
    apiv::VHostMemory* mImpl; //!< Implementation object that all accessors delegate to.
};
167
//!
//! \enum DimensionOperation
//!
//! \brief Binary operation applied to two IDimensionExpr operands, used when
//! building dimension expressions via IExprBuilder::operation() in this file.
//! Operand semantics follow the public TensorRT API documentation.
//!
enum class DimensionOperation : int32_t
{
    kSUM = 0,       //!< Sum of the operands.
    kPROD = 1,      //!< Product of the operands.
    kMAX = 2,       //!< Maximum of the operands.
    kMIN = 3,       //!< Minimum of the operands.
    kSUB = 4,       //!< Difference of the operands.
    kEQUAL = 5,     //!< 1 if operands are equal, 0 otherwise.
    kLESS = 6,      //!< 1 if first operand is less than second, 0 otherwise.
    kFLOOR_DIV = 7, //!< Floor division of first operand by second.
    kCEIL_DIV = 8   //!< Ceiling division of first operand by second.
};
190
//! \brief Maximum number of elements in the DimensionOperation enum.
//! \see DimensionOperation
template <>
constexpr inline int32_t EnumMax<DimensionOperation>() noexcept
{
    return 9;
}
197
//!
//! \enum TensorLocation
//!
//! \brief Location of tensor data: device (GPU) or host (CPU) memory.
//!
enum class TensorLocation : int32_t
{
    kDEVICE = 0, //!< Data is stored on the device.
    kHOST = 1,   //!< Data is stored on the host.
};
208
209namespace impl
210{
212template <>
214{
215 static constexpr int32_t kVALUE = 2;
216};
217} // namespace impl
218
232{
233public:
237 bool isConstant() const noexcept
238 {
239 return mImpl->isConstant();
240 }
241
248 int64_t getConstantValue() const noexcept
249 {
250 return mImpl->getConstantValue();
251 }
252
253protected:
254 apiv::VDimensionExpr* mImpl;
255 virtual ~IDimensionExpr() noexcept = default;
256
257public:
263 bool isSizeTensor() const noexcept
264 {
265 return mImpl->isSizeTensor();
266 }
267};
268
286class IExprBuilder : public INoCopy
287{
288public:
292 IDimensionExpr const* constant(int64_t value) noexcept
293 {
294 return mImpl->constant(value);
295 }
296
304 DimensionOperation op, IDimensionExpr const& first, IDimensionExpr const& second) noexcept
305 {
306 return mImpl->operation(op, first, second);
307 }
308
309protected:
310 apiv::VExprBuilder* mImpl;
311 virtual ~IExprBuilder() noexcept = default;
312
313public:
338 IDimensionExpr const* declareSizeTensor(int32_t outputIndex, IDimensionExpr const& opt, IDimensionExpr const& upper)
339 {
340 return mImpl->declareSizeTensor(outputIndex, opt, upper);
341 }
342};
343
350{
351public:
352 int32_t nbDims;
354};
355
362{
365
368
371
374};
375
407{
408public:
409 IPluginV2DynamicExt* clone() const noexcept override = 0;
410
435 virtual DimsExprs getOutputDimensions(
436 int32_t outputIndex, DimsExprs const* inputs, int32_t nbInputs, IExprBuilder& exprBuilder) noexcept = 0;
437
441 static constexpr int32_t kFORMAT_COMBINATION_LIMIT = 100;
442
475 virtual bool supportsFormatCombination(
476 int32_t pos, PluginTensorDesc const* inOut, int32_t nbInputs, int32_t nbOutputs) noexcept = 0;
477
515 virtual void configurePlugin(DynamicPluginTensorDesc const* in, int32_t nbInputs,
516 DynamicPluginTensorDesc const* out, int32_t nbOutputs) noexcept = 0;
517
527 virtual size_t getWorkspaceSize(PluginTensorDesc const* inputs, int32_t nbInputs, PluginTensorDesc const* outputs,
528 int32_t nbOutputs) const noexcept = 0;
529
542 virtual int32_t enqueue(PluginTensorDesc const* inputDesc, PluginTensorDesc const* outputDesc,
543 void const* const* inputs, void* const* outputs, void* workspace, cudaStream_t stream) noexcept = 0;
544
545protected:
553 int32_t getTensorRTVersion() const noexcept override
554 {
555 return (static_cast<int32_t>(PluginVersion::kV2_DYNAMICEXT) << 24 | (NV_TENSORRT_VERSION & 0xFFFFFF));
556 }
557
558 virtual ~IPluginV2DynamicExt() noexcept {}
559
560private:
561 // Following are obsolete base class methods, and must not be implemented or used.
562
566 void configurePlugin(Dims const*, int32_t, Dims const*, int32_t, DataType const*, DataType const*, bool const*,
567 bool const*, PluginFormat, int32_t) noexcept override final
568 {
569 }
570
574 bool supportsFormat(DataType, PluginFormat) const noexcept override final
575 {
576 return false;
577 }
578
582 Dims getOutputDimensions(int32_t, Dims const*, int32_t) noexcept override final
583 {
584 return Dims{-1, {}};
585 }
586
594 TRT_DEPRECATED bool isOutputBroadcastAcrossBatch(int32_t, bool const*, int32_t) const noexcept override final
595 {
596 return false;
597 }
598
606 TRT_DEPRECATED bool canBroadcastInputAcrossBatch(int32_t) const noexcept override final
607 {
608 return true;
609 }
610
614 size_t getWorkspaceSize(int32_t) const noexcept override final
615 {
616 return 0;
617 }
618
622 int32_t enqueue(int32_t, void const* const*, void* const*, void*, cudaStream_t) noexcept override final
623 {
624 return 1;
625 }
626};
627
628namespace v_1_0
629{
631{
632public:
637 ~IStreamReader() override = default;
638 IStreamReader() = default;
639
643 InterfaceInfo getInterfaceInfo() const noexcept override
644 {
645 return InterfaceInfo{"IStreamReader", 1, 0};
646 }
647
656 virtual int64_t read(void* destination, int64_t nbBytes) = 0;
657
658protected:
659 IStreamReader(IStreamReader const&) = default;
663};
664
666{
667public:
672 ~IStreamWriter() override = default;
673 IStreamWriter() = default;
674
678 InterfaceInfo getInterfaceInfo() const noexcept final
679 {
680 return InterfaceInfo{"IStreamWriter", 1, 0};
681 }
682
692 virtual int64_t write(void const* data, int64_t nbBytes) = 0;
693
694protected:
695 IStreamWriter(IStreamWriter const&) = default;
699};
700} // namespace v_1_0
701
711
721
//!
//! \enum SeekPosition
//!
//! \brief Reference position for IStreamReaderV2::seek(); mirrors the C
//! SEEK_SET/SEEK_CUR/SEEK_END convention (judging by the enumerator names).
//!
enum class SeekPosition : int32_t
{
    //! Seek to an absolute offset from the beginning of the stream.
    kSET = 0,

    //! Seek relative to the current stream position.
    kCUR = 1,

    //! Seek relative to the end of the stream.
    kEND = 2,
};
737
738namespace v_1_0
739{
741{
742public:
747 ~IStreamReaderV2() override = default;
748 IStreamReaderV2() = default;
749
753 InterfaceInfo getInterfaceInfo() const noexcept override
754 {
755 return InterfaceInfo{"IStreamReaderV2", 1, 0};
756 }
757
768 virtual int64_t read(void* destination, int64_t nbBytes, cudaStream_t stream) noexcept = 0;
769
778 virtual bool seek(int64_t offset, SeekPosition where) noexcept = 0;
779
780protected:
785};
786} // namespace v_1_0
787
798
799namespace v_1_0
800{
802{
803public:
811 virtual void reportLayerTime(char const* layerName, float ms) noexcept = 0;
812
813 virtual ~IProfiler() noexcept {}
814};
815} // namespace v_1_0
816
829
//!
//! \enum WeightsRole
//!
//! \brief Role a set of weights plays within a layer; consumed by
//! IRefitter::setWeights(), getMissing(), and getAll() in this file.
//!
enum class WeightsRole : int32_t
{
    kKERNEL = 0,   //!< Kernel weights (e.g. of a convolution or matrix-multiply layer).
    kBIAS = 1,     //!< Bias weights.
    kSHIFT = 2,    //!< Shift part of a scale operation.
    kSCALE = 3,    //!< Scale part of a scale operation.
    kCONSTANT = 4, //!< Weights of a constant layer.
    kANY = 5,      //!< Any weights role.
};
846
//! \brief Maximum number of elements in the WeightsRole enum.
//! \see WeightsRole
template <>
constexpr inline int32_t EnumMax<WeightsRole>() noexcept
{
    return 6;
}
853
//!
//! \enum DeviceType
//!
//! \brief Type of device on which the engine executes.
//!
enum class DeviceType : int32_t
{
    kGPU = 0, //!< GPU device.
    kDLA = 1, //!< DLA (Deep Learning Accelerator) core.
};
864
//! \brief Maximum number of elements in the DeviceType enum.
//! \see DeviceType
template <>
constexpr inline int32_t EnumMax<DeviceType>() noexcept
{
    return 2;
}
871
//!
//! \enum TempfileControlFlag
//!
//! \brief Flags controlling whether TensorRT may create in-memory or filesystem
//! temporary files (combined into the TempfileControlFlags bitmask below).
//!
//! NOTE(review): both enumerators were lost in the documentation extraction
//! (embedded source lines 884-890 are missing). They are reconstructed from the
//! public NvInferRuntime.h header, consistent with
//! EnumMax&lt;TempfileControlFlag&gt;() == 2 declared just below in this file.
//!
enum class TempfileControlFlag : int32_t
{
    //! Allow creating and loading files in-memory (or a platform equivalent).
    kALLOW_IN_MEMORY_FILES = 0,

    //! Allow creating and loading named files in a temporary directory on the filesystem.
    kALLOW_TEMPORARY_FILES = 1,
};
892
//! \brief Maximum number of elements in the TempfileControlFlag enum.
//! \see TempfileControlFlag
template <>
constexpr inline int32_t EnumMax<TempfileControlFlag>() noexcept
{
    return 2;
}
899
906using TempfileControlFlags = uint32_t;
907
//!
//! \enum TensorFormat
//!
//! \brief Memory layout of input/output tensors.
//!
//! Enumerator names encode the layout (e.g. kCHWn = channel dimension vectorized
//! in groups of n, kDLA_* = DLA-specific formats). The per-value notes below are
//! inferred from the names and the public TensorRT documentation; only the
//! numeric values are established by this header.
//!
enum class TensorFormat : int32_t
{
    //! Row-major linear layout.
    kLINEAR = 0,

    //! Channel dimension vectorized by 2.
    kCHW2 = 1,

    //! Channels-last layout, channels padded to a multiple of 8.
    kHWC8 = 2,

    //! Channel dimension vectorized by 4.
    kCHW4 = 3,

    //! Channel dimension vectorized by 16.
    kCHW16 = 4,

    //! Channel dimension vectorized by 32.
    kCHW32 = 5,

    //! 3-D (depth) variant of HWC8.
    kDHWC8 = 6,

    //! 3-D (depth) variant of CHW32.
    kCDHW32 = 7,

    //! Channels-last (HWC) layout, unpadded.
    kHWC = 8,

    //! DLA planar format.
    kDLA_LINEAR = 9,

    //! DLA image format, channels padded to 4.
    kDLA_HWC4 = 10,

    //! Channels-last layout, channels padded to a multiple of 16.
    kHWC16 = 11,

    //! 3-D channels-last (DHWC) layout.
    kDHWC = 12
};
1046
1047namespace impl
1048{
1050template <>
1052{
1054 static constexpr int32_t kVALUE = 13;
1055};
1056} // namespace impl
1057
//!
//! \enum AllocatorFlag
//!
//! \brief Bit flags for memory-allocation requests, combined into AllocatorFlags.
//!
enum class AllocatorFlag : int32_t
{
    //! The allocation may later be resized (presumably via IGpuAllocator::reallocate() — confirm).
    kRESIZABLE = 0,
};
1068
1069namespace impl
1070{
1072template <>
1074{
1076 static constexpr int32_t kVALUE = 1;
1077};
1078} // namespace impl
1079
1080using AllocatorFlags = uint32_t;
1081
1084
1098{
1099public:
1105 enum class Severity : int32_t
1106 {
1108 kINTERNAL_ERROR = 0,
1110 kERROR = 1,
1112 kWARNING = 2,
1114 kINFO = 3,
1116 kVERBOSE = 4,
1117 };
1118
1137 virtual void log(Severity severity, AsciiChar const* msg) noexcept = 0;
1138
1139 ILogger() = default;
1140 virtual ~ILogger() = default;
1141
1142protected:
1143 // @cond SuppressDoxyWarnings
1144 ILogger(ILogger const&) = default;
1145 ILogger(ILogger&&) = default;
1146 ILogger& operator=(ILogger const&) & = default;
1147 ILogger& operator=(ILogger&&) & = default;
1148 // @endcond
1149};
1150
1151namespace impl
1152{
//! \brief Specialization of EnumMaxImpl recording the number of ILogger::Severity values.
template <>
struct EnumMaxImpl<ILogger::Severity>
{
    //! Number of elements in ILogger::Severity (kINTERNAL_ERROR through kVERBOSE).
    static constexpr int32_t kVALUE = 5;
};
1160} // namespace impl
1161
1162namespace v_1_0
1163{
1164
1166{
1167public:
1193 uint64_t const size, uint64_t const alignment, AllocatorFlags const flags) noexcept = 0;
1194
1195 ~IGpuAllocator() override = default;
1196 IGpuAllocator() = default;
1197
1235 virtual void* reallocate(void* const /*baseAddr*/, uint64_t /*alignment*/, uint64_t /*newSize*/) noexcept
1236 {
1237 return nullptr;
1238 }
1239
1258 TRT_DEPRECATED virtual bool deallocate(void* const memory) noexcept = 0;
1259
1288 virtual void* allocateAsync(
1289 uint64_t const size, uint64_t const alignment, AllocatorFlags const flags, cudaStream_t /*stream*/) noexcept
1290 {
1291 return allocate(size, alignment, flags);
1292 }
1321 virtual bool deallocateAsync(void* const memory, cudaStream_t /*stream*/) noexcept
1322 {
1323 return deallocate(memory);
1324 }
1325
1329 InterfaceInfo getInterfaceInfo() const noexcept override
1330 {
1331 return {"IGpuAllocator", 1, 0};
1332 }
1333
1334protected:
1335 // @cond SuppressDoxyWarnings
1336 IGpuAllocator(IGpuAllocator const&) = default;
1337 IGpuAllocator(IGpuAllocator&&) = default;
1338 IGpuAllocator& operator=(IGpuAllocator const&) & = default;
1339 IGpuAllocator& operator=(IGpuAllocator&&) & = default;
1340 // @endcond
1341};
1342
1343} // namespace v_1_0
1344
1366
//!
//! \enum EngineValidity
//!
//! \brief Result of IRuntime::getEngineValidity(): whether a serialized engine
//! blob can be deserialized and executed by this runtime on this system.
//!
enum class EngineValidity : int32_t
{
    //! Engine is valid for this runtime.
    kVALID = 0,

    //! Engine can run but, judging by the name, may not perform optimally;
    //! see EngineInvalidityDiagnostics for the reported reasons.
    kSUBOPTIMAL = 1,

    //! Engine cannot be run.
    kINVALID = 2,
};
1383
1385namespace impl
1386{
1387template <>
1389{
1390 static constexpr int32_t kVALUE = 3;
1391};
1392} // namespace impl
1393
//!
//! \enum EngineInvalidityDiagnostics
//!
//! \brief Bit flags reported through the diagnostics out-parameter of
//! IRuntime::getEngineValidity(), indicating why an engine is invalid or
//! suboptimal. Per-flag notes below are inferred from the enumerator names —
//! confirm against the vendor documentation.
//!
enum class EngineInvalidityDiagnostics : uint64_t
{
    //! Engine was serialized by a mismatching TensorRT version.
    kVERSION_MISMATCH = 1ULL << 0,

    //! Engine does not support this GPU's compute capability.
    kUNSUPPORTED_CC = 1ULL << 1,

    //! Installed CUDA driver is too old for the engine.
    kOLD_CUDA_DRIVER = 1ULL << 2,

    //! Installed CUDA runtime is too old for the engine.
    kOLD_CUDA_RUNTIME = 1ULL << 3,

    //! Insufficient GPU memory to load/run the engine.
    kINSUFFICIENT_GPU_MEMORY = 1ULL << 4,

    //! Engine blob is corrupt or not a valid engine.
    kMALFORMED_ENGINE = 1ULL << 5,

    //! A CUDA error occurred during validation.
    kCUDA_ERROR = 1ULL << 6,
};
1421
1422
1430class IRuntime : public INoCopy
1431{
1432public:
1433 virtual ~IRuntime() noexcept = default;
1434
1446 void setDLACore(int32_t dlaCore) noexcept
1447 {
1448 mImpl->setDLACore(dlaCore);
1449 }
1450
1456 int32_t getDLACore() const noexcept
1457 {
1458 return mImpl->getDLACore();
1459 }
1460
1464 int32_t getNbDLACores() const noexcept
1465 {
1466 return mImpl->getNbDLACores();
1467 }
1468
1480 void setGpuAllocator(IGpuAllocator* allocator) noexcept
1481 {
1482 mImpl->setGpuAllocator(allocator);
1483 }
1484
1496 //
1499 void setErrorRecorder(IErrorRecorder* recorder) noexcept
1500 {
1501 mImpl->setErrorRecorder(recorder);
1502 }
1503
1515 {
1516 return mImpl->getErrorRecorder();
1517 }
1518
1532 ICudaEngine* deserializeCudaEngine(void const* blob, std::size_t size) noexcept
1533 {
1534 return mImpl->deserializeCudaEngine(blob, size);
1535 }
1536
1556 {
1557 return mImpl->deserializeCudaEngineV2(streamReader);
1558 }
1559
1565 ILogger* getLogger() const noexcept
1566 {
1567 return mImpl->getLogger();
1568 }
1569
1580 bool setMaxThreads(int32_t maxThreads) noexcept
1581 {
1582 return mImpl->setMaxThreads(maxThreads);
1583 }
1584
1594 int32_t getMaxThreads() const noexcept
1595 {
1596 return mImpl->getMaxThreads();
1597 }
1598
1629 void setTemporaryDirectory(char const* path) noexcept
1630 {
1631 return mImpl->setTemporaryDirectory(path);
1632 }
1633
1640 char const* getTemporaryDirectory() const noexcept
1641 {
1642 return mImpl->getTemporaryDirectory();
1643 }
1644
1657 {
1658 return mImpl->setTempfileControlFlags(flags);
1659 }
1660
1669 {
1670 return mImpl->getTempfileControlFlags();
1671 }
1672
1679 {
1680 return mImpl->getPluginRegistry();
1681 }
1682
1696 IRuntime* loadRuntime(char const* path) noexcept
1697 {
1698 return mImpl->loadRuntime(path);
1699 }
1700
1708 void setEngineHostCodeAllowed(bool allowed) noexcept
1709 {
1710 return mImpl->setEngineHostCodeAllowed(allowed);
1711 }
1712
1718 bool getEngineHostCodeAllowed() const noexcept
1719 {
1720 return mImpl->getEngineHostCodeAllowed();
1721 }
1722
1730 int64_t getEngineHeaderSize() const noexcept {
1731 return mImpl->getEngineHeaderSize();
1732 }
1733
1757 EngineValidity getEngineValidity(void const* blob, int64_t blobSize, uint64_t* diagnostics) const noexcept {
1758 return mImpl->getEngineValidity(blob, blobSize, diagnostics);
1759 }
1760
1761
1762protected:
1763 apiv::VRuntime* mImpl;
1764};
1765
1773class IRefitter : public INoCopy
1774{
1775public:
1776 virtual ~IRefitter() noexcept = default;
1777
1793 bool setWeights(char const* layerName, WeightsRole role, Weights weights) noexcept
1794 {
1795 return mImpl->setWeights(layerName, role, weights);
1796 }
1797
1810 bool refitCudaEngine() noexcept
1811 {
1812 return mImpl->refitCudaEngine();
1813 }
1814
1831 int32_t getMissing(int32_t size, char const** layerNames, WeightsRole* roles) noexcept
1832 {
1833 return mImpl->getMissing(size, layerNames, roles);
1834 }
1835
1848 int32_t getAll(int32_t size, char const** layerNames, WeightsRole* roles) noexcept
1849 {
1850 return mImpl->getAll(size, layerNames, roles);
1851 }
1852
1864 //
1867 void setErrorRecorder(IErrorRecorder* recorder) noexcept
1868 {
1869 mImpl->setErrorRecorder(recorder);
1870 }
1871
1883 {
1884 return mImpl->getErrorRecorder();
1885 }
1886
1907 bool setNamedWeights(char const* name, Weights weights) noexcept
1908 {
1909 return mImpl->setNamedWeights(name, weights);
1910 }
1911
1927 int32_t getMissingWeights(int32_t size, char const** weightsNames) noexcept
1928 {
1929 return mImpl->getMissingWeights(size, weightsNames);
1930 }
1931
1943 int32_t getAllWeights(int32_t size, char const** weightsNames) noexcept
1944 {
1945 return mImpl->getAllWeights(size, weightsNames);
1946 }
1947
1953 ILogger* getLogger() const noexcept
1954 {
1955 return mImpl->getLogger();
1956 }
1957
1969 bool setMaxThreads(int32_t maxThreads) noexcept
1970 {
1971 return mImpl->setMaxThreads(maxThreads);
1972 }
1973
1983 int32_t getMaxThreads() const noexcept
1984 {
1985 return mImpl->getMaxThreads();
1986 }
1987
2010 bool setNamedWeights(char const* name, Weights weights, TensorLocation location) noexcept
2011 {
2012 return mImpl->setNamedWeightsWithLocation(name, weights, location);
2013 }
2014
2026 Weights getNamedWeights(char const* weightsName) const noexcept
2027 {
2028 return mImpl->getNamedWeights(weightsName);
2029 }
2030
2042 TensorLocation getWeightsLocation(char const* weightsName) const noexcept
2043 {
2044 return mImpl->getWeightsLocation(weightsName);
2045 }
2046
2058 bool unsetNamedWeights(char const* weightsName) noexcept
2059 {
2060 return mImpl->unsetNamedWeights(weightsName);
2061 }
2062
2074 void setWeightsValidation(bool weightsValidation) noexcept
2075 {
2076 return mImpl->setWeightsValidation(weightsValidation);
2077 }
2078
2082 bool getWeightsValidation() const noexcept
2083 {
2084 return mImpl->getWeightsValidation();
2085 }
2086
2104 bool refitCudaEngineAsync(cudaStream_t stream) noexcept
2105 {
2106 return mImpl->refitCudaEngineAsync(stream);
2107 }
2108
2122 Weights getWeightsPrototype(char const* weightsName) const noexcept
2123 {
2124 return mImpl->getWeightsPrototype(weightsName);
2125 }
2126
2127protected:
2128 apiv::VRefitter* mImpl;
2129};
2130
//!
//! \enum OptProfileSelector
//!
//! \brief Selects the minimum, optimum, or maximum value of an optimization-profile
//! dimension or shape-tensor value (see the IOptimizationProfile setters/getters
//! and ICudaEngine::getProfileShape() in this file).
//!
enum class OptProfileSelector : int32_t
{
    kMIN = 0, //!< Minimum of the allowed range.
    kOPT = 1, //!< Value to optimize for.
    kMAX = 2  //!< Maximum of the allowed range.
};
2147
//! \brief Maximum number of elements in the OptProfileSelector enum.
//! \see OptProfileSelector
template <>
constexpr inline int32_t EnumMax<OptProfileSelector>() noexcept
{
    return 3;
}
2158
2182{
2183public:
2211 bool setDimensions(char const* inputName, OptProfileSelector select, Dims const& dims) noexcept
2212 {
2213 return mImpl->setDimensions(inputName, select, dims);
2214 }
2215
2223 Dims getDimensions(char const* inputName, OptProfileSelector select) const noexcept
2224 {
2225 return mImpl->getDimensions(inputName, select);
2226 }
2227
2276 char const* inputName, OptProfileSelector select, int32_t const* values, int32_t nbValues) noexcept
2277 {
2278 return mImpl->setShapeValues(inputName, select, values, nbValues);
2279 }
2280
2289 int32_t getNbShapeValues(char const* inputName) const noexcept
2290 {
2291 return mImpl->getNbShapeValues(inputName);
2292 }
2293
2303 TRT_DEPRECATED int32_t const* getShapeValues(char const* inputName, OptProfileSelector select) const noexcept
2304 {
2305 return mImpl->getShapeValues(inputName, select);
2306 }
2307
2321 bool setExtraMemoryTarget(float target) noexcept
2322 {
2323 return mImpl->setExtraMemoryTarget(target);
2324 }
2325
2333 float getExtraMemoryTarget() const noexcept
2334 {
2335 return mImpl->getExtraMemoryTarget();
2336 }
2337
2350 bool isValid() const noexcept
2351 {
2352 return mImpl->isValid();
2353 }
2354
2401 char const* inputName, OptProfileSelector select, int64_t const* values, int32_t nbValues) noexcept
2402 {
2403 return mImpl->setShapeValuesV2(inputName, select, values, nbValues);
2404 }
2405
2413 int64_t const* getShapeValuesV2(char const* inputName, OptProfileSelector select) const noexcept
2414 {
2415 return mImpl->getShapeValuesV2(inputName, select);
2416 }
2417
2418protected:
2419 apiv::VOptimizationProfile* mImpl;
2420 virtual ~IOptimizationProfile() noexcept = default;
2421};
2422
2430enum class TacticSource : int32_t
2431{
2436
2440
2445
2450
2454};
2455
//! \brief Maximum number of elements in the TacticSource enum.
//! \see TacticSource
template <>
constexpr inline int32_t EnumMax<TacticSource>() noexcept
{
    return 5;
}
2461
2468using TacticSources = uint32_t;
2469
//!
//! \enum ProfilingVerbosity
//!
//! \brief Amount of layer information exposed when profiling
//! (used by IExecutionContext::setNvtxVerbosity() and
//! ICudaEngine::getProfilingVerbosity() in this file).
//!
enum class ProfilingVerbosity : int32_t
{
    kLAYER_NAMES_ONLY = 0, //!< Expose layer names only.
    kNONE = 1,             //!< Expose no layer information.
    kDETAILED = 2,         //!< Expose detailed layer information.
};
2485
//! \brief Maximum number of elements in the ProfilingVerbosity enum.
//! \see ProfilingVerbosity
template <>
constexpr inline int32_t EnumMax<ProfilingVerbosity>() noexcept
{
    return 3;
}
2492
2499using SerializationFlags = uint32_t;
2500
//!
//! \enum SerializationFlag
//!
//! \brief Bit flags controlling engine serialization, combined into
//! SerializationFlags (see ISerializationConfig in this file).
//!
//! NOTE(review): EnumMax&lt;SerializationFlag&gt;() below returns 2, but only one
//! enumerator survives in this extraction — a second enumerator appears to have
//! been lost; confirm against the original header before relying on this list.
//!
enum class SerializationFlag : int32_t
{
    kEXCLUDE_WEIGHTS = 0, //!< Omit the weights from the serialized engine.
};
2513
//! \brief Maximum number of elements in the SerializationFlag enum.
//! \see SerializationFlag
template <>
constexpr inline int32_t EnumMax<SerializationFlag>() noexcept
{
    return 2;
}
2520
2529{
2530public:
2531 virtual ~ISerializationConfig() noexcept = default;
2532
2544 bool setFlags(SerializationFlags serializationFlags) noexcept
2545 {
2546 return mImpl->setFlags(serializationFlags);
2547 }
2548
2557 {
2558 return mImpl->getFlags();
2559 }
2560
2568 bool clearFlag(SerializationFlag serializationFlag) noexcept
2569 {
2570 return mImpl->clearFlag(serializationFlag);
2571 }
2572
2580 bool setFlag(SerializationFlag serializationFlag) noexcept
2581 {
2582 return mImpl->setFlag(serializationFlag);
2583 }
2584
2592 bool getFlag(SerializationFlag serializationFlag) const noexcept
2593 {
2594 return mImpl->getFlag(serializationFlag);
2595 }
2596
2597protected:
2598 apiv::VSerializationConfig* mImpl;
2599};
2600
2613{
2614 kSTATIC = 0,
2615 kON_PROFILE_CHANGE = 1,
2616 kUSER_MANAGED = 2,
2617};
2618
//! \brief Maximum number of elements in the ExecutionContextAllocationStrategy enum.
//! \see ExecutionContextAllocationStrategy
template <>
constexpr inline int32_t EnumMax<ExecutionContextAllocationStrategy>() noexcept
{
    return 3;
}
2629
2638{
2639public:
2640 virtual ~IRuntimeCache() noexcept = default;
2641
2647 IHostMemory* serialize() const noexcept
2648 {
2649 return mImpl->serialize();
2650 }
2651
2658 bool deserialize(void const* blob, size_t size) noexcept
2659 {
2660 return mImpl->deserialize(blob, size);
2661 }
2662
2666 bool reset() noexcept
2667 {
2668 return mImpl->reset();
2669 }
2670
2671protected:
2672 apiv::VRuntimeCache* mImpl;
2673};
2674
2687{
2693 kLAZY = 0,
2694
2699 kEAGER = 1,
2700
2704 kNONE = 2,
2705};
2706
2712template <>
2714{
2715 return 3;
2716}
2717
2725{
2726public:
2727 virtual ~IRuntimeConfig() noexcept = default;
2728
2734 void setExecutionContextAllocationStrategy(ExecutionContextAllocationStrategy strategy) noexcept
2735 {
2736 return mImpl->setExecutionContextAllocationStrategy(strategy);
2737 }
2738
2745 {
2746 return mImpl->getExecutionContextAllocationStrategy();
2747 }
2748
2755 {
2756 return mImpl->createRuntimeCache();
2757 }
2758
2764 bool setRuntimeCache(IRuntimeCache const& cache) noexcept
2765 {
2766 return mImpl->setRuntimeCache(cache);
2767 }
2768
2775 {
2776 return mImpl->getRuntimeCache();
2777 }
2778
2785 DynamicShapesKernelSpecializationStrategy dynamicShapesKernelSpecializationStrategy) noexcept
2786 {
2787 return mImpl->setDynamicShapesKernelSpecializationStrategy(dynamicShapesKernelSpecializationStrategy);
2788 }
2789
2796 {
2797 return mImpl->getDynamicShapesKernelSpecializationStrategy();
2798 }
2799
2800
2801protected:
2802 apiv::VRuntimeConfig* mImpl;
2803}; // class IRuntimeConfig
2804
2813enum class EngineStat : int32_t
2814{
2817
2820};
2821
//! \brief Maximum number of elements in the EngineStat enum.
//! \see EngineStat
template <>
constexpr inline int32_t EnumMax<EngineStat>() noexcept
{
    return 2;
}
2832
2840class ICudaEngine : public INoCopy
2841{
2842public:
2843 virtual ~ICudaEngine() noexcept = default;
2844
2855 Dims getTensorShape(char const* tensorName) const noexcept
2856 {
2857 return mImpl->getTensorShape(tensorName);
2858 }
2859
2870 DataType getTensorDataType(char const* tensorName) const noexcept
2871 {
2872 return mImpl->getTensorDataType(tensorName);
2873 }
2874
2884 int32_t getNbLayers() const noexcept
2885 {
2886 return mImpl->getNbLayers();
2887 }
2888
2898 IHostMemory* serialize() const noexcept
2899 {
2900 return mImpl->serialize();
2901 }
2902
2917 {
2918 return mImpl->createExecutionContext(strategy);
2919 }
2920
2933 TensorLocation getTensorLocation(char const* tensorName) const noexcept
2934 {
2935 return mImpl->getTensorLocation(tensorName);
2936 }
2937
2953 bool isShapeInferenceIO(char const* tensorName) const noexcept
2954 {
2955 return mImpl->isShapeInferenceIO(tensorName);
2956 }
2957
2967 TensorIOMode getTensorIOMode(char const* tensorName) const noexcept
2968 {
2969 return mImpl->getTensorIOMode(tensorName);
2970 }
2971
2980 {
2981 return mImpl->createExecutionContextWithRuntimeConfig(runtimeConfig);
2982 }
2983
2993 {
2994 return mImpl->createRuntimeConfig();
2995 }
2996
3008 int64_t getDeviceMemorySizeV2() const noexcept
3009 {
3010 return mImpl->getDeviceMemorySizeV2();
3011 }
3012
3024 int64_t getDeviceMemorySizeForProfileV2(int32_t profileIndex) const noexcept
3025 {
3026 return mImpl->getDeviceMemorySizeForProfileV2(profileIndex);
3027 }
3028
3034 bool isRefittable() const noexcept
3035 {
3036 return mImpl->isRefittable();
3037 }
3038
3055 int32_t getTensorBytesPerComponent(char const* tensorName) const noexcept
3056 {
3057 return mImpl->getTensorBytesPerComponent(tensorName);
3058 }
3059
3073 int32_t getTensorBytesPerComponent(char const* tensorName, int32_t profileIndex) const noexcept
3074 {
3075 return mImpl->getTensorBytesPerComponentV2(tensorName, profileIndex);
3076 }
3077
3094 int32_t getTensorComponentsPerElement(char const* tensorName) const noexcept
3095 {
3096 return mImpl->getTensorComponentsPerElement(tensorName);
3097 }
3098
3112 int32_t getTensorComponentsPerElement(char const* tensorName, int32_t profileIndex) const noexcept
3113 {
3114 return mImpl->getTensorComponentsPerElementV2(tensorName, profileIndex);
3115 }
3116
3127 TensorFormat getTensorFormat(char const* tensorName) const noexcept
3128 {
3129 return mImpl->getTensorFormat(tensorName);
3130 }
3131
3141 TensorFormat getTensorFormat(char const* tensorName, int32_t profileIndex) const noexcept
3142 {
3143 return mImpl->getTensorFormatV2(tensorName, profileIndex);
3144 }
3145
3165 char const* getTensorFormatDesc(char const* tensorName) const noexcept
3166 {
3167 return mImpl->getTensorFormatDesc(tensorName);
3168 }
3169
3188 char const* getTensorFormatDesc(char const* tensorName, int32_t profileIndex) const noexcept
3189 {
3190 return mImpl->getTensorFormatDescV2(tensorName, profileIndex);
3191 }
3192
3205 int32_t getTensorVectorizedDim(char const* tensorName) const noexcept
3206 {
3207 return mImpl->getTensorVectorizedDim(tensorName);
3208 }
3209
3221 int32_t getTensorVectorizedDim(char const* tensorName, int32_t profileIndex) const noexcept
3222 {
3223 return mImpl->getTensorVectorizedDimV2(tensorName, profileIndex);
3224 }
3225
3236 char const* getName() const noexcept
3237 {
3238 return mImpl->getName();
3239 }
3240
3247 int32_t getNbOptimizationProfiles() const noexcept
3248 {
3249 return mImpl->getNbOptimizationProfiles();
3250 }
3251
3267 Dims getProfileShape(char const* tensorName, int32_t profileIndex, OptProfileSelector select) const noexcept
3268 {
3269 return mImpl->getProfileShape(tensorName, profileIndex, select);
3270 }
3271
3283 {
3284 return mImpl->getEngineCapability();
3285 }
3286
3301 void setErrorRecorder(IErrorRecorder* recorder) noexcept
3302 {
3303 return mImpl->setErrorRecorder(recorder);
3304 }
3305
3317 {
3318 return mImpl->getErrorRecorder();
3319 }
3320
3331 {
3332 return mImpl->hasImplicitBatchDimension();
3333 }
3334
3347 {
3348 return mImpl->getTacticSources();
3349 }
3350
3359 {
3360 return mImpl->getProfilingVerbosity();
3361 }
3362
3369 {
3370 return mImpl->createEngineInspector();
3371 }
3372
3381 int32_t getNbIOTensors() const noexcept
3382 {
3383 return mImpl->getNbIOTensors();
3384 }
3385
3393 char const* getIOTensorName(int32_t index) const noexcept
3394 {
3395 return mImpl->getIOTensorName(index);
3396 }
3397
3405 {
3406 return mImpl->getHardwareCompatibilityLevel();
3407 }
3408
3419 int32_t getNbAuxStreams() const noexcept
3420 {
3421 return mImpl->getNbAuxStreams();
3422 }
3423
3430 {
3431 return mImpl->createSerializationConfig();
3432 }
3433
3446 {
3447 return mImpl->serializeWithConfig(config);
3448 }
3449
3461 int64_t getStreamableWeightsSize() const noexcept
3462 {
3463 return mImpl->getStreamableWeightsSize();
3464 }
3465
3503 bool setWeightStreamingBudgetV2(int64_t gpuMemoryBudget) noexcept
3504 {
3505 return mImpl->setWeightStreamingBudgetV2(gpuMemoryBudget);
3506 }
3507
3521 int64_t getWeightStreamingBudgetV2() const noexcept
3522 {
3523 return mImpl->getWeightStreamingBudgetV2();
3524 }
3525
3546 int64_t getWeightStreamingAutomaticBudget() const noexcept
3547 {
3548 return mImpl->getWeightStreamingAutomaticBudget();
3549 }
3550
3575 {
3576 return mImpl->getWeightStreamingScratchMemorySize();
3577 }
3578
3588 bool isDebugTensor(char const* name) const noexcept
3589 {
3590 return mImpl->isDebugTensor(name);
3591 }
3592
3613 char const* tensorName, int32_t profileIndex, OptProfileSelector select) const noexcept
3614 {
3615 return mImpl->getProfileTensorValuesV2(tensorName, profileIndex, select);
3616 }
3617
3641 int64_t getEngineStat(EngineStat stat) const noexcept
3642 {
3643 return mImpl->getEngineStat(stat);
3644 }
3645
3646protected:
3647 apiv::VCudaEngine* mImpl;
3648};
3649
3650namespace v_1_0
3651{
3653{
3654public:
3658 InterfaceInfo getInterfaceInfo() const noexcept override
3659 {
3660 return {"IOutputAllocator", 1, 0};
3661 }
3662
3686 char const* tensorName, void* currentMemory, uint64_t size, uint64_t alignment, cudaStream_t /*stream*/)
3687 {
3688 return nullptr;
3689 }
3690
3699 virtual void notifyShape(char const* tensorName, Dims const& dims) noexcept = 0;
3700};
3701} // namespace v_1_0
3702
3711
3712namespace v_1_0
3713{
3715{
3716public:
3720 InterfaceInfo getInterfaceInfo() const noexcept override
3721 {
3722 return {"IDebugListener", 1, 0};
3723 }
3724
3738 virtual bool processDebugTensor(void const* addr, TensorLocation location, DataType type, Dims const& shape,
3739 char const* name, cudaStream_t stream)
3740 = 0;
3741
3742 ~IDebugListener() override = default;
3743};
3744} // namespace v_1_0
3745
3752
3764{
3765public:
3766 virtual ~IExecutionContext() noexcept = default;
3767
3776 void setDebugSync(bool sync) noexcept
3777 {
3778 mImpl->setDebugSync(sync);
3779 }
3780
3786 bool getDebugSync() const noexcept
3787 {
3788 return mImpl->getDebugSync();
3789 }
3790
3796 void setProfiler(IProfiler* profiler) noexcept
3797 {
3798 mImpl->setProfiler(profiler);
3799 }
3800
3806 IProfiler* getProfiler() const noexcept
3807 {
3808 return mImpl->getProfiler();
3809 }
3810
3816 ICudaEngine const& getEngine() const noexcept
3817 {
3818 return mImpl->getEngine();
3819 }
3820
3830 void setName(char const* name) noexcept
3831 {
3832 mImpl->setName(name);
3833 }
3834
3840 char const* getName() const noexcept
3841 {
3842 return mImpl->getName();
3843 }
3844
3867 void setDeviceMemory(void* memory) noexcept
3868 {
3869 mImpl->setDeviceMemory(memory);
3870 }
3871
3889 void setDeviceMemoryV2(void* memory, int64_t size) noexcept
3890 {
3891 return mImpl->setDeviceMemoryV2(memory, size);
3892 }
3893
3910 Dims getTensorStrides(char const* tensorName) const noexcept
3911 {
3912 return mImpl->getTensorStrides(tensorName);
3913 }
3914
3915public:
3925 int32_t getOptimizationProfile() const noexcept
3926 {
3927 return mImpl->getOptimizationProfile();
3928 }
3929
3943 bool setInputShape(char const* tensorName, Dims const& dims) noexcept
3944 {
3945 return mImpl->setInputShape(tensorName, dims);
3946 }
3947
3980 Dims getTensorShape(char const* tensorName) const noexcept
3981 {
3982 return mImpl->getTensorShape(tensorName);
3983 }
3984
3996 bool allInputDimensionsSpecified() const noexcept
3997 {
3998 return mImpl->allInputDimensionsSpecified();
3999 }
4000
4015 void setErrorRecorder(IErrorRecorder* recorder) noexcept
4016 {
4017 mImpl->setErrorRecorder(recorder);
4018 }
4019
4031 {
4032 return mImpl->getErrorRecorder();
4033 }
4034
4047 bool executeV2(void* const* bindings) noexcept
4048 {
4049 return mImpl->executeV2(bindings);
4050 }
4051
4091 bool setOptimizationProfileAsync(int32_t profileIndex, cudaStream_t stream) noexcept
4092 {
4093 return mImpl->setOptimizationProfileAsync(profileIndex, stream);
4094 }
4095
4107 void setEnqueueEmitsProfile(bool enqueueEmitsProfile) noexcept
4108 {
4109 mImpl->setEnqueueEmitsProfile(enqueueEmitsProfile);
4110 }
4111
4119 bool getEnqueueEmitsProfile() const noexcept
4120 {
4121 return mImpl->getEnqueueEmitsProfile();
4122 }
4123
4149 bool reportToProfiler() const noexcept
4150 {
4151 return mImpl->reportToProfiler();
4152 }
4153
4193 bool setTensorAddress(char const* tensorName, void* data) noexcept
4194 {
4195 return mImpl->setTensorAddress(tensorName, data);
4196 }
4197
4210 void const* getTensorAddress(char const* tensorName) const noexcept
4211 {
4212 return mImpl->getTensorAddress(tensorName);
4213 }
4214
4233 bool setOutputTensorAddress(char const* tensorName, void* data) noexcept
4234 {
4235 return mImpl->setOutputTensorAddress(tensorName, data);
4236 }
4237
4255 bool setInputTensorAddress(char const* tensorName, void const* data) noexcept
4256 {
4257 return mImpl->setInputTensorAddress(tensorName, data);
4258 }
4259
4274 void* getOutputTensorAddress(char const* tensorName) const noexcept
4275 {
4276 return mImpl->getOutputTensorAddress(tensorName);
4277 }
4278
//! \brief Run shape calculations; writes up to nbMaxNames affected tensor names into tensorNames.
//! NOTE(review): exact return-value semantics (count vs. error code) are documented in the full header — not visible here.
4307 int32_t inferShapes(int32_t nbMaxNames, char const** tensorNames) noexcept
4308 {
4309 return mImpl->inferShapes(nbMaxNames, tensorNames);
4310 }
4311
4324 size_t updateDeviceMemorySizeForShapes() noexcept
4325 {
4326 return mImpl->updateDeviceMemorySizeForShapes();
4327 }
4328
//! \brief Mark input as consumed: the given CUDA event is signaled once input buffers may be reused.
4340 bool setInputConsumedEvent(cudaEvent_t event) noexcept
4341 {
4342 return mImpl->setInputConsumedEvent(event);
4343 }
4344
//! \brief The event associated with consuming the input.
4350 cudaEvent_t getInputConsumedEvent() const noexcept
4351 {
4352 return mImpl->getInputConsumedEvent();
4353 }
4354
//! \brief Set output allocator to use for the output tensor of the given name.
//! Pass nullptr as outputAllocator to unset.
4369 bool setOutputAllocator(char const* tensorName, IOutputAllocator* outputAllocator) noexcept
4370 {
4371 return mImpl->setOutputAllocator(tensorName, outputAllocator);
4372 }
4373
//! \brief Get the output allocator associated with the named output tensor,
//! or nullptr if the name does not map to an output.
4382 IOutputAllocator* getOutputAllocator(char const* tensorName) const noexcept
4383 {
4384 return mImpl->getOutputAllocator(tensorName);
4385 }
4386
//! \brief Get an upper bound on an output tensor's size, in bytes, based on the
//! current optimization profile and input shapes.
4400 int64_t getMaxOutputSize(char const* tensorName) const noexcept
4401 {
4402 return mImpl->getMaxOutputSize(tensorName);
4403 }
4404
4420 bool setTemporaryStorageAllocator(IGpuAllocator* allocator) noexcept
4421 {
4422 return mImpl->setTemporaryStorageAllocator(allocator);
4423 }
4424
4430 IGpuAllocator* getTemporaryStorageAllocator() const noexcept
4431 {
4432 return mImpl->getTemporaryStorageAllocator();
4433 }
4434
//! \brief Enqueue inference on the given CUDA stream (asynchronous execution entry point).
4454 bool enqueueV3(cudaStream_t stream) noexcept
4455 {
4456 return mImpl->enqueueV3(stream);
4457 }
4458
//! \brief Set the maximum size (bytes) for persistent cache usage.
4470 void setPersistentCacheLimit(size_t size) noexcept
4471 {
4472 mImpl->setPersistentCacheLimit(size);
4473 }
4474
//! \brief Get the maximum size (bytes) for persistent cache usage.
4481 size_t getPersistentCacheLimit() const noexcept
4482 {
4483 return mImpl->getPersistentCacheLimit();
4484 }
4485
//! \brief Set the verbosity of the NVTX markers emitted by this execution context.
4505 bool setNvtxVerbosity(ProfilingVerbosity verbosity) noexcept
4506 {
4507 return mImpl->setNvtxVerbosity(verbosity);
4508 }
4509
4517 ProfilingVerbosity getNvtxVerbosity() const noexcept
4518 {
4519 return mImpl->getNvtxVerbosity();
4520 }
4521
//! \brief Set the auxiliary streams that TensorRT should launch kernels on in the next enqueueV3() call.
4548 void setAuxStreams(cudaStream_t* auxStreams, int32_t nbStreams) noexcept
4549 {
4550 mImpl->setAuxStreams(auxStreams, nbStreams);
4551 }
4552
//! \brief Set the DebugListener for this execution context.
4560 bool setDebugListener(IDebugListener* listener) noexcept
4561 {
4562 return mImpl->setDebugListener(listener);
4563 }
4564
4570 IDebugListener* getDebugListener() noexcept
4571 {
4572 return mImpl->getDebugListener();
4573 }
4574
//! \brief Set the debug state of the tensor with the given name.
4589 bool setTensorDebugState(char const* name, bool flag) noexcept
4590 {
4591 return mImpl->setTensorDebugState(name, flag);
4592 }
4593
//! \brief Get the debug state of the named tensor.
4601 bool getDebugState(char const* name) const noexcept
4602 {
4603 return mImpl->getDebugState(name);
4604 }
4605
4611 IRuntimeConfig* getRuntimeConfig() const noexcept
4612 {
4613 return mImpl->getRuntimeConfig();
4614 }
4615
//! \brief Turn the debug state of all debug tensors on or off.
4624 bool setAllTensorsDebugState(bool flag) noexcept
4625 {
4626 return mImpl->setAllTensorsDebugState(flag);
4627 }
4628
//! \brief Turn the debug state of unfused tensors on or off.
4640 bool setUnfusedTensorsDebugState(bool flag) noexcept
4641 {
4642 return mImpl->setUnfusedTensorsDebugState(flag);
4643 }
4644
//! \brief Get the debug state of unfused tensors.
4650 bool getUnfusedTensorsDebugState() const noexcept
4651 {
4652 return mImpl->getUnfusedTensorsDebugState();
4653 }
4654
4655protected:
4656 apiv::VExecutionContext* mImpl;
4657}; // class IExecutionContext
4658
//! \enum LayerInformationFormat
//! \brief Output format used by IEngineInspector when printing layer information.
4666enum class LayerInformationFormat : int32_t
4667{
//! Print each layer's information on a single line.
4668 kONELINE = 0,
//! Print layer information as JSON.
4669 kJSON = 1,
4670};
4671
//! Maximum number of elements in the LayerInformationFormat enum (kONELINE, kJSON).
4674template <>
4675constexpr inline int32_t EnumMax<LayerInformationFormat>() noexcept
4676{
4677 return 2;
4678}
4679
4695class IEngineInspector : public INoCopy
4696{
4697public:
4698 virtual ~IEngineInspector() noexcept = default;
4699
//! \brief Set an execution context as the inspection source; enables context-specific layer information.
4712 bool setExecutionContext(IExecutionContext const* context) noexcept
4713 {
4714 return mImpl->setExecutionContext(context);
4715 }
4716
4724 IExecutionContext const* getExecutionContext() const noexcept
4725 {
4726 return mImpl->getExecutionContext();
4727 }
4728
//! \brief Get a string describing a specific layer in the current engine or execution context,
//! in the requested LayerInformationFormat.
4749 char const* getLayerInformation(int32_t layerIndex, LayerInformationFormat format) const noexcept
4750 {
4751 return mImpl->getLayerInformation(layerIndex, format);
4752 }
4753
//! \brief Get a string describing all layers in the current engine or execution context,
//! in the requested LayerInformationFormat.
4772 char const* getEngineInformation(LayerInformationFormat format) const noexcept
4773 {
4774 return mImpl->getEngineInformation(format);
4775 }
4776
//! \brief Set the ErrorRecorder for this interface.
4791 void setErrorRecorder(IErrorRecorder* recorder) noexcept
4792 {
4793 mImpl->setErrorRecorder(recorder);
4794 }
4795
4806 IErrorRecorder* getErrorRecorder() const noexcept
4807 {
4808 return mImpl->getErrorRecorder();
4809 }
4810
4811protected:
4812 apiv::VEngineInspector* mImpl;
4813}; // class IEngineInspector
4814
4815} // namespace nvinfer1
4816
//! Internal C entry point used by the inline createInferRuntime() wrapper below;
//! takes the API version so the library can check header/library compatibility.
4821extern "C" TENSORRTAPI void* createInferRuntime_INTERNAL(void* logger, int32_t version) noexcept;
4822
//! Internal C entry point used by the inline createInferRefitter() wrapper below.
4827extern "C" TENSORRTAPI void* createInferRefitter_INTERNAL(void* engine, void* logger, int32_t version) noexcept;
4828
4833extern "C" TENSORRTAPI nvinfer1::IPluginRegistry* getPluginRegistry() noexcept;
//! Return the logger object. See the index entry: "Return the logger object."
4839extern "C" TENSORRTAPI nvinfer1::ILogger* getLogger() noexcept;
4840
4841namespace nvinfer1
4842{
4843namespace // unnamed namespace avoids linkage surprises when linking objects built with different versions of this
4844 // header.
4845{
//! \brief Create an IRuntime bound to the given logger.
//! Wraps the versioned C entry point, passing NV_TENSORRT_VERSION so the library can
//! reject a header/library version mismatch.
4851inline IRuntime* createInferRuntime(ILogger& logger) noexcept
4852{
4853 return static_cast<IRuntime*>(createInferRuntime_INTERNAL(&logger, NV_TENSORRT_VERSION));
4854}
4855
//! \brief Create an IRefitter for the given engine, bound to the given logger.
//! Wraps the versioned C entry point, passing NV_TENSORRT_VERSION for compatibility checking.
4862inline IRefitter* createInferRefitter(ICudaEngine& engine, ILogger& logger) noexcept
4863{
4864 return static_cast<IRefitter*>(createInferRefitter_INTERNAL(&engine, &logger, NV_TENSORRT_VERSION));
4865}
4866
4867} // namespace
4868
4880template <typename T>
4881class PluginRegistrar
4882{
4883public:
4884 PluginRegistrar()
4885 {
4886 getPluginRegistry()->registerCreator(instance, "");
4887 }
4888
4889private:
4891 T instance{};
4892};
4893
4894} // namespace nvinfer1
4895
4896namespace nvinfer1
4897{
4906class ILoggerFinder
4907{
4908public:
//! \brief Get the logger used by the engine or execution context which called the plugin method.
4916 virtual ILogger* findLogger() = 0;
4917
4918protected:
4919 virtual ~ILoggerFinder() = default;
4920};
4921
4924namespace v_1_0
4925{
4926
4927class IGpuAsyncAllocator : public IGpuAllocator
4928{
4929public:
4931 ~IGpuAsyncAllocator() override = default;
4932
//! \brief Stream-ordered allocation of `size` bytes with the given alignment and flags (pure virtual).
//! Implementers receive the CUDA stream; the parameter name is commented out in this declaration.
4962 void* allocateAsync(uint64_t const size, uint64_t const alignment, AllocatorFlags const flags,
4963 cudaStream_t /*stream*/) noexcept override = 0;
4964
//! \brief Stream-ordered deallocation of memory previously returned by allocateAsync (pure virtual).
4990 bool deallocateAsync(void* const memory, cudaStream_t /*stream*/) noexcept override = 0;
4991
5015 TRT_DEPRECATED void* allocate(
5016 uint64_t const size, uint64_t const alignment, AllocatorFlags const flags) noexcept override
5017 {
5018 return allocateAsync(size, alignment, flags, nullptr);
5019 }
5020
//! \brief Deprecated synchronous deallocation: forwards to deallocateAsync on the
//! null (legacy default) CUDA stream.
5039 TRT_DEPRECATED bool deallocate(void* const memory) noexcept override
5040 {
5041 return deallocateAsync(memory, nullptr);
5042 }
5043
//! \brief Return the API interface descriptor.
//! Note: the name reported is "IGpuAllocator" (version 1.0) — the async allocator presents
//! itself as that interface version, as the return statement shows.
5047 InterfaceInfo getInterfaceInfo() const noexcept override
5048 {
5049 return {"IGpuAllocator", 1, 0};
5050 }
5051};
5052
5053} // namespace v_1_0
5054
5069using IGpuAsyncAllocator = v_1_0::IGpuAsyncAllocator;
5070} // namespace nvinfer1
5071
//! Return the library major version number.
5075extern "C" TENSORRTAPI int32_t getInferLibMajorVersion() noexcept;
//! Return the library minor version number.
5079extern "C" TENSORRTAPI int32_t getInferLibMinorVersion() noexcept;
//! Return the library patch version number.
5083extern "C" TENSORRTAPI int32_t getInferLibPatchVersion() noexcept;
//! Return the library build version number.
5087extern "C" TENSORRTAPI int32_t getInferLibBuildVersion() noexcept;
5088
5089#endif // NV_INFER_RUNTIME_H
nvinfer1::ILogger * getLogger() noexcept
Return the logger object.
int32_t getInferLibMajorVersion() noexcept
Return the library major version number.
nvinfer1::IPluginRegistry * getPluginRegistry() noexcept
Return the plugin registry.
int32_t getInferLibPatchVersion() noexcept
Return the library patch version number.
int32_t getInferLibMinorVersion() noexcept
Return the library minor version number.
int32_t getInferLibBuildVersion() noexcept
Return the library build version number.
#define TENSORRTAPI
Definition: NvInferRuntimeBase.h:69
#define NV_TENSORRT_VERSION
Definition: NvInferRuntimeBase.h:101
#define TRT_DEPRECATED
Definition: NvInferRuntimeBase.h:42
#define TRT_DEPRECATED_ENUM
Definition: NvInferRuntimeBase.h:43
Structure to define the dimensions of a tensor.
Definition: NvInferRuntimeBase.h:216
static constexpr int32_t MAX_DIMS
The maximum rank (number of dimensions) supported for a tensor.
Definition: NvInferRuntimeBase.h:219
Analog of class Dims with expressions instead of constants for the dimensions.
Definition: NvInferRuntime.h:350
IDimensionExpr const * d[Dims::MAX_DIMS]
The extent of each dimension.
Definition: NvInferRuntime.h:353
int32_t nbDims
The number of dimensions.
Definition: NvInferRuntime.h:352
An engine for executing inference on a built network, with functionally unsafe features.
Definition: NvInferRuntime.h:2841
int32_t getTensorBytesPerComponent(char const *tensorName) const noexcept
Return the number of bytes per component of an element, or -1 if the tensor is not vectorized or prov...
Definition: NvInferRuntime.h:3055
ISerializationConfig * createSerializationConfig() noexcept
Create a serialization configuration object.
Definition: NvInferRuntime.h:3429
char const * getIOTensorName(int32_t index) const noexcept
Return name of an IO tensor.
Definition: NvInferRuntime.h:3393
int64_t getWeightStreamingBudgetV2() const noexcept
Returns the current weight streaming device memory budget in bytes.
Definition: NvInferRuntime.h:3521
EngineCapability getEngineCapability() const noexcept
Determine what execution capability this engine has.
Definition: NvInferRuntime.h:3282
IErrorRecorder * getErrorRecorder() const noexcept
Get the ErrorRecorder assigned to this interface.
Definition: NvInferRuntime.h:3316
TensorFormat getTensorFormat(char const *tensorName, int32_t profileIndex) const noexcept
Return the tensor format of given profile, or TensorFormat::kLINEAR if the provided name does not map...
Definition: NvInferRuntime.h:3141
int64_t const * getProfileTensorValuesV2(char const *tensorName, int32_t profileIndex, OptProfileSelector select) const noexcept
Get the minimum / optimum / maximum values (not dimensions) for an input tensor given its name under ...
Definition: NvInferRuntime.h:3612
TRT_DEPRECATED bool hasImplicitBatchDimension() const noexcept
Query whether the engine was built with an implicit batch dimension.
Definition: NvInferRuntime.h:3330
apiv::VCudaEngine * mImpl
Definition: NvInferRuntime.h:3647
IExecutionContext * createExecutionContext(ExecutionContextAllocationStrategy strategy=ExecutionContextAllocationStrategy::kSTATIC) noexcept
Create an execution context and specify the strategy for allocating internal activation memory.
Definition: NvInferRuntime.h:2915
char const * getTensorFormatDesc(char const *tensorName) const noexcept
Return the human readable description of the tensor format, or empty string if the provided name does...
Definition: NvInferRuntime.h:3165
Dims getProfileShape(char const *tensorName, int32_t profileIndex, OptProfileSelector select) const noexcept
Get the minimum / optimum / maximum dimensions for an input tensor given its name under an optimizati...
Definition: NvInferRuntime.h:3267
bool setWeightStreamingBudgetV2(int64_t gpuMemoryBudget) noexcept
Limit the maximum amount of GPU memory usable for network weights in bytes.
Definition: NvInferRuntime.h:3503
IExecutionContext * createExecutionContext(IRuntimeConfig *runtimeConfig) noexcept
Create an execution context with TensorRT JIT runtime config.
Definition: NvInferRuntime.h:2979
int32_t getNbAuxStreams() const noexcept
Return the number of auxiliary streams used by this engine.
Definition: NvInferRuntime.h:3419
int64_t getStreamableWeightsSize() const noexcept
Get the total size in bytes of all streamable weights.
Definition: NvInferRuntime.h:3461
DataType getTensorDataType(char const *tensorName) const noexcept
Determine the required data type for a buffer from its tensor name.
Definition: NvInferRuntime.h:2870
void setErrorRecorder(IErrorRecorder *recorder) noexcept
Set the ErrorRecorder for this interface.
Definition: NvInferRuntime.h:3301
TacticSources getTacticSources() const noexcept
return the tactic sources required by this engine.
Definition: NvInferRuntime.h:3346
IHostMemory * serializeWithConfig(ISerializationConfig &config) const noexcept
Serialize the network to a stream with the provided SerializationConfig.
Definition: NvInferRuntime.h:3445
virtual ~ICudaEngine() noexcept=default
int64_t getWeightStreamingAutomaticBudget() const noexcept
TensorRT automatically determines a device memory budget for the model to run. The budget is close to...
Definition: NvInferRuntime.h:3546
bool isDebugTensor(char const *name) const noexcept
Check if a tensor is marked as a debug tensor.
Definition: NvInferRuntime.h:3588
int32_t getTensorVectorizedDim(char const *tensorName, int32_t profileIndex) const noexcept
Return the dimension index that the buffer is vectorized of given profile, or -1 if the provided name...
Definition: NvInferRuntime.h:3221
char const * getName() const noexcept
Returns the name of the network associated with the engine.
Definition: NvInferRuntime.h:3236
ProfilingVerbosity getProfilingVerbosity() const noexcept
Return the ProfilingVerbosity the builder config was set to when the engine was built.
Definition: NvInferRuntime.h:3358
bool isShapeInferenceIO(char const *tensorName) const noexcept
True if tensor is required as input for shape calculations or is output from shape calculations.
Definition: NvInferRuntime.h:2953
int64_t getWeightStreamingScratchMemorySize() const noexcept
Returns the size of the scratch memory required by the current weight streaming budget.
Definition: NvInferRuntime.h:3574
int64_t getDeviceMemorySizeV2() const noexcept
Return the maximum device memory required by the context over all profiles.
Definition: NvInferRuntime.h:3008
int32_t getTensorVectorizedDim(char const *tensorName) const noexcept
Return the dimension index that the buffer is vectorized, or -1 if the provided name does not map to ...
Definition: NvInferRuntime.h:3205
int32_t getTensorComponentsPerElement(char const *tensorName, int32_t profileIndex) const noexcept
Return the number of components included in one element of given profile, or -1 if tensor is not vect...
Definition: NvInferRuntime.h:3112
int64_t getDeviceMemorySizeForProfileV2(int32_t profileIndex) const noexcept
Return the maximum device memory required by the context for a profile.
Definition: NvInferRuntime.h:3024
IRuntimeConfig * createRuntimeConfig() noexcept
Create a runtime config for TensorRT JIT. The caller is responsible for ownership of the returned IRu...
Definition: NvInferRuntime.h:2992
TensorFormat getTensorFormat(char const *tensorName) const noexcept
Return the tensor format, or TensorFormat::kLINEAR if the provided name does not map to an input or o...
Definition: NvInferRuntime.h:3127
IHostMemory * serialize() const noexcept
Serialize the network to a stream.
Definition: NvInferRuntime.h:2898
int64_t getEngineStat(EngineStat stat) const noexcept
Get engine statistics according to the given enum value.
Definition: NvInferRuntime.h:3641
TensorLocation getTensorLocation(char const *tensorName) const noexcept
Get whether an input or output tensor must be on GPU or CPU.
Definition: NvInferRuntime.h:2933
IEngineInspector * createEngineInspector() const noexcept
Create a new engine inspector which prints the layer information in an engine or an execution context...
Definition: NvInferRuntime.h:3368
int32_t getTensorBytesPerComponent(char const *tensorName, int32_t profileIndex) const noexcept
Return the number of bytes per component of an element given of given profile, or -1 if the tensor is...
Definition: NvInferRuntime.h:3073
HardwareCompatibilityLevel getHardwareCompatibilityLevel() const noexcept
Return the hardware compatibility level of this engine.
Definition: NvInferRuntime.h:3404
int32_t getNbOptimizationProfiles() const noexcept
Get the number of optimization profiles defined for this engine.
Definition: NvInferRuntime.h:3247
char const * getTensorFormatDesc(char const *tensorName, int32_t profileIndex) const noexcept
Return the human readable description of the tensor format of given profile, or empty string if the p...
Definition: NvInferRuntime.h:3188
TensorIOMode getTensorIOMode(char const *tensorName) const noexcept
Determine whether a tensor is an input or output tensor.
Definition: NvInferRuntime.h:2967
int32_t getNbLayers() const noexcept
Get the number of layers in the network.
Definition: NvInferRuntime.h:2884
int32_t getNbIOTensors() const noexcept
Return number of IO tensors.
Definition: NvInferRuntime.h:3381
int32_t getTensorComponentsPerElement(char const *tensorName) const noexcept
Return the number of components included in one element, or -1 if tensor is not vectorized or if the ...
Definition: NvInferRuntime.h:3094
bool isRefittable() const noexcept
Return true if an engine can be refit.
Definition: NvInferRuntime.h:3034
An IDimensionExpr represents an integer expression constructed from constants, input dimensions,...
Definition: NvInferRuntime.h:232
bool isConstant() const noexcept
Return true if expression is a build-time constant.
Definition: NvInferRuntime.h:237
bool isSizeTensor() const noexcept
Return true if this denotes the value of a size tensor.
Definition: NvInferRuntime.h:263
virtual ~IDimensionExpr() noexcept=default
apiv::VDimensionExpr * mImpl
Definition: NvInferRuntime.h:254
int64_t getConstantValue() const noexcept
Get the value of the constant.
Definition: NvInferRuntime.h:248
An engine inspector which prints out the layer information of an engine or an execution context.
Definition: NvInferRuntime.h:4696
char const * getLayerInformation(int32_t layerIndex, LayerInformationFormat format) const noexcept
Get a string describing the information about a specific layer in the current engine or the execution...
Definition: NvInferRuntime.h:4749
IErrorRecorder * getErrorRecorder() const noexcept
Get the ErrorRecorder assigned to this interface.
Definition: NvInferRuntime.h:4806
void setErrorRecorder(IErrorRecorder *recorder) noexcept
Set the ErrorRecorder for this interface.
Definition: NvInferRuntime.h:4791
IExecutionContext const * getExecutionContext() const noexcept
Get the context currently being inspected.
Definition: NvInferRuntime.h:4724
apiv::VEngineInspector * mImpl
Definition: NvInferRuntime.h:4812
virtual ~IEngineInspector() noexcept=default
char const * getEngineInformation(LayerInformationFormat format) const noexcept
Get a string describing the information about all the layers in the current engine or the execution c...
Definition: NvInferRuntime.h:4772
Context for executing inference using an engine, with functionally unsafe features.
Definition: NvInferRuntime.h:3764
IOutputAllocator * getOutputAllocator(char const *tensorName) const noexcept
Get output allocator associated with output tensor of given name, or nullptr if the provided name doe...
Definition: NvInferRuntime.h:4382
IErrorRecorder * getErrorRecorder() const noexcept
Get the ErrorRecorder assigned to this interface.
Definition: NvInferRuntime.h:4030
bool reportToProfiler() const noexcept
Calculate layer timing info for the current optimization profile in IExecutionContext and update the ...
Definition: NvInferRuntime.h:4149
virtual ~IExecutionContext() noexcept=default
void setDeviceMemory(void *memory) noexcept
Set the device memory for use by this execution context.
Definition: NvInferRuntime.h:3867
bool setTensorDebugState(char const *name, bool flag) noexcept
Set debug state of tensor given the tensor name.
Definition: NvInferRuntime.h:4589
char const * getName() const noexcept
Return the name of the execution context.
Definition: NvInferRuntime.h:3840
IGpuAllocator * getTemporaryStorageAllocator() const noexcept
Get allocator set by setTemporaryStorageAllocator.
Definition: NvInferRuntime.h:4430
void setEnqueueEmitsProfile(bool enqueueEmitsProfile) noexcept
Set whether enqueue emits layer timing to the profiler.
Definition: NvInferRuntime.h:4107
bool setUnfusedTensorsDebugState(bool flag) noexcept
Turn the debug state of unfused tensors on or off.
Definition: NvInferRuntime.h:4640
Dims getTensorShape(char const *tensorName) const noexcept
Return the shape of the given input or output.
Definition: NvInferRuntime.h:3980
bool getDebugState(char const *name) const noexcept
Get the debug state.
Definition: NvInferRuntime.h:4601
bool setInputShape(char const *tensorName, Dims const &dims) noexcept
Set shape of given input.
Definition: NvInferRuntime.h:3943
bool executeV2(void *const *bindings) noexcept
Synchronously execute a network.
Definition: NvInferRuntime.h:4047
bool getEnqueueEmitsProfile() const noexcept
Get the enqueueEmitsProfile state.
Definition: NvInferRuntime.h:4119
void const * getTensorAddress(char const *tensorName) const noexcept
Get memory address bound to given input or output tensor, or nullptr if the provided name does not ma...
Definition: NvInferRuntime.h:4210
bool setOutputAllocator(char const *tensorName, IOutputAllocator *outputAllocator) noexcept
Set output allocator to use for output tensor of given name. Pass nullptr to outputAllocator to unset...
Definition: NvInferRuntime.h:4369
bool setOptimizationProfileAsync(int32_t profileIndex, cudaStream_t stream) noexcept
Select an optimization profile for the current context with async semantics.
Definition: NvInferRuntime.h:4091
apiv::VExecutionContext * mImpl
Definition: NvInferRuntime.h:4656
bool setOutputTensorAddress(char const *tensorName, void *data) noexcept
Set the memory address for a given output tensor.
Definition: NvInferRuntime.h:4233
void setPersistentCacheLimit(size_t size) noexcept
Set the maximum size for persistent cache usage.
Definition: NvInferRuntime.h:4470
size_t getPersistentCacheLimit() const noexcept
Get the maximum size for persistent cache usage.
Definition: NvInferRuntime.h:4481
bool setAllTensorsDebugState(bool flag) noexcept
Turn the debug state of all debug tensors on or off.
Definition: NvInferRuntime.h:4624
ICudaEngine const & getEngine() const noexcept
Get the associated engine.
Definition: NvInferRuntime.h:3816
ProfilingVerbosity getNvtxVerbosity() const noexcept
Get the NVTX verbosity of the execution context.
Definition: NvInferRuntime.h:4517
size_t updateDeviceMemorySizeForShapes() noexcept
Recompute the internal activation buffer sizes based on the current input shapes, and return the tota...
Definition: NvInferRuntime.h:4324
void setAuxStreams(cudaStream_t *auxStreams, int32_t nbStreams) noexcept
Set the auxiliary streams that TensorRT should launch kernels on in the next enqueueV3() call.
Definition: NvInferRuntime.h:4548
int64_t getMaxOutputSize(char const *tensorName) const noexcept
Get upper bound on an output tensor's size, in bytes, based on the current optimization profile and i...
Definition: NvInferRuntime.h:4400
int32_t inferShapes(int32_t nbMaxNames, char const **tensorNames) noexcept
Run shape calculations.
Definition: NvInferRuntime.h:4307
bool setDebugListener(IDebugListener *listener) noexcept
Set DebugListener for this execution context.
Definition: NvInferRuntime.h:4560
bool setTensorAddress(char const *tensorName, void *data) noexcept
Set memory address for given input or output tensor.
Definition: NvInferRuntime.h:4193
bool setTemporaryStorageAllocator(IGpuAllocator *allocator) noexcept
Specify allocator to use for internal temporary storage.
Definition: NvInferRuntime.h:4420
void * getOutputTensorAddress(char const *tensorName) const noexcept
Get memory address for given output.
Definition: NvInferRuntime.h:4274
bool enqueueV3(cudaStream_t stream) noexcept
Enqueue inference on a stream.
Definition: NvInferRuntime.h:4454
IDebugListener * getDebugListener() noexcept
Get the DebugListener of this execution context.
Definition: NvInferRuntime.h:4570
int32_t getOptimizationProfile() const noexcept
Get the index of the currently selected optimization profile.
Definition: NvInferRuntime.h:3925
bool setInputTensorAddress(char const *tensorName, void const *data) noexcept
Set memory address for given input.
Definition: NvInferRuntime.h:4255
bool getDebugSync() const noexcept
Get the debug sync flag.
Definition: NvInferRuntime.h:3786
bool setInputConsumedEvent(cudaEvent_t event) noexcept
Mark input as consumed.
Definition: NvInferRuntime.h:4340
Dims getTensorStrides(char const *tensorName) const noexcept
Return the strides of the buffer for the given tensor name.
Definition: NvInferRuntime.h:3910
bool setNvtxVerbosity(ProfilingVerbosity verbosity) noexcept
Set the verbosity of the NVTX markers in the execution context.
Definition: NvInferRuntime.h:4505
IProfiler * getProfiler() const noexcept
Get the profiler.
Definition: NvInferRuntime.h:3806
void setErrorRecorder(IErrorRecorder *recorder) noexcept
Set the ErrorRecorder for this interface.
Definition: NvInferRuntime.h:4015
void setDeviceMemoryV2(void *memory, int64_t size) noexcept
Set the device memory and its corresponding size for use by this execution context.
Definition: NvInferRuntime.h:3889
bool allInputDimensionsSpecified() const noexcept
Whether all dynamic dimensions of input tensors have been specified.
Definition: NvInferRuntime.h:3996
bool getUnfusedTensorsDebugState() const noexcept
Get the debug state of unfused tensors.
Definition: NvInferRuntime.h:4650
void setProfiler(IProfiler *profiler) noexcept
Set the profiler.
Definition: NvInferRuntime.h:3796
void setName(char const *name) noexcept
Set the name of the execution context.
Definition: NvInferRuntime.h:3830
cudaEvent_t getInputConsumedEvent() const noexcept
The event associated with consuming the input.
Definition: NvInferRuntime.h:4350
IRuntimeConfig * getRuntimeConfig() const noexcept
Get the runtime config object used during execution context creation.
Definition: NvInferRuntime.h:4611
Object for constructing IDimensionExpr.
Definition: NvInferRuntime.h:287
IDimensionExpr const * operation(DimensionOperation op, IDimensionExpr const &first, IDimensionExpr const &second) noexcept
Get the operation.
Definition: NvInferRuntime.h:303
virtual ~IExprBuilder() noexcept=default
IDimensionExpr const * constant(int64_t value) noexcept
Return pointer to IDimensionExpr for given value.
Definition: NvInferRuntime.h:292
apiv::VExprBuilder * mImpl
Definition: NvInferRuntime.h:310
IDimensionExpr const * declareSizeTensor(int32_t outputIndex, IDimensionExpr const &opt, IDimensionExpr const &upper)
Declare a size tensor at the given output index, with the specified auto-tuning formula and upper bou...
Definition: NvInferRuntime.h:338
Class to handle library allocated memory that is accessible to the user.
Definition: NvInferRuntime.h:142
void * data() const noexcept
A pointer to the raw data that is owned by the library.
Definition: NvInferRuntime.h:147
DataType type() const noexcept
The type of the memory that was allocated.
Definition: NvInferRuntime.h:159
std::size_t size() const noexcept
The size in bytes of the data that was allocated.
Definition: NvInferRuntime.h:153
apiv::VHostMemory * mImpl
Definition: NvInferRuntime.h:165
virtual ~IHostMemory() noexcept=default
A virtual base class to find a logger. Allows a plugin to find an instance of a logger if it needs to...
Definition: NvInferRuntime.h:4907
virtual ILogger * findLogger()=0
Get the logger used by the engine or execution context which called the plugin method.
virtual ~ILoggerFinder()=default
Application-implemented logging interface for the builder, refitter and runtime.
Definition: NvInferRuntime.h:1098
virtual ~ILogger()=default
Severity
The severity corresponding to a log message.
Definition: NvInferRuntime.h:1106
@ kWARNING
An application error has been discovered, but TensorRT has recovered or fallen back to a default.
@ kERROR
An application error has occurred.
@ kINFO
Informational messages with instructional information.
@ kINTERNAL_ERROR
An internal error has occurred. Execution is unrecoverable.
@ kVERBOSE
Verbose messages with debugging information.
virtual void log(Severity severity, AsciiChar const *msg) noexcept=0
A callback implemented by the application to handle logging messages;.
Forward declaration of IEngineInspector for use by other interfaces.
Definition: NvInferRuntime.h:51
INoCopy & operator=(INoCopy &&other)=delete
INoCopy(INoCopy const &other)=delete
INoCopy(INoCopy &&other)=delete
virtual ~INoCopy()=default
INoCopy & operator=(INoCopy const &other)=delete
Optimization profile for dynamic input dimensions and shape tensors.
Definition: NvInferRuntime.h:2182
TRT_DEPRECATED int32_t const * getShapeValues(char const *inputName, OptProfileSelector select) const noexcept
Get the minimum / optimum / maximum values for an input shape tensor.
Definition: NvInferRuntime.h:2303
apiv::VOptimizationProfile * mImpl
Definition: NvInferRuntime.h:2419
Dims getDimensions(char const *inputName, OptProfileSelector select) const noexcept
Get the minimum / optimum / maximum dimensions for a dynamic input tensor.
Definition: NvInferRuntime.h:2223
TRT_DEPRECATED bool setShapeValues(char const *inputName, OptProfileSelector select, int32_t const *values, int32_t nbValues) noexcept
Set the minimum / optimum / maximum values for an input shape tensor.
Definition: NvInferRuntime.h:2275
virtual ~IOptimizationProfile() noexcept=default
float getExtraMemoryTarget() const noexcept
Get the extra memory target that has been defined for this profile.
Definition: NvInferRuntime.h:2333
bool setExtraMemoryTarget(float target) noexcept
Set a target for extra GPU memory that may be used by this profile.
Definition: NvInferRuntime.h:2321
bool setDimensions(char const *inputName, OptProfileSelector select, Dims const &dims) noexcept
Set the minimum / optimum / maximum dimensions for a dynamic input tensor.
Definition: NvInferRuntime.h:2211
bool isValid() const noexcept
Check whether the optimization profile can be passed to an IBuilderConfig object.
Definition: NvInferRuntime.h:2350
int64_t const * getShapeValuesV2(char const *inputName, OptProfileSelector select) const noexcept
Get the minimum / optimum / maximum values for an input shape tensor.
Definition: NvInferRuntime.h:2413
bool setShapeValuesV2(char const *inputName, OptProfileSelector select, int64_t const *values, int32_t nbValues) noexcept
Set the minimum / optimum / maximum values for an input shape tensor.
Definition: NvInferRuntime.h:2400
int32_t getNbShapeValues(char const *inputName) const noexcept
Get the number of values for an input shape tensor.
Definition: NvInferRuntime.h:2289
Single registration point for all plugins in an application. It is used to find plugin implementation...
Definition: NvInferRuntimeCommon.h:56
virtual TRT_DEPRECATED bool registerCreator(IPluginCreator &creator, AsciiChar const *const pluginNamespace) noexcept=0
Register a plugin creator implementing IPluginCreator. Returns false if any plugin creator with the s...
Similar to IPluginV2Ext, but with support for dynamic shapes.
Definition: NvInferRuntime.h:407
IPluginV2DynamicExt * clone() const noexcept override=0
Clone the plugin object. This copies over internal plugin parameters as well and returns a new plugin...
virtual ~IPluginV2DynamicExt() noexcept
Definition: NvInferRuntime.h:558
Plugin class for user-implemented layers.
Definition: NvInferRuntimePlugin.h:474
Updates weights in an engine.
Definition: NvInferRuntime.h:1774
bool setWeights(char const *layerName, WeightsRole role, Weights weights) noexcept
Specify new weights for a layer of given name. Returns true on success, or false if new weights are r...
Definition: NvInferRuntime.h:1793
bool refitCudaEngineAsync(cudaStream_t stream) noexcept
Enqueue weights refitting of the associated engine on the given stream.
Definition: NvInferRuntime.h:2104
int32_t getMaxThreads() const noexcept
get the maximum number of threads that can be used by the refitter.
Definition: NvInferRuntime.h:1983
TensorLocation getWeightsLocation(char const *weightsName) const noexcept
Get location for the weights associated with the given name.
Definition: NvInferRuntime.h:2042
bool setNamedWeights(char const *name, Weights weights) noexcept
Specify new weights of given name.
Definition: NvInferRuntime.h:1907
int32_t getAllWeights(int32_t size, char const **weightsNames) noexcept
Get names of all weights that could be refit.
Definition: NvInferRuntime.h:1943
ILogger * getLogger() const noexcept
get the logger with which the refitter was created
Definition: NvInferRuntime.h:1953
bool refitCudaEngine() noexcept
Refits associated engine.
Definition: NvInferRuntime.h:1810
int32_t getMissingWeights(int32_t size, char const **weightsNames) noexcept
Get names of missing weights.
Definition: NvInferRuntime.h:1927
int32_t getMissing(int32_t size, char const **layerNames, WeightsRole *roles) noexcept
Get description of missing weights.
Definition: NvInferRuntime.h:1831
Weights getNamedWeights(char const *weightsName) const noexcept
Get weights associated with the given name.
Definition: NvInferRuntime.h:2026
bool unsetNamedWeights(char const *weightsName) noexcept
Unset weights associated with the given name.
Definition: NvInferRuntime.h:2058
Weights getWeightsPrototype(char const *weightsName) const noexcept
Get the Weights prototype associated with the given name.
Definition: NvInferRuntime.h:2122
bool setMaxThreads(int32_t maxThreads) noexcept
Set the maximum number of threads.
Definition: NvInferRuntime.h:1969
bool setNamedWeights(char const *name, Weights weights, TensorLocation location) noexcept
Specify new weights on a specified device of given name.
Definition: NvInferRuntime.h:2010
void setWeightsValidation(bool weightsValidation) noexcept
Set whether to validate weights during refitting.
Definition: NvInferRuntime.h:2074
apiv::VRefitter * mImpl
Definition: NvInferRuntime.h:2128
int32_t getAll(int32_t size, char const **layerNames, WeightsRole *roles) noexcept
Get description of all weights that could be refit.
Definition: NvInferRuntime.h:1848
virtual ~IRefitter() noexcept=default
bool getWeightsValidation() const noexcept
Get whether to validate weights values during refitting.
Definition: NvInferRuntime.h:2082
void setErrorRecorder(IErrorRecorder *recorder) noexcept
Set the ErrorRecorder for this interface.
Definition: NvInferRuntime.h:1867
IErrorRecorder * getErrorRecorder() const noexcept
Get the ErrorRecorder assigned to this interface.
Definition: NvInferRuntime.h:1882
A class for Runtime cache currently used for TensorRT JIT compilation. This cache can be serialized a...
Definition: NvInferRuntime.h:2638
virtual ~IRuntimeCache() noexcept=default
bool deserialize(void const *blob, size_t size) noexcept
Deserialize the Runtime cache from a stream that contains serialized Runtime cache.
Definition: NvInferRuntime.h:2658
bool reset() noexcept
Reset the Runtime cache. Clears all content within the cache.
Definition: NvInferRuntime.h:2666
apiv::VRuntimeCache * mImpl
Definition: NvInferRuntime.h:2672
A class for runtime configuration. This class is used during execution context creation.
Definition: NvInferRuntime.h:2725
DynamicShapesKernelSpecializationStrategy getDynamicShapesKernelSpecializationStrategy() const noexcept
Return the dynamic shape specialization strategy of this config.
Definition: NvInferRuntime.h:2795
virtual ~IRuntimeConfig() noexcept=default
apiv::VRuntimeConfig * mImpl
Definition: NvInferRuntime.h:2802
IRuntimeCache * createRuntimeCache() const noexcept
Create an empty Runtime cache.
Definition: NvInferRuntime.h:2754
ExecutionContextAllocationStrategy getExecutionContextAllocationStrategy() const noexcept
Get the execution context allocation strategy.
Definition: NvInferRuntime.h:2744
bool setRuntimeCache(IRuntimeCache const &cache) noexcept
Set Runtime cache to the runtime config. Enables Runtime caching.
Definition: NvInferRuntime.h:2764
void setDynamicShapesKernelSpecializationStrategy(DynamicShapesKernelSpecializationStrategy dynamicShapesKernelSpecializationStrategy) noexcept
Set the dynamic shape kernel specialization strategy for this config.
Definition: NvInferRuntime.h:2784
IRuntimeCache * getRuntimeCache() const noexcept
Get the Runtime cache from the runtime config.
Definition: NvInferRuntime.h:2774
Allows a serialized functionally unsafe engine to be deserialized.
Definition: NvInferRuntime.h:1431
bool setMaxThreads(int32_t maxThreads) noexcept
Set the maximum number of threads.
Definition: NvInferRuntime.h:1580
IRuntime * loadRuntime(char const *path) noexcept
Load IRuntime from the file.
Definition: NvInferRuntime.h:1696
bool getEngineHostCodeAllowed() const noexcept
Get whether the runtime is allowed to deserialize engines with host executable code.
Definition: NvInferRuntime.h:1718
int64_t getEngineHeaderSize() const noexcept
Get size of engine header in bytes.
Definition: NvInferRuntime.h:1730
TempfileControlFlags getTempfileControlFlags() const noexcept
Get the tempfile control flags for this runtime.
Definition: NvInferRuntime.h:1668
void setEngineHostCodeAllowed(bool allowed) noexcept
Set whether the runtime is allowed to deserialize engines with host executable code.
Definition: NvInferRuntime.h:1708
virtual ~IRuntime() noexcept=default
void setTemporaryDirectory(char const *path) noexcept
Set the directory that will be used by this runtime for temporary files.
Definition: NvInferRuntime.h:1629
IPluginRegistry & getPluginRegistry() noexcept
Get the local plugin registry that can be used by the runtime.
Definition: NvInferRuntime.h:1678
apiv::VRuntime * mImpl
Definition: NvInferRuntime.h:1763
void setDLACore(int32_t dlaCore) noexcept
Sets the DLA core used by the network. Defaults to -1.
Definition: NvInferRuntime.h:1446
int32_t getNbDLACores() const noexcept
Returns number of DLA hardware cores accessible or 0 if DLA is unavailable.
Definition: NvInferRuntime.h:1464
EngineValidity getEngineValidity(void const *blob, int64_t blobSize, uint64_t *diagnostics) const noexcept
Check for engine validity by inspecting the serialized engine header.
Definition: NvInferRuntime.h:1757
ICudaEngine * deserializeCudaEngine(void const *blob, std::size_t size) noexcept
Deserialize an engine from host memory.
Definition: NvInferRuntime.h:1532
void setTempfileControlFlags(TempfileControlFlags flags) noexcept
Set the tempfile control flags for this runtime.
Definition: NvInferRuntime.h:1656
int32_t getDLACore() const noexcept
Get the DLA core that the engine executes on.
Definition: NvInferRuntime.h:1456
void setGpuAllocator(IGpuAllocator *allocator) noexcept
Set the GPU allocator.
Definition: NvInferRuntime.h:1480
IErrorRecorder * getErrorRecorder() const noexcept
Get the ErrorRecorder assigned to this interface.
Definition: NvInferRuntime.h:1514
ICudaEngine * deserializeCudaEngine(IStreamReaderV2 &streamReader)
Deserialize an engine from a stream. IStreamReaderV2 is expected to support reading to both host and ...
Definition: NvInferRuntime.h:1555
ILogger * getLogger() const noexcept
Get the logger with which the runtime was created.
Definition: NvInferRuntime.h:1565
int32_t getMaxThreads() const noexcept
Get the maximum number of threads that can be used by the runtime.
Definition: NvInferRuntime.h:1594
char const * getTemporaryDirectory() const noexcept
Get the directory that will be used by this runtime for temporary files.
Definition: NvInferRuntime.h:1640
void setErrorRecorder(IErrorRecorder *recorder) noexcept
Set the ErrorRecorder for this interface.
Definition: NvInferRuntime.h:1499
Holds properties for configuring an engine to serialize the binary.
Definition: NvInferRuntime.h:2529
virtual ~ISerializationConfig() noexcept=default
bool clearFlag(SerializationFlag serializationFlag) noexcept
clear a serialization flag.
Definition: NvInferRuntime.h:2568
bool setFlag(SerializationFlag serializationFlag) noexcept
Set a serialization flag.
Definition: NvInferRuntime.h:2580
SerializationFlags getFlags() const noexcept
Get the serialization flags for this config.
Definition: NvInferRuntime.h:2556
bool getFlag(SerializationFlag serializationFlag) const noexcept
Returns true if the serialization flag is set.
Definition: NvInferRuntime.h:2592
apiv::VSerializationConfig * mImpl
Definition: NvInferRuntime.h:2598
An Interface class for version control.
Definition: NvInferRuntimeBase.h:276
IVersionedInterface & operator=(IVersionedInterface const &) &=default
Version information associated with a TRT interface.
Definition: NvInferRuntimeBase.h:241
Register the plugin creator to the registry The static registry object will be instantiated when the ...
Definition: NvInferRuntime.h:4882
PluginRegistrar()
Definition: NvInferRuntime.h:4884
An array of weights used as a layer parameter.
Definition: NvInferRuntime.h:124
DataType type
The type of the weights.
Definition: NvInferRuntime.h:126
int64_t count
The number of weights in the array.
Definition: NvInferRuntime.h:128
void const * values
The weight values, in a contiguous array.
Definition: NvInferRuntime.h:127
Definition: NvInferRuntime.h:3715
virtual bool processDebugTensor(void const *addr, TensorLocation location, DataType type, Dims const &shape, char const *name, cudaStream_t stream)=0
Callback function that is called when a debug tensor’s value is updated and the debug state of the te...
InterfaceInfo getInterfaceInfo() const noexcept override
Return version information associated with this interface. Applications must not override this method...
Definition: NvInferRuntime.h:3720
~IDebugListener() override=default
Definition: NvInferRuntimeBase.h:413
Definition: NvInferRuntime.h:1166
virtual void * allocateAsync(uint64_t const size, uint64_t const alignment, AllocatorFlags const flags, cudaStream_t) noexcept
A thread-safe callback implemented by the application to handle stream-ordered acquisition of GPU mem...
Definition: NvInferRuntime.h:1288
InterfaceInfo getInterfaceInfo() const noexcept override
Return version information associated with this interface. Applications must not override this method...
Definition: NvInferRuntime.h:1329
virtual TRT_DEPRECATED bool deallocate(void *const memory) noexcept=0
A thread-safe callback implemented by the application to handle release of GPU memory.
~IGpuAllocator() override=default
virtual void * reallocate(void *const, uint64_t, uint64_t) noexcept
A thread-safe callback implemented by the application to resize an existing allocation.
Definition: NvInferRuntime.h:1235
virtual TRT_DEPRECATED void * allocate(uint64_t const size, uint64_t const alignment, AllocatorFlags const flags) noexcept=0
A thread-safe callback implemented by the application to handle acquisition of GPU memory.
virtual bool deallocateAsync(void *const memory, cudaStream_t) noexcept
A thread-safe callback implemented by the application to handle stream-ordered release of GPU memory.
Definition: NvInferRuntime.h:1321
Definition: NvInferRuntime.h:4928
bool deallocateAsync(void *const memory, cudaStream_t) noexcept override=0
A thread-safe callback implemented by the application to handle stream-ordered asynchronous release o...
void * allocateAsync(uint64_t const size, uint64_t const alignment, AllocatorFlags const flags, cudaStream_t) noexcept override=0
A thread-safe callback implemented by the application to handle stream-ordered asynchronous acquisiti...
TRT_DEPRECATED void * allocate(uint64_t const size, uint64_t const alignment, AllocatorFlags const flags) noexcept override
A thread-safe callback implemented by the application to handle acquisition of GPU memory.
Definition: NvInferRuntime.h:5015
TRT_DEPRECATED bool deallocate(void *const memory) noexcept override
A thread-safe callback implemented by the application to handle release of GPU memory.
Definition: NvInferRuntime.h:5039
InterfaceInfo getInterfaceInfo() const noexcept override
Return version information associated with this interface. Applications must not override this method...
Definition: NvInferRuntime.h:5047
~IGpuAsyncAllocator() override=default
Definition: NvInferRuntime.h:3653
InterfaceInfo getInterfaceInfo() const noexcept override
Return version information associated with this interface. Applications must not override this method...
Definition: NvInferRuntime.h:3658
virtual void * reallocateOutputAsync(char const *tensorName, void *currentMemory, uint64_t size, uint64_t alignment, cudaStream_t)
Return a pointer to memory for an output tensor, or nullptr if memory cannot be allocated....
Definition: NvInferRuntime.h:3685
virtual void notifyShape(char const *tensorName, Dims const &dims) noexcept=0
Called by TensorRT when the shape of the output tensor is known.
Definition: NvInferRuntime.h:802
virtual ~IProfiler() noexcept
Definition: NvInferRuntime.h:813
virtual void reportLayerTime(char const *layerName, float ms) noexcept=0
Layer time reporting callback.
Definition: NvInferRuntime.h:631
~IStreamReader() override=default
IStreamReader & operator=(IStreamReader const &) &=default
IStreamReader & operator=(IStreamReader &&) &=default
virtual int64_t read(void *destination, int64_t nbBytes)=0
Read the next number of bytes in the stream.
IStreamReader(IStreamReader &&)=default
IStreamReader(IStreamReader const &)=default
InterfaceInfo getInterfaceInfo() const noexcept override
Return version information associated with this interface. Applications must not override this method...
Definition: NvInferRuntime.h:643
Definition: NvInferRuntime.h:741
IStreamReaderV2 & operator=(IStreamReaderV2 const &) &=default
InterfaceInfo getInterfaceInfo() const noexcept override
Return version information associated with this interface. Applications must not override this method...
Definition: NvInferRuntime.h:753
IStreamReaderV2(IStreamReaderV2 &&)=default
~IStreamReaderV2() override=default
virtual int64_t read(void *destination, int64_t nbBytes, cudaStream_t stream) noexcept=0
Read the next number of bytes in the stream asynchronously.
IStreamReaderV2(IStreamReaderV2 const &)=default
virtual bool seek(int64_t offset, SeekPosition where) noexcept=0
Sets the position of the stream to the given offset.
IStreamReaderV2 & operator=(IStreamReaderV2 &&) &=default
Definition: NvInferRuntime.h:666
IStreamWriter & operator=(IStreamWriter const &) &=default
IStreamWriter(IStreamWriter &&)=default
virtual int64_t write(void const *data, int64_t nbBytes)=0
write nbBytes of data into the stream.
IStreamWriter(IStreamWriter const &)=default
InterfaceInfo getInterfaceInfo() const noexcept final
Return version information associated with this interface. Applications must not override this method...
Definition: NvInferRuntime.h:678
IStreamWriter & operator=(IStreamWriter &&) &=default
~IStreamWriter() override=default
IRefitter * createInferRefitter(ICudaEngine &engine, ILogger &logger) noexcept
Create an instance of an IRefitter class.
Definition: NvInferRuntime.h:4862
IRuntime * createInferRuntime(ILogger &logger) noexcept
Create an instance of an IRuntime class.
Definition: NvInferRuntime.h:4851
The TensorRT API version 1 namespace.
uint32_t TacticSources
Represents a collection of one or more TacticSource values combined using bitwise-OR operations.
Definition: NvInferRuntime.h:2468
v_1_0::IOutputAllocator IOutputAllocator
Definition: NvInferRuntime.h:3710
EngineCapability
List of supported engine capability flows.
Definition: NvInferRuntime.h:76
DimensionOperation
An operation on two IDimensionExpr, which represent integer expressions used in dimension computation...
Definition: NvInferRuntime.h:179
@ kSUB
Subtract the second element from the first.
@ kSUM
Sum of the two operands.
@ kPROD
Product of the two operands.
@ kFLOOR_DIV
Floor division of the first element by the second.
@ kEQUAL
1 if operands are equal, 0 otherwise.
@ kMIN
Minimum of the two operands.
@ kLESS
1 if first operand is less than second operand, 0 otherwise.
@ kMAX
Maximum of the two operands.
@ kCEIL_DIV
Division rounding up.
EngineInvalidityDiagnostics
Bitmask indicating the reason(s) why an engine is invalid.
Definition: NvInferRuntime.h:1399
@ kUNSUPPORTED_CC
Unsupported compute capability on current system.
@ kMALFORMED_ENGINE
Serialized engine does not conform to the expected format.
@ kINSUFFICIENT_GPU_MEMORY
Insufficient GPU memory to hold all engine weights.
@ kCUDA_ERROR
Incorrect installation of the CUDA driver or runtime.
@ kOLD_CUDA_DRIVER
CUDA driver too old (driver downgrade compared to when engine was built).
@ kOLD_CUDA_RUNTIME
CUDA runtime too old (runtime downgrade compared to when engine was built).
@ kVERSION_MISMATCH
TensorRT-RTX version mismatch to when engine was built.
TensorIOMode
Definition of tensor IO Mode.
Definition: NvInferRuntimeBase.h:656
HardwareCompatibilityLevel
Describes requirements of compatibility with GPU architectures other than that of the GPU on which th...
Definition: NvInfer.h:8957
SerializationFlag
List of valid flags that the engine can enable when serializing the bytes.
Definition: NvInferRuntime.h:2509
@ kEXCLUDE_WEIGHTS
Exclude the weights that can be refitted.
constexpr int32_t EnumMax< DynamicShapesKernelSpecializationStrategy >() noexcept
Maximum number of dynamic shape specialization strategies in DynamicShapesKernelSpecializationStrateg...
Definition: NvInferRuntime.h:2713
v_1_0::IStreamWriter IStreamWriter
Definition: NvInferRuntime.h:720
v_1_0::IProfiler IProfiler
Definition: NvInferRuntime.h:828
DynamicShapesKernelSpecializationStrategy
Different kernel specialization strategies for dynamic shapes.
Definition: NvInferRuntime.h:2687
SeekPosition
Controls the seek mode of IStreamReaderV2.
Definition: NvInferRuntime.h:727
@ kSET
From the beginning of the file.
@ kCUR
From the current position of the file.
@ kEND
From the tail of the file.
v_1_0::IStreamReaderV2 IStreamReaderV2
Definition: NvInferRuntime.h:797
uint32_t TempfileControlFlags
Represents a collection of one or more TempfileControlFlag values combined using bitwise-OR operation...
Definition: NvInferRuntime.h:906
EngineStat
The kind of engine statistics that queried from the ICudaEngine.
Definition: NvInferRuntime.h:2814
@ kTOTAL_WEIGHTS_SIZE
Return the total weight size in bytes.
@ kSTRIPPED_WEIGHTS_SIZE
Return the stripped weight size in bytes for engines built with BuilderFlag::kSTRIP_PLAN.
v_1_0::IGpuAllocator IGpuAllocator
Definition: NvInferRuntime.h:1365
EngineValidity
Whether a TensorRT-RTX engine is likely to be valid on the current system.
Definition: NvInferRuntime.h:1373
@ kINVALID
Engine is invalid on the current system.
@ kSUBOPTIMAL
Engine is likely to be valid on the current system, but may show reduced performance.
@ kVALID
Engine is likely to be valid on the current system, based on the information in the header.
constexpr int32_t EnumMax< WeightsRole >() noexcept
Maximum number of elements in WeightsRole enum.
Definition: NvInferRuntime.h:849
char_t AsciiChar
Definition: NvInferRuntimeBase.h:115
@ kV2_DYNAMICEXT
IPluginV2DynamicExt.
constexpr int32_t EnumMax< EngineStat >() noexcept
Maximum number of engine statistic kinds in EngineStat enum.
Definition: NvInferRuntime.h:2828
constexpr int32_t EnumMax< LayerInformationFormat >() noexcept
Definition: NvInferRuntime.h:4675
DataType
The type of weights and tensors.
Definition: NvInferRuntimeBase.h:143
DeviceType
The device that this layer/network will execute on.
Definition: NvInferRuntime.h:860
v_1_0::IDebugListener IDebugListener
Definition: NvInferRuntime.h:3751
TempfileControlFlag
Flags used to control TensorRT's behavior when creating executable temporary files.
Definition: NvInferRuntime.h:883
@ kALLOW_IN_MEMORY_FILES
Allow creating and loading files in-memory (or unnamed files).
constexpr int32_t EnumMax< OptProfileSelector >() noexcept
Number of different values of OptProfileSelector enum.
Definition: NvInferRuntime.h:2154
WeightsRole
How a layer uses particular Weights.
Definition: NvInferRuntime.h:838
@ kSHIFT
shift part of IScaleLayer
@ kANY
Any other weights role.
@ kBIAS
bias for IConvolutionLayer or IDeconvolutionLayer
@ kSCALE
scale part of IScaleLayer
@ kCONSTANT
weights for IConstantLayer
@ kKERNEL
kernel for IConvolutionLayer or IDeconvolutionLayer
constexpr int32_t EnumMax< ProfilingVerbosity >() noexcept
Maximum number of profile verbosity levels in ProfilingVerbosity enum.
Definition: NvInferRuntime.h:2488
ProfilingVerbosity
List of verbosity levels of layer information exposed in NVTX annotations and in IEngineInspector.
Definition: NvInferRuntime.h:2480
@ kLAYER_NAMES_ONLY
Print only the layer names. This is the default setting.
@ kDETAILED
Print detailed layer information including layer names and layer parameters.
TacticSource
List of tactic sources for TensorRT.
Definition: NvInferRuntime.h:2431
TensorFormat PluginFormat
PluginFormat is reserved for backward compatibility.
Definition: NvInferRuntimePlugin.h:54
constexpr int32_t EnumMax< TempfileControlFlag >() noexcept
Maximum number of elements in TempfileControlFlag enum.
Definition: NvInferRuntime.h:895
uint32_t SerializationFlags
Represents one or more SerializationFlag values using binary OR operations, e.g., 1U << Serialization...
Definition: NvInferRuntime.h:2499
TensorFormat
Format of the input/output tensors.
Definition: NvInferRuntime.h:940
ExecutionContextAllocationStrategy
Different memory allocation behaviors for IExecutionContext.
Definition: NvInferRuntime.h:2613
@ kSTATIC
Default static allocation with the maximum size across all profiles.
@ kUSER_MANAGED
The user supplies custom allocation to the execution context.
@ kON_PROFILE_CHANGE
Reallocate for a profile when it's selected.
constexpr int32_t EnumMax< TacticSource >() noexcept
Maximum number of tactic sources in TacticSource enum.
Definition: NvInferRuntime.h:2457
LayerInformationFormat
The format in which the IEngineInspector prints the layer information.
Definition: NvInferRuntime.h:4667
@ kJSON
Print layer information in JSON format.
@ kONELINE
Print layer information in one line per layer.
v_1_0::IGpuAsyncAllocator IGpuAsyncAllocator
Definition: NvInferRuntime.h:5068
v_1_0::IStreamReader IStreamReader
Definition: NvInferRuntime.h:710
AllocatorFlag
Allowed type of memory allocation.
Definition: NvInferRuntime.h:1064
@ kRESIZABLE
TensorRT may call realloc() on this allocation.
constexpr int32_t EnumMax< DeviceType >() noexcept
Maximum number of elements in DeviceType enum.
Definition: NvInferRuntime.h:867
constexpr int32_t EnumMax< DimensionOperation >() noexcept
Maximum number of elements in DimensionOperation enum.
Definition: NvInferRuntime.h:193
constexpr int32_t EnumMax< ExecutionContextAllocationStrategy >() noexcept
Maximum number of memory allocation strategies in ExecutionContextAllocationStrategy enum.
Definition: NvInferRuntime.h:2625
constexpr int32_t EnumMax< SerializationFlag >() noexcept
Maximum number of serialization flags in SerializationFlag enum.
Definition: NvInferRuntime.h:2516
TensorLocation
The location for tensor data storage, device or host.
Definition: NvInferRuntime.h:204
@ kHOST
Data stored on host.
@ kDEVICE
Data stored on device.
OptProfileSelector
When setting or querying optimization profile parameters (such as shape tensor inputs or dynamic dime...
Definition: NvInferRuntime.h:2142
@ kOPT
This is used to set or get the value that is used in the optimization (kernel selection).
@ kMIN
This is used to set or get the minimum permitted value for dynamic dimensions etc.
@ kMAX
This is used to set or get the maximum permitted value for dynamic dimensions etc.
uint32_t AllocatorFlags
Definition: NvInferRuntime.h:1080
Summarizes tensors that a plugin might see for an input or output.
Definition: NvInferRuntime.h:362
Dims min
Lower bounds on tensor’s dimensions.
Definition: NvInferRuntime.h:367
Dims max
Upper bounds on tensor’s dimensions.
Definition: NvInferRuntime.h:370
Dims opt
Optimum value of tensor’s dimensions specified for auto-tuning.
Definition: NvInferRuntime.h:373
PluginTensorDesc desc
Information required to interpret a pointer to tensor data, except that desc.dims has -1 in place of ...
Definition: NvInferRuntime.h:364
Fields that a plugin might see for an input or output.
Definition: NvInferRuntimePlugin.h:73
Declaration of EnumMaxImpl struct to store maximum number of elements in an enumeration type.
Definition: NvInferRuntimeBase.h:128

  Copyright © 2024 NVIDIA Corporation
  Privacy Policy | Manage My Privacy | Do Not Sell or Share My Data | Terms of Service | Accessibility | Corporate Policies | Product Security | Contact