TensorRT 10.0.0
NvInferRuntimeBase.h
Go to the documentation of this file.
1/*
2 * SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
3 * SPDX-License-Identifier: Apache-2.0
4 *
5 * Licensed under the Apache License, Version 2.0 (the "License");
6 * you may not use this file except in compliance with the License.
7 * You may obtain a copy of the License at
8 *
9 * http://www.apache.org/licenses/LICENSE-2.0
10 *
11 * Unless required by applicable law or agreed to in writing, software
12 * distributed under the License is distributed on an "AS IS" BASIS,
13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 * See the License for the specific language governing permissions and
15 * limitations under the License.
16 */
17
18#ifndef NV_INFER_RUNTIME_BASE_H
19#define NV_INFER_RUNTIME_BASE_H
20
21#include "NvInferVersion.h"
22#include <cstddef>
23#include <cstdint>
24#include <cuda_runtime_api.h>
25
26// Items that are marked as deprecated will be removed in a future release.
// Deprecation markers. Every branch below defines the same three macros:
//   TRT_DEPRECATED      - attribute for deprecated declarations,
//   TRT_DEPRECATED_ENUM - attribute for deprecated enumerators (may expand to nothing),
//   TRT_DEPRECATED_API  - attribute for declarations that are exported from the library.
// C++14 and later: the standard [[deprecated]] attribute is used.
27#if __cplusplus >= 201402L
28#define TRT_DEPRECATED [[deprecated]]
// An undefined __GNUC__ evaluates to 0 inside #if, so non-GNU compilers also get the empty definition.
29#if __GNUC__ < 6
30#define TRT_DEPRECATED_ENUM
31#else
32#define TRT_DEPRECATED_ENUM TRT_DEPRECATED
33#endif
34#ifdef _MSC_VER
// MSVC: this expansion only exports the symbol; it carries no deprecation attribute.
35#define TRT_DEPRECATED_API __declspec(dllexport)
36#else
37#define TRT_DEPRECATED_API [[deprecated]] __attribute__((visibility("default")))
38#endif
// Before C++14: fall back to compiler-specific spellings.
39#else
40#ifdef _MSC_VER
// MSVC: both deprecation markers expand to nothing; the API marker only exports the symbol.
41#define TRT_DEPRECATED
42#define TRT_DEPRECATED_ENUM
43#define TRT_DEPRECATED_API __declspec(dllexport)
44#else
// Other compilers: GNU-style attributes; enumerators are left unmarked.
45#define TRT_DEPRECATED __attribute__((deprecated))
46#define TRT_DEPRECATED_ENUM
47#define TRT_DEPRECATED_API __attribute__((deprecated, visibility("default")))
48#endif
49#endif
50
51// Defines which symbols are exported
// TENSORRTAPI marks a symbol as exported only while TENSORRT_BUILD_LIB is defined:
// __declspec(dllexport) under MSVC, default visibility otherwise. Without
// TENSORRT_BUILD_LIB it expands to nothing.
52#ifdef TENSORRT_BUILD_LIB
53#ifdef _MSC_VER
54#define TENSORRTAPI __declspec(dllexport)
55#else
56#define TENSORRTAPI __attribute__((visibility("default")))
57#endif
58#else
59#define TENSORRTAPI
60#endif
// TRTNOEXCEPT is defined with an empty expansion.
61#define TRTNOEXCEPT
70
72
//! Forward declarations of the cublasContext and cudnnContext structs inside an extern "C" block.
//! Only the names are introduced here; no definition of either struct appears in this listing.
73extern "C"
74{
76 struct cublasContext;
78 struct cudnnContext;
79}
80
//! Pack a (major, minor, patch) triple into a single integer:
//! major * 10000 + minor * 100 + patch. The L-suffixed multipliers make the arithmetic at least long-width.
83#define NV_TENSORRT_VERSION_INT(major, minor, patch) ((major) *10000L + (minor) *100L + (patch) *1L)
84
//! The packed version built from NV_TENSORRT_MAJOR, NV_TENSORRT_MINOR and NV_TENSORRT_PATCH.
//! NOTE(review): those three macros are not defined in this listing; presumably they come from NvInferVersion.h.
87#define NV_TENSORRT_VERSION NV_TENSORRT_VERSION_INT(NV_TENSORRT_MAJOR, NV_TENSORRT_MINOR, NV_TENSORRT_PATCH)
88
94namespace nvinfer1
95{
//! char_t is the type used by TensorRT to represent all valid characters.
97using char_t = char;
98
102
//! Forward declaration of v_1_0::IErrorRecorder, so the class can be named before its
//! definition, which appears later in this file.
104namespace v_1_0
105{
106class IErrorRecorder;
107}
109
//! Implementation-detail namespace holding the primary template used by EnumMax().
//! NOTE(review): the declaration that should follow `template <typename T>` is missing from this
//! listing. The index at the end of the page describes it as the EnumMaxImpl struct, which stores
//! the maximum number of elements in an enumeration type.
110namespace impl
111{
113template <typename T>
115} // namespace impl
116
//! Maximum number of elements in an enumeration type.
//!
//! \tparam T The enumeration type being queried.
//!
//! NOTE(review): the statement inside the braces is absent from this listing. Presumably it
//! returns impl::EnumMaxImpl<T>::kVALUE - confirm against the real header.
118template <typename T>
119constexpr int32_t EnumMax() noexcept
120{
122}
123
//! The type of weights and tensors.
128enum class DataType : int32_t
129{
    //! 32-bit floating point format.
131 kFLOAT = 0,
132
    //! IEEE 16-bit floating-point format - has a 5 bit exponent and 11 bit significand.
134 kHALF = 1,
135
    //! Signed 8-bit integer representing a quantized floating-point value.
137 kINT8 = 2,
138
    //! Signed 32-bit integer format.
140 kINT32 = 3,
141
    //! 8-bit boolean. 0 = false, 1 = true, other values undefined.
143 kBOOL = 4,
144
    //! NOTE(review): no description survives in this listing; the name suggests an unsigned 8-bit integer - confirm.
157 kUINT8 = 5,
158
    //! NOTE(review): no description survives in this listing; the name suggests an 8-bit floating-point format - confirm.
161 kFP8 = 6,
162
    //! Brain float - has an 8 bit exponent and 8 bit significand.
164 kBF16 = 7,
165
    //! Signed 64-bit integer type.
167 kINT64 = 8,
168
    //! Signed 4-bit integer type.
170 kINT4 = 9,
171};
172
//! Explicit specialization that records an element count of 10 in kVALUE.
//! NOTE(review): the line naming the specialized type is missing from this listing. Presumably it is
//! EnumMaxImpl<DataType>: this block directly follows DataType, which has ten enumerators (0 through 9).
173namespace impl
174{
176template <>
178{
180 static constexpr int32_t kVALUE = 10;
181};
182} // namespace impl
183
//! Tensor dimensions: a rank plus a fixed-capacity array of extents.
//! NOTE(review): the class header that should precede this brace is missing from the listing; the
//! alias `using Dims = Dims64;` that follows suggests this is class Dims64 - confirm.
195{
196public:
    //! The maximum rank (number of dimensions) supported for a tensor.
198 static constexpr int32_t MAX_DIMS{8};
199
    //! The rank (number of dimensions).
201 int32_t nbDims;
202
    //! The extent of each dimension.
204 int64_t d[MAX_DIMS];
205};
206
//! Dims is an alias for Dims64.
210using Dims = Dims64;
211
//! Format of the input/output tensors.
//!
//! The enumerators take the contiguous values 0 through 12 (thirteen in total).
//! NOTE(review): the per-format layout descriptions are not present in this listing;
//! consult the full header for what each format means.
242enum class TensorFormat : int32_t
243{
249 kLINEAR = 0,
250
255 kCHW2 = 1,
256
260 kHWC8 = 2,
261
275 kCHW4 = 3,
276
284 kCHW16 = 4,
285
293 kCHW32 = 5,
294
299 kDHWC8 = 6,
300
305 kCDHW32 = 7,
306
310 kHWC = 8,
311
320 kDLA_LINEAR = 9,
321
335 kDLA_HWC4 = 10,
336
341 kHWC16 = 11,
342
347 kDHWC = 12
348};
349
//! InterfaceKind is a C-style string (char const*).
350using InterfaceKind = char const*;
351
//! Version information associated with a TRT interface.
//! NOTE(review): the class header is missing from this listing; this is presumably InterfaceInfo, the
//! type returned by getInterfaceInfo(). The index at the end of the page also lists an
//! `InterfaceKind kind` member, which does not appear here - confirm against the real header.
358{
359public:
    //! Major and minor version numbers of the interface.
361 int32_t major;
362 int32_t minor;
363};
364
//! Programming language used in the implementation of a TRT interface.
370enum class APILanguage : int32_t
371{
372 kCPP = 0,
373 kPYTHON = 1
374};
375
//! Explicit specialization that records an element count of 2 in kVALUE.
//! NOTE(review): the line naming the specialized type is missing from this listing. Presumably it is
//! EnumMaxImpl<APILanguage>: this block directly follows APILanguage, which has two enumerators.
376namespace impl
377{
379template <>
381{
383 static constexpr int32_t kVALUE = 2;
384};
385} // namespace impl
386
//! An Interface class for version control.
//! NOTE(review): the class header is missing from this listing; the destructor and assignment
//! operators below name the class IVersionedInterface.
393{
394public:
    //! The language used to build the implementation of this Interface.
    //! This default implementation returns APILanguage::kCPP.
400 virtual APILanguage getAPILanguage() const noexcept
401 {
402 return APILanguage::kCPP;
403 }
404
    //! Return version information associated with this interface.
    //! Pure virtual: every concrete interface must provide it.
408 virtual InterfaceInfo getInterfaceInfo() const noexcept = 0;
409
    //! Virtual destructor, so derived objects can be destroyed through a base pointer.
410 virtual ~IVersionedInterface() noexcept = default;
411
412protected:
    //! Copy and move assignment are protected and lvalue-ref-qualified, so they are
    //! reachable only from derived classes and only on lvalues.
416 IVersionedInterface& operator=(IVersionedInterface const&) & = default;
417 IVersionedInterface& operator=(IVersionedInterface&&) & = default;
418};
419
//! Explicit specialization that records an element count of 13 in kVALUE.
//! NOTE(review): the line naming the specialized type is missing from this listing. Presumably it is
//! EnumMaxImpl<TensorFormat>, the only enum visible above with thirteen enumerators - confirm.
420namespace impl
421{
423template <>
425{
427 static constexpr int32_t kVALUE = 13;
428};
429} // namespace impl
430
431
//! Allowed type of memory allocation.
437enum class AllocatorFlag : int32_t
438{
    //! TensorRT may call realloc() on this allocation.
440 kRESIZABLE = 0,
441};
442
//! Explicit specialization that records an element count of 1 in kVALUE.
//! NOTE(review): the line naming the specialized type is missing from this listing. Presumably it is
//! EnumMaxImpl<AllocatorFlag>: this block directly follows AllocatorFlag, which has one enumerator.
443namespace impl
444{
446template <>
448{
450 static constexpr int32_t kVALUE = 1;
451};
452} // namespace impl
453
//! Unsigned 32-bit type used for the `flags` argument of the allocator callbacks.
//! NOTE(review): presumably a bitmask built from AllocatorFlag values - confirm.
454using AllocatorFlags = uint32_t;
455
//! The TensorRT API version 1 namespace; this part holds the GPU allocator interface.
458namespace v_1_0
459{
460
//! NOTE(review): the class header is missing from this listing; the constructors and destructor
//! below name the class IGpuAllocator, and `override` on the destructor shows it has a polymorphic base.
462{
463public:
    //! A thread-safe callback implemented by the application to handle acquisition of GPU memory.
    //! NOTE(review): the first line of this declaration is missing from the listing. The index at
    //! the end of the page gives it as `virtual TRT_DEPRECATED void* allocate(`.
489 uint64_t const size, uint64_t const alignment, AllocatorFlags const flags) noexcept = 0;
490
491 ~IGpuAllocator() override = default;
492 IGpuAllocator() = default;
493
    //! A thread-safe callback implemented by the application to resize an existing allocation.
    //! This default implementation ignores all arguments and returns nullptr.
531 virtual void* reallocate(void* const /*baseAddr*/, uint64_t /*alignment*/, uint64_t /*newSize*/) noexcept
532 {
533 return nullptr;
534 }
535
    //! A thread-safe callback implemented by the application to handle release of GPU memory.
    //! Marked TRT_DEPRECATED, but still pure virtual, so implementations must provide it.
554 TRT_DEPRECATED virtual bool deallocate(void* const memory) noexcept = 0;
555
    //! A thread-safe callback implemented by the application to handle stream-ordered
    //! acquisition of GPU memory.
    //! This default implementation ignores the stream and forwards to allocate().
584 virtual void* allocateAsync(
585 uint64_t const size, uint64_t const alignment, AllocatorFlags const flags, cudaStream_t /*stream*/) noexcept
586 {
587 return allocate(size, alignment, flags);
588 }
    //! A thread-safe callback implemented by the application to handle stream-ordered
    //! release of GPU memory.
    //! This default implementation ignores the stream and forwards to the deprecated deallocate().
617 virtual bool deallocateAsync(void* const memory, cudaStream_t /*stream*/) noexcept
618 {
619 return deallocate(memory);
620 }
621
    //! Return version information associated with this interface: kind "IGpuAllocator", version 1.0.
625 InterfaceInfo getInterfaceInfo() const noexcept override
626 {
627 return {"IGpuAllocator", 1, 0};
628 }
629
630protected:
    //! Copy/move construction and assignment are protected: available to derived classes only.
631 // @cond SuppressDoxyWarnings
632 IGpuAllocator(IGpuAllocator const&) = default;
633 IGpuAllocator(IGpuAllocator&&) = default;
634 IGpuAllocator& operator=(IGpuAllocator const&) & = default;
635 IGpuAllocator& operator=(IGpuAllocator&&) & = default;
636 // @endcond
637};
638
639} // namespace v_1_0
640
662
//! Application-implemented logging interface for the builder, refitter and runtime.
//! NOTE(review): the class header is missing from this listing; the constructors and destructor
//! below name the class ILogger.
676{
677public:
    //! The severity corresponding to a log message.
    //! Enumerators take the contiguous values 0 (kINTERNAL_ERROR) through 4 (kVERBOSE).
683 enum class Severity : int32_t
684 {
686 kINTERNAL_ERROR = 0,
688 kERROR = 1,
690 kWARNING = 2,
692 kINFO = 3,
694 kVERBOSE = 4,
695 };
696
    //! A callback implemented by the application to handle logging messages.
    //!
    //! \param severity The severity of the message.
    //! \param msg The message text, as a pointer to const AsciiChar.
715 virtual void log(Severity severity, AsciiChar const* msg) noexcept = 0;
716
717 ILogger() = default;
718 virtual ~ILogger() = default;
719
720protected:
    //! Copy/move construction and assignment are protected: available to derived classes only.
721// @cond SuppressDoxyWarnings
722 ILogger(ILogger const&) = default;
723 ILogger(ILogger&&) = default;
724 ILogger& operator=(ILogger const&) & = default;
725 ILogger& operator=(ILogger&&) & = default;
726// @endcond
727};
728
//! Element count for ILogger::Severity: kVALUE is 5, matching its five enumerators (values 0 through 4).
729namespace impl
730{
732template <>
733struct EnumMaxImpl<ILogger::Severity>
734{
736 static constexpr int32_t kVALUE = 5;
737};
738} // namespace impl
739
//! Error codes that can be returned by TensorRT during execution.
//!
//! NOTE(review): only four enumerators survive in this listing (values 0, 2, 4 and 9). The gaps,
//! and the kVALUE of 11 in the specialization that follows, suggest that the enumerators for
//! values 1, 3, 5-8 and 10 were dropped by the extraction - restore them from the real header.
745enum class ErrorCode : int32_t
746{
750 kSUCCESS = 0,
751
756
761 kINTERNAL_ERROR = 2,
762
768
776 kINVALID_CONFIG = 4,
777
784
790
798
807
820 kINVALID_STATE = 9,
821
833
834};
835
//! Explicit specialization that records an element count of 11 in kVALUE.
//! NOTE(review): the line naming the specialized type is missing from this listing. Presumably it is
//! EnumMaxImpl<ErrorCode>, since this block directly follows ErrorCode - confirm.
836namespace impl
837{
839template <>
841{
843 static constexpr int32_t kVALUE = 11;
844};
845} // namespace impl
846
//! The TensorRT API version 1 namespace; this part holds the error recorder interface.
847namespace v_1_0
848{
//! NOTE(review): the class header is missing from this listing; the closing brace comment and the
//! constructors below name the class IErrorRecorder, and `override` shows it has a polymorphic base.
850{
851public:
    //! Return version information associated with this interface: kind "IErrorRecorder", version 1.0.
855 InterfaceInfo getInterfaceInfo() const noexcept override
856 {
857 return InterfaceInfo{"IErrorRecorder", 1, 0};
858 }
859
    //! A typedef of a C-style string for reporting error descriptions.
863 using ErrorDesc = char const*;
864
    //! Length limit constant for error descriptions, set to 127.
    //! NOTE(review): whether the limit includes a terminating NUL is not visible here - confirm.
868 static constexpr size_t kMAX_DESC_LENGTH{127U};
869
    //! A typedef of a 32-bit integer for reference counting.
873 using RefCount = int32_t;
874
875 IErrorRecorder() = default;
876 ~IErrorRecorder() noexcept override = default;
877
878 // Public API used to retrieve information from the error recorder.
879
    //! Pure virtual; returns an int32_t error count.
903 virtual int32_t getNbErrors() const noexcept = 0;
904
    //! Pure virtual; returns the ErrorCode for the entry at index errorIdx.
923 virtual ErrorCode getErrorCode(int32_t errorIdx) const noexcept = 0;
924
    //! Pure virtual; returns the ErrorDesc (C-style string) for the entry at index errorIdx.
946 virtual ErrorDesc getErrorDesc(int32_t errorIdx) const noexcept = 0;
947
    //! Pure virtual; reports an overflow condition as a bool.
    //! NOTE(review): the exact overflow criterion is not visible in this listing.
962 virtual bool hasOverflowed() const noexcept = 0;
963
    //! Pure virtual; takes no arguments and returns nothing.
    //! NOTE(review): presumably empties the recorded errors - confirm.
978 virtual void clear() noexcept = 0;
979
980 // API used by TensorRT to report Error information to the application.
981
    //! Pure virtual; receives an error code and its description.
    //! NOTE(review): the meaning of the bool return value is not visible in this listing.
1008 virtual bool reportError(ErrorCode val, ErrorDesc desc) noexcept = 0;
1009
    //! Pure virtual reference-count operations; each returns a RefCount.
    //! NOTE(review): presumably the count after the change - confirm.
1026 virtual RefCount incRefCount() noexcept = 0;
1027
1044 virtual RefCount decRefCount() noexcept = 0;
1045
1046protected:
    //! Copy/move construction and assignment are protected: available to derived classes only.
1047 // @cond SuppressDoxyWarnings
1048 IErrorRecorder(IErrorRecorder const&) = default;
1049 IErrorRecorder(IErrorRecorder&&) = default;
1050 IErrorRecorder& operator=(IErrorRecorder const&) & = default;
1051 IErrorRecorder& operator=(IErrorRecorder&&) & = default;
1052 // @endcond
1053}; // class IErrorRecorder
1054} // namespace v_1_0
1055
//! Exposes v_1_0::IErrorRecorder under the unversioned name IErrorRecorder in the enclosing namespace.
1083using IErrorRecorder = v_1_0::IErrorRecorder;
1084
//! Definition of tensor IO Mode.
1090enum class TensorIOMode : int32_t
1091{
    //! NOTE(review): no description survives in this listing; presumably the tensor is neither input nor output.
1093 kNONE = 0,
1094
    //! Tensor is input to the engine.
1096 kINPUT = 1,
1097
    //! Tensor is output by the engine.
1099 kOUTPUT = 2
1100};
1101
//! The TensorRT API version 1 namespace; this part holds the stream reader interface.
1102namespace v_1_0
1103{
//! NOTE(review): the class header is missing from this listing; the constructors and destructor
//! below name the class IStreamReader, and `override` shows it has a polymorphic base.
1105{
1106public:
1111 ~IStreamReader() override = default;
1112 IStreamReader() = default;
1113
    //! Return version information associated with this interface: kind "IStreamReader", version 1.0.
1117 InterfaceInfo getInterfaceInfo() const noexcept override
1118 {
1119 return InterfaceInfo{"IStreamReader", 1, 0};
1120 }
1121
    //! Read the next number of bytes in the stream.
    //!
    //! \param destination Buffer that receives the data.
    //! \param nbBytes Number of bytes requested.
    //!
    //! Unlike the other virtual methods in this file, this one is not declared noexcept.
    //! NOTE(review): the meaning of the int64_t return value is not visible here; presumably the
    //! number of bytes actually read - confirm.
1130 virtual int64_t read(void* destination, int64_t nbBytes) = 0;
1131
1132protected:
    //! NOTE(review): the index at the end of the page also lists a move constructor and copy/move
    //! assignment operators for this class; those lines are missing from this listing.
1133 IStreamReader(IStreamReader const&) = default;
1137};
1138} // namespace v_1_0
1139
1149
//! The TensorRT API version 1 namespace; this part holds the plugin resource interface.
1150namespace v_1_0
1151{
1152
//! NOTE(review): the class header is missing from this listing; the closing brace comment and the
//! constructor below name the class IPluginResource, and `override` shows it has a polymorphic base.
1154{
1155public:
    //! Return version information associated with this interface: kind "IPluginResource", version 1.0.
1159 InterfaceInfo getInterfaceInfo() const noexcept override
1160 {
1161 return InterfaceInfo{"IPluginResource", 1, 0};
1162 }
    //! Free the underlying resource.
    //! NOTE(review): the meaning of the int32_t return value is not visible in this listing.
1176 virtual int32_t release() noexcept = 0;
1177
    //! Pure virtual; returns a pointer to an IPluginResource.
    //! NOTE(review): ownership of the returned object is not visible in this listing - confirm.
1190 virtual IPluginResource* clone() noexcept = 0;
1191
1192 ~IPluginResource() noexcept override = default;
1193
1194 IPluginResource() = default;
    //! NOTE(review): original lines 1195-1196 are missing from this listing, so the access level
    //! of the assignment operators below cannot be determined here.
1197 IPluginResource& operator=(IPluginResource const&) & = default;
1198 IPluginResource& operator=(IPluginResource&&) & = default;
1199}; // class IPluginResource
1200} // namespace v_1_0
1201
1211
//! Explicit specialization whose kVALUE of 3 is the element count of the TensorIOMode enum
//! (kNONE, kINPUT, kOUTPUT).
//! NOTE(review): the line naming the specialized type is missing from this listing; the comment
//! inside the body identifies the enum as TensorIOMode.
1212namespace impl
1213{
1215template <>
1217{
1218 // Declaration of kVALUE that represents maximum number of elements in TensorIOMode enum
1219 static constexpr int32_t kVALUE = 3;
1220};
1221} // namespace impl
1222} // namespace nvinfer1
1223
//! Return the library version number.
//! Declared with C linkage and TENSORRTAPI.
//! NOTE(review): the encoding of the returned integer is not shown here; presumably it matches
//! NV_TENSORRT_VERSION_INT - confirm.
1229extern "C" TENSORRTAPI int32_t getInferLibVersion() noexcept;
1230
1231#endif // NV_INFER_RUNTIME_BASE_H
#define TENSORRTAPI
Definition: NvInferRuntimeBase.h:59
int32_t getInferLibVersion() noexcept
Return the library version number.
#define TRT_DEPRECATED
Definition: NvInferRuntimeBase.h:45
nvinfer1::Dims64
Definition: NvInferRuntimeBase.h:195
static constexpr int32_t MAX_DIMS
The maximum rank (number of dimensions) supported for a tensor.
Definition: NvInferRuntimeBase.h:198
int64_t d[MAX_DIMS]
The extent of each dimension.
Definition: NvInferRuntimeBase.h:204
int32_t nbDims
The rank (number of dimensions).
Definition: NvInferRuntimeBase.h:201
Application-implemented logging interface for the builder, refitter and runtime.
Definition: NvInferRuntimeBase.h:676
virtual ~ILogger()=default
Severity
The severity corresponding to a log message.
Definition: NvInferRuntimeBase.h:684
virtual void log(Severity severity, AsciiChar const *msg) noexcept=0
A callback implemented by the application to handle logging messages.
An Interface class for version control.
Definition: NvInferRuntimeBase.h:393
virtual InterfaceInfo getInterfaceInfo() const noexcept=0
Return version information associated with this interface. Applications must not override this method...
virtual APILanguage getAPILanguage() const noexcept
The language used to build the implementation of this Interface.
Definition: NvInferRuntimeBase.h:400
IVersionedInterface & operator=(IVersionedInterface const &) &=default
Version information associated with a TRT interface.
Definition: NvInferRuntimeBase.h:358
InterfaceKind kind
Definition: NvInferRuntimeBase.h:360
int32_t major
Definition: NvInferRuntimeBase.h:361
int32_t minor
Definition: NvInferRuntimeBase.h:362
nvinfer1::v_1_0::IErrorRecorder
Definition: NvInferRuntimeBase.h:850
char const * ErrorDesc
A typedef of a C-style string for reporting error descriptions.
Definition: NvInferRuntimeBase.h:863
~IErrorRecorder() noexcept override=default
int32_t RefCount
A typedef of a 32-bit integer for reference counting.
Definition: NvInferRuntimeBase.h:873
InterfaceInfo getInterfaceInfo() const noexcept override
Return version information associated with this interface. Applications must not override this method...
Definition: NvInferRuntimeBase.h:855
nvinfer1::v_1_0::IGpuAllocator
Definition: NvInferRuntimeBase.h:462
virtual void * allocateAsync(uint64_t const size, uint64_t const alignment, AllocatorFlags const flags, cudaStream_t) noexcept
A thread-safe callback implemented by the application to handle stream-ordered acquisition of GPU mem...
Definition: NvInferRuntimeBase.h:584
InterfaceInfo getInterfaceInfo() const noexcept override
Return version information associated with this interface. Applications must not override this method...
Definition: NvInferRuntimeBase.h:625
virtual TRT_DEPRECATED bool deallocate(void *const memory) noexcept=0
A thread-safe callback implemented by the application to handle release of GPU memory.
~IGpuAllocator() override=default
virtual void * reallocate(void *const, uint64_t, uint64_t) noexcept
A thread-safe callback implemented by the application to resize an existing allocation.
Definition: NvInferRuntimeBase.h:531
virtual TRT_DEPRECATED void * allocate(uint64_t const size, uint64_t const alignment, AllocatorFlags const flags) noexcept=0
A thread-safe callback implemented by the application to handle acquisition of GPU memory.
virtual bool deallocateAsync(void *const memory, cudaStream_t) noexcept
A thread-safe callback implemented by the application to handle stream-ordered release of GPU memory.
Definition: NvInferRuntimeBase.h:617
nvinfer1::v_1_0::IPluginResource
Definition: NvInferRuntimeBase.h:1154
virtual int32_t release() noexcept=0
Free the underlying resource.
InterfaceInfo getInterfaceInfo() const noexcept override
Return version information associated with this interface. Applications must not override this method...
Definition: NvInferRuntimeBase.h:1159
nvinfer1::v_1_0::IStreamReader
Definition: NvInferRuntimeBase.h:1105
~IStreamReader() override=default
IStreamReader & operator=(IStreamReader const &) &=default
IStreamReader & operator=(IStreamReader &&) &=default
virtual int64_t read(void *destination, int64_t nbBytes)=0
Read the next number of bytes in the stream.
IStreamReader(IStreamReader &&)=default
IStreamReader(IStreamReader const &)=default
InterfaceInfo getInterfaceInfo() const noexcept override
Return version information associated with this interface. Applications must not override this method...
Definition: NvInferRuntimeBase.h:1117
The TensorRT API version 1 namespace.
ErrorCode
Error codes that can be returned by TensorRT during execution.
Definition: NvInferRuntimeBase.h:746
TensorIOMode
Definition of tensor IO Mode.
Definition: NvInferRuntimeBase.h:1091
@ kOUTPUT
Tensor is output by the engine.
@ kINPUT
Tensor is input to the engine.
APILanguage
Programming language used in the implementation of a TRT interface.
Definition: NvInferRuntimeBase.h:371
char_t AsciiChar
Definition: NvInferRuntimeBase.h:101
char char_t
char_t is the type used by TensorRT to represent all valid characters.
Definition: NvInferRuntimeBase.h:97
DataType
The type of weights and tensors.
Definition: NvInferRuntimeBase.h:129
@ kINT64
Signed 64-bit integer type.
@ kFLOAT
32-bit floating point format.
@ kBOOL
8-bit boolean. 0 = false, 1 = true, other values undefined.
@ kHALF
IEEE 16-bit floating-point format – has a 5 bit exponent and 11 bit significand.
@ kINT8
Signed 8-bit integer representing a quantized floating-point value.
@ kBF16
Brain float – has an 8 bit exponent and 8 bit significand.
@ kINT4
Signed 4-bit integer type.
@ kINT32
Signed 32-bit integer format.
char const * InterfaceKind
Definition: NvInferRuntimeBase.h:350
v_1_0::IGpuAllocator IGpuAllocator
Definition: NvInferRuntimeBase.h:661
TensorFormat
Format of the input/output tensors.
Definition: NvInferRuntimeBase.h:243
constexpr int32_t EnumMax() noexcept
Maximum number of elements in an enumeration type.
Definition: NvInferRuntimeBase.h:119
v_1_0::IStreamReader IStreamReader
Definition: NvInferRuntimeBase.h:1148
AllocatorFlag
Allowed type of memory allocation.
Definition: NvInferRuntimeBase.h:438
@ kRESIZABLE
TensorRT may call realloc() on this allocation.
v_1_0::IErrorRecorder IErrorRecorder
Definition: NvInferRuntimeBase.h:108
uint32_t AllocatorFlags
Definition: NvInferRuntimeBase.h:454
Declaration of EnumMaxImpl struct to store maximum number of elements in an enumeration type.
Definition: NvInferRuntimeBase.h:114