TensorRT 10.0.1
NvInferRuntimeBase.h
Go to the documentation of this file.
1/*
2 * SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
3 * SPDX-License-Identifier: Apache-2.0
4 *
5 * Licensed under the Apache License, Version 2.0 (the "License");
6 * you may not use this file except in compliance with the License.
7 * You may obtain a copy of the License at
8 *
9 * http://www.apache.org/licenses/LICENSE-2.0
10 *
11 * Unless required by applicable law or agreed to in writing, software
12 * distributed under the License is distributed on an "AS IS" BASIS,
13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 * See the License for the specific language governing permissions and
15 * limitations under the License.
16 */
17
18#ifndef NV_INFER_RUNTIME_BASE_H
19#define NV_INFER_RUNTIME_BASE_H
20
21#include "NvInferVersion.h"
22#include <cstddef>
23#include <cstdint>
24#include <cuda_runtime_api.h>
25
26// Items that are marked as deprecated will be removed in a future release.
// Deprecation markers: TRT_DEPRECATED, TRT_DEPRECATED_ENUM and TRT_DEPRECATED_API are
// defined on every path below; only their expansions differ by language level and compiler.
// C++14 and later: use the standard [[deprecated]] attribute.
27#if __cplusplus >= 201402L
28#define TRT_DEPRECATED [[deprecated]]
// An undefined __GNUC__ evaluates to 0 inside #if, so compilers that do not define it
// also take this branch and get an empty TRT_DEPRECATED_ENUM.
29#if __GNUC__ < 6
30#define TRT_DEPRECATED_ENUM
31#else
32#define TRT_DEPRECATED_ENUM TRT_DEPRECATED
33#endif
// On MSVC, TRT_DEPRECATED_API only exports the symbol; no deprecation marker is attached.
34#ifdef _MSC_VER
35#define TRT_DEPRECATED_API __declspec(dllexport)
36#else
37#define TRT_DEPRECATED_API [[deprecated]] __attribute__((visibility("default")))
38#endif
// Before C++14: MSVC gets empty markers, other compilers use the GNU __attribute__ syntax.
39#else
40#ifdef _MSC_VER
41#define TRT_DEPRECATED
42#define TRT_DEPRECATED_ENUM
43#define TRT_DEPRECATED_API __declspec(dllexport)
44#else
45#define TRT_DEPRECATED __attribute__((deprecated))
46#define TRT_DEPRECATED_ENUM
47#define TRT_DEPRECATED_API __attribute__((deprecated, visibility("default")))
48#endif
49#endif
50
51// Defines which symbols are exported
// TENSORRTAPI expands to an export annotation only when TENSORRT_BUILD_LIB is defined:
// __declspec(dllexport) on MSVC, default symbol visibility elsewhere.
// Without TENSORRT_BUILD_LIB it expands to nothing.
52#ifdef TENSORRT_BUILD_LIB
53#ifdef _MSC_VER
54#define TENSORRTAPI __declspec(dllexport)
55#else
56#define TENSORRTAPI __attribute__((visibility("default")))
57#endif
58#else
59#define TENSORRTAPI
60#endif
61#define TRTNOEXCEPT
73#if !defined(NV_INFER_INTERNAL_INCLUDE_RUNTIME_BASE) && !defined(TRT_VCAST_SAFE)
74static_assert(false, "Do not directly include this file. Include NvInferRuntime.h or NvInferSafeRuntime.h or NvInferConsistency.h or NvInferPluginUtils.h");
75#endif
76
78
79extern "C"
80{
82 struct cublasContext;
84 struct cudnnContext;
85}
86
// Packs a (major, minor, patch) triple into one long value:
// major * 10000 + minor * 100 + patch, e.g. (10, 0, 1) -> 100001.
// Each argument is parenthesized, so expressions may be passed safely.
89#define NV_TENSORRT_VERSION_INT(major, minor, patch) ((major) *10000L + (minor) *100L + (patch) *1L)
90
// The packed version built from the NV_TENSORRT_MAJOR / NV_TENSORRT_MINOR / NV_TENSORRT_PATCH macros.
93#define NV_TENSORRT_VERSION NV_TENSORRT_VERSION_INT(NV_TENSORRT_MAJOR, NV_TENSORRT_MINOR, NV_TENSORRT_PATCH)
94
100namespace nvinfer1
101{
103using char_t = char;
104
108
110namespace v_1_0
111{
112class IErrorRecorder;
113}
115
116namespace impl
117{
119template <typename T>
121} // namespace impl
122
124template <typename T>
125constexpr int32_t EnumMax() noexcept
126{
128}
129
// The type of weights and tensors.
// Backed by int32_t; enumerators are numbered consecutively from 0 to 9.
134enum class DataType : int32_t
135{
137 kFLOAT = 0, // 32-bit floating point format.
138
140 kHALF = 1, // IEEE 16-bit floating-point format - has a 5 bit exponent and 11 bit significand.
141
143 kINT8 = 2, // Signed 8-bit integer representing a quantized floating-point value.
144
146 kINT32 = 3, // Signed 32-bit integer format.
147
149 kBOOL = 4, // 8-bit boolean. 0 = false, 1 = true, other values undefined.
150
163 kUINT8 = 5, // NOTE(review): presumably an unsigned 8-bit integer format - no description in this listing; confirm.
164
167 kFP8 = 6, // NOTE(review): presumably an 8-bit floating-point format - no description in this listing; confirm.
168
170 kBF16 = 7, // Brain float - has an 8 bit exponent and 8 bit significand.
171
173 kINT64 = 8, // Signed 64-bit integer type.
174
176 kINT4 = 9, // Signed 4-bit integer type.
177};
178
179namespace impl
180{
182template <>
184{
186 static constexpr int32_t kVALUE = 10;
187};
188} // namespace impl
189
201{
202public:
204 static constexpr int32_t MAX_DIMS{8};
205
207 int32_t nbDims;
208
210 int64_t d[MAX_DIMS];
211};
212
216using Dims = Dims64;
217
// Format of the input/output tensors.
// Backed by int32_t; enumerators are numbered consecutively from 0 (kLINEAR) to 12 (kDHWC).
// NOTE(review): the per-format layout descriptions are not present in this listing;
// consult the TensorRT documentation before relying on a specific memory layout.
248enum class TensorFormat : int32_t
249{
255 kLINEAR = 0,
256
261 kCHW2 = 1,
262
266 kHWC8 = 2,
267
281 kCHW4 = 3,
282
290 kCHW16 = 4,
291
299 kCHW32 = 5,
300
305 kDHWC8 = 6,
306
311 kCDHW32 = 7,
312
316 kHWC = 8,
317
326 kDLA_LINEAR = 9,
327
341 kDLA_HWC4 = 10,
342
347 kHWC16 = 11,
348
353 kDHWC = 12
354};
355
356using InterfaceKind = char const*;
357
364{
365public:
367 int32_t major;
368 int32_t minor;
369};
370
// Programming language used in the implementation of a TRT interface.
// Backed by int32_t; two values are defined.
376enum class APILanguage : int32_t
377{
378 kCPP = 0,
379 kPYTHON = 1
380};
381
382namespace impl
383{
385template <>
387{
389 static constexpr int32_t kVALUE = 2;
390};
391} // namespace impl
392
399{
400public:
// The language used to build the implementation of this Interface.
// Virtual, so an implementation may report a different language;
// this default implementation always returns APILanguage::kCPP.
406 virtual APILanguage getAPILanguage() const noexcept
407 {
408 return APILanguage::kCPP;
409 }
410
414 virtual InterfaceInfo getInterfaceInfo() const noexcept = 0;
415
416 virtual ~IVersionedInterface() noexcept = default;
417
418protected:
422 IVersionedInterface& operator=(IVersionedInterface const&) & = default;
423 IVersionedInterface& operator=(IVersionedInterface&&) & = default;
424};
425
426namespace impl
427{
429template <>
431{
433 static constexpr int32_t kVALUE = 13;
434};
435} // namespace impl
436
437
// Allowed type of memory allocation.
// Backed by int32_t; a single value is defined.
443enum class AllocatorFlag : int32_t
444{
446 kRESIZABLE = 0, // TensorRT may call realloc() on this allocation.
447};
448
449namespace impl
450{
452template <>
454{
456 static constexpr int32_t kVALUE = 1;
457};
458} // namespace impl
459
460using AllocatorFlags = uint32_t;
461
464namespace v_1_0
465{
466
468{
469public:
495 uint64_t const size, uint64_t const alignment, AllocatorFlags const flags) noexcept = 0;
496
497 ~IGpuAllocator() override = default;
498 IGpuAllocator() = default;
499
// A thread-safe callback implemented by the application to resize an existing allocation.
// Parameters, in order (names are commented out because this default ignores them):
// baseAddr, alignment, newSize.
// This default implementation performs no resize and always returns nullptr.
537 virtual void* reallocate(void* const /*baseAddr*/, uint64_t /*alignment*/, uint64_t /*newSize*/) noexcept
538 {
539 return nullptr;
540 }
541
560 TRT_DEPRECATED virtual bool deallocate(void* const memory) noexcept = 0;
561
// Callback for stream-ordered acquisition of GPU memory, implemented by the application.
// This default implementation ignores the stream argument and forwards
// size, alignment and flags unchanged to allocate(), returning its result.
590 virtual void* allocateAsync(
591 uint64_t const size, uint64_t const alignment, AllocatorFlags const flags, cudaStream_t /*stream*/) noexcept
592 {
593 return allocate(size, alignment, flags);
594 }
// A thread-safe callback implemented by the application to handle stream-ordered release of GPU memory.
// This default implementation ignores the stream argument and forwards
// the pointer to deallocate(), returning its result.
623 virtual bool deallocateAsync(void* const memory, cudaStream_t /*stream*/) noexcept
624 {
625 return deallocate(memory);
626 }
627
// Return version information associated with this interface.
// Reports the interface kind "IGpuAllocator" with the version numbers 1 and 0.
631 InterfaceInfo getInterfaceInfo() const noexcept override
632 {
633 return {"IGpuAllocator", 1, 0};
634 }
635
636protected:
637 // @cond SuppressDoxyWarnings
638 IGpuAllocator(IGpuAllocator const&) = default;
639 IGpuAllocator(IGpuAllocator&&) = default;
640 IGpuAllocator& operator=(IGpuAllocator const&) & = default;
641 IGpuAllocator& operator=(IGpuAllocator&&) & = default;
642 // @endcond
643};
644
645} // namespace v_1_0
646
668
682{
683public:
// The severity corresponding to a log message.
// Backed by int32_t; enumerators are numbered consecutively from 0 to 4.
689 enum class Severity : int32_t
690 {
692 kINTERNAL_ERROR = 0,
694 kERROR = 1,
696 kWARNING = 2,
698 kINFO = 3,
700 kVERBOSE = 4,
701 };
702
721 virtual void log(Severity severity, AsciiChar const* msg) noexcept = 0;
722
723 ILogger() = default;
724 virtual ~ILogger() = default;
725
726protected:
727// @cond SuppressDoxyWarnings
728 ILogger(ILogger const&) = default;
729 ILogger(ILogger&&) = default;
730 ILogger& operator=(ILogger const&) & = default;
731 ILogger& operator=(ILogger&&) & = default;
732// @endcond
733};
734
735namespace impl
736{
// Specialization of EnumMaxImpl for ILogger::Severity.
// kVALUE is 5, matching the five Severity enumerators (values 0 through 4).
738template <>
739struct EnumMaxImpl<ILogger::Severity>
740{
742 static constexpr int32_t kVALUE = 5;
743};
744} // namespace impl
745
// Error codes that can be returned by TensorRT during execution.
// Backed by int32_t.
// NOTE(review): only the enumerators with values 0, 2, 4 and 9 appear in this listing;
// the gaps suggest other enumerators were dropped when the listing was extracted -
// confirm against the original header before relying on this definition.
751enum class ErrorCode : int32_t
752{
756 kSUCCESS = 0,
757
762
767 kINTERNAL_ERROR = 2,
768
774
782 kINVALID_CONFIG = 4,
783
790
796
804
813
826 kINVALID_STATE = 9,
827
839
840};
841
842namespace impl
843{
845template <>
847{
849 static constexpr int32_t kVALUE = 11;
850};
851} // namespace impl
852
853namespace v_1_0
854{
856{
857public:
// Return version information associated with this interface.
// Reports the interface kind "IErrorRecorder" with the version numbers 1 and 0.
861 InterfaceInfo getInterfaceInfo() const noexcept override
862 {
863 return InterfaceInfo{"IErrorRecorder", 1, 0};
864 }
865
869 using ErrorDesc = char const*;
870
876 static constexpr size_t kMAX_DESC_LENGTH{127U};
877
881 using RefCount = int32_t;
882
883 IErrorRecorder() = default;
884 ~IErrorRecorder() noexcept override = default;
885
886 // Public API used to retrieve information from the error recorder.
887
911 virtual int32_t getNbErrors() const noexcept = 0;
912
931 virtual ErrorCode getErrorCode(int32_t errorIdx) const noexcept = 0;
932
954 virtual ErrorDesc getErrorDesc(int32_t errorIdx) const noexcept = 0;
955
970 virtual bool hasOverflowed() const noexcept = 0;
971
986 virtual void clear() noexcept = 0;
987
988 // API used by TensorRT to report Error information to the application.
989
1016 virtual bool reportError(ErrorCode val, ErrorDesc desc) noexcept = 0;
1017
1034 virtual RefCount incRefCount() noexcept = 0;
1035
1052 virtual RefCount decRefCount() noexcept = 0;
1053
1054protected:
1055 // @cond SuppressDoxyWarnings
1056 IErrorRecorder(IErrorRecorder const&) = default;
1057 IErrorRecorder(IErrorRecorder&&) = default;
1058 IErrorRecorder& operator=(IErrorRecorder const&) & = default;
1059 IErrorRecorder& operator=(IErrorRecorder&&) & = default;
1060 // @endcond
1061}; // class IErrorRecorder
1062} // namespace v_1_0
1063
1091using IErrorRecorder = v_1_0::IErrorRecorder;
1092
// Definition of tensor IO Mode.
// Backed by int32_t; enumerators are numbered consecutively from 0 to 2.
1098enum class TensorIOMode : int32_t
1099{
1101 kNONE = 0, // NOTE(review): presumably "neither input nor output" - no description in this listing; confirm.
1102
1104 kINPUT = 1, // Tensor is input to the engine.
1105
1107 kOUTPUT = 2 // Tensor is output by the engine.
1108};
1109
1110namespace v_1_0
1111{
1113{
1114public:
1119 ~IStreamReader() override = default;
1120 IStreamReader() = default;
1121
// Return version information associated with this interface.
// Reports the interface kind "IStreamReader" with the version numbers 1 and 0.
1125 InterfaceInfo getInterfaceInfo() const noexcept override
1126 {
1127 return InterfaceInfo{"IStreamReader", 1, 0};
1128 }
1129
1138 virtual int64_t read(void* destination, int64_t nbBytes) = 0;
1139
1140protected:
1141 IStreamReader(IStreamReader const&) = default;
1145};
1146} // namespace v_1_0
1147
1157
1158namespace v_1_0
1159{
1160
1162{
1163public:
// Return version information associated with this interface.
// Reports the interface kind "IPluginResource" with the version numbers 1 and 0.
1167 InterfaceInfo getInterfaceInfo() const noexcept override
1168 {
1169 return InterfaceInfo{"IPluginResource", 1, 0};
1170 }
1184 virtual int32_t release() noexcept = 0;
1185
1198 virtual IPluginResource* clone() noexcept = 0;
1199
1200 ~IPluginResource() noexcept override = default;
1201
1202 IPluginResource() = default;
1205 IPluginResource& operator=(IPluginResource const&) & = default;
1206 IPluginResource& operator=(IPluginResource&&) & = default;
1207}; // class IPluginResource
1208} // namespace v_1_0
1209
1219
1220namespace impl
1221{
1223template <>
1225{
1226 // Declaration of kVALUE that represents maximum number of elements in TensorIOMode enum
1227 static constexpr int32_t kVALUE = 3;
1228};
1229} // namespace impl
1230} // namespace nvinfer1
1231
1237extern "C" TENSORRTAPI int32_t getInferLibVersion() noexcept;
1238
1239#endif // NV_INFER_RUNTIME_BASE_H
#define TENSORRTAPI
Definition: NvInferRuntimeBase.h:59
int32_t getInferLibVersion() noexcept
Return the library version number.
#define TRT_DEPRECATED
Definition: NvInferRuntimeBase.h:45
Definition: NvInferRuntimeBase.h:201
static constexpr int32_t MAX_DIMS
The maximum rank (number of dimensions) supported for a tensor.
Definition: NvInferRuntimeBase.h:204
int64_t d[MAX_DIMS]
The extent of each dimension.
Definition: NvInferRuntimeBase.h:210
int32_t nbDims
The rank (number of dimensions).
Definition: NvInferRuntimeBase.h:207
Application-implemented logging interface for the builder, refitter and runtime.
Definition: NvInferRuntimeBase.h:682
virtual ~ILogger()=default
Severity
The severity corresponding to a log message.
Definition: NvInferRuntimeBase.h:690
virtual void log(Severity severity, AsciiChar const *msg) noexcept=0
A callback implemented by the application to handle logging messages.
An Interface class for version control.
Definition: NvInferRuntimeBase.h:399
virtual InterfaceInfo getInterfaceInfo() const noexcept=0
Return version information associated with this interface. Applications must not override this method.
virtual APILanguage getAPILanguage() const noexcept
The language used to build the implementation of this Interface.
Definition: NvInferRuntimeBase.h:406
IVersionedInterface & operator=(IVersionedInterface const &) &=default
Version information associated with a TRT interface.
Definition: NvInferRuntimeBase.h:364
InterfaceKind kind
Definition: NvInferRuntimeBase.h:366
int32_t major
Definition: NvInferRuntimeBase.h:367
int32_t minor
Definition: NvInferRuntimeBase.h:368
Definition: NvInferRuntimeBase.h:856
char const * ErrorDesc
A typedef of a C-style string for reporting error descriptions.
Definition: NvInferRuntimeBase.h:869
~IErrorRecorder() noexcept override=default
int32_t RefCount
A typedef of a 32-bit integer for reference counting.
Definition: NvInferRuntimeBase.h:881
InterfaceInfo getInterfaceInfo() const noexcept override
Return version information associated with this interface. Applications must not override this method.
Definition: NvInferRuntimeBase.h:861
Definition: NvInferRuntimeBase.h:468
virtual void * allocateAsync(uint64_t const size, uint64_t const alignment, AllocatorFlags const flags, cudaStream_t) noexcept
A thread-safe callback implemented by the application to handle stream-ordered acquisition of GPU memory.
Definition: NvInferRuntimeBase.h:590
InterfaceInfo getInterfaceInfo() const noexcept override
Return version information associated with this interface. Applications must not override this method.
Definition: NvInferRuntimeBase.h:631
virtual TRT_DEPRECATED bool deallocate(void *const memory) noexcept=0
A thread-safe callback implemented by the application to handle release of GPU memory.
~IGpuAllocator() override=default
virtual void * reallocate(void *const, uint64_t, uint64_t) noexcept
A thread-safe callback implemented by the application to resize an existing allocation.
Definition: NvInferRuntimeBase.h:537
virtual TRT_DEPRECATED void * allocate(uint64_t const size, uint64_t const alignment, AllocatorFlags const flags) noexcept=0
A thread-safe callback implemented by the application to handle acquisition of GPU memory.
virtual bool deallocateAsync(void *const memory, cudaStream_t) noexcept
A thread-safe callback implemented by the application to handle stream-ordered release of GPU memory.
Definition: NvInferRuntimeBase.h:623
Definition: NvInferRuntimeBase.h:1162
virtual int32_t release() noexcept=0
Free the underlying resource.
InterfaceInfo getInterfaceInfo() const noexcept override
Return version information associated with this interface. Applications must not override this method.
Definition: NvInferRuntimeBase.h:1167
Definition: NvInferRuntimeBase.h:1113
~IStreamReader() override=default
IStreamReader & operator=(IStreamReader const &) &=default
IStreamReader & operator=(IStreamReader &&) &=default
virtual int64_t read(void *destination, int64_t nbBytes)=0
Read the next number of bytes in the stream.
IStreamReader(IStreamReader &&)=default
IStreamReader(IStreamReader const &)=default
InterfaceInfo getInterfaceInfo() const noexcept override
Return version information associated with this interface. Applications must not override this method.
Definition: NvInferRuntimeBase.h:1125
The TensorRT API version 1 namespace.
ErrorCode
Error codes that can be returned by TensorRT during execution.
Definition: NvInferRuntimeBase.h:752
TensorIOMode
Definition of tensor IO Mode.
Definition: NvInferRuntimeBase.h:1099
@ kOUTPUT
Tensor is output by the engine.
@ kINPUT
Tensor is input to the engine.
APILanguage
Programming language used in the implementation of a TRT interface.
Definition: NvInferRuntimeBase.h:377
char_t AsciiChar
Definition: NvInferRuntimeBase.h:107
char char_t
char_t is the type used by TensorRT to represent all valid characters.
Definition: NvInferRuntimeBase.h:103
DataType
The type of weights and tensors.
Definition: NvInferRuntimeBase.h:135
@ kINT64
Signed 64-bit integer type.
@ kFLOAT
32-bit floating point format.
@ kBOOL
8-bit boolean. 0 = false, 1 = true, other values undefined.
@ kHALF
IEEE 16-bit floating-point format – has a 5 bit exponent and 11 bit significand.
@ kINT8
Signed 8-bit integer representing a quantized floating-point value.
@ kBF16
Brain float – has an 8 bit exponent and 8 bit significand.
@ kINT4
Signed 4-bit integer type.
@ kINT32
Signed 32-bit integer format.
char const * InterfaceKind
Definition: NvInferRuntimeBase.h:356
v_1_0::IGpuAllocator IGpuAllocator
Definition: NvInferRuntimeBase.h:667
TensorFormat
Format of the input/output tensors.
Definition: NvInferRuntimeBase.h:249
constexpr int32_t EnumMax() noexcept
Maximum number of elements in an enumeration type.
Definition: NvInferRuntimeBase.h:125
v_1_0::IStreamReader IStreamReader
Definition: NvInferRuntimeBase.h:1156
AllocatorFlag
Allowed type of memory allocation.
Definition: NvInferRuntimeBase.h:444
@ kRESIZABLE
TensorRT may call realloc() on this allocation.
v_1_0::IErrorRecorder IErrorRecorder
Definition: NvInferRuntimeBase.h:114
uint32_t AllocatorFlags
Definition: NvInferRuntimeBase.h:460
Declaration of EnumMaxImpl struct to store maximum number of elements in an enumeration type.
Definition: NvInferRuntimeBase.h:120

  Copyright © 2024 NVIDIA Corporation
  Privacy Policy | Manage My Privacy | Do Not Sell or Share My Data | Terms of Service | Accessibility | Corporate Policies | Product Security | Contact