TensorRT 10.16.0
NvInferSafeRuntime.h
Go to the documentation of this file.
1/*
2 * SPDX-FileCopyrightText: Copyright (c) 1993-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
3 * SPDX-License-Identifier: Apache-2.0
4 *
5 * Licensed under the Apache License, Version 2.0 (the "License");
6 * you may not use this file except in compliance with the License.
7 * You may obtain a copy of the License at
8 *
9 * http://www.apache.org/licenses/LICENSE-2.0
10 *
11 * Unless required by applicable law or agreed to in writing, software
12 * distributed under the License is distributed on an "AS IS" BASIS,
13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 * See the License for the specific language governing permissions and
15 * limitations under the License.
16 */
17
18// @brief Main header file for the NVIDIA Safe Runtime API.
19// This file provides the primary interface for users to interact with the NVIDIA Safe Runtime API.
20// It includes the necessary definitions, classes, and functions for creating and managing safe graphs,
21// executing inference, and handling errors.
22// Users should include this header file in their application to access the Safe Runtime API functionality.
23
24#ifndef NV_INFER_SAFE_RUNTIME_H
25#define NV_INFER_SAFE_RUNTIME_H
27#include "NvInferSafePlugin.h"
28#include "NvInferSafeRecorder.h"
29#include <algorithm>
30#include <cuda_fp16.h>
31
32namespace nvinfer2
33{
34namespace safe
35{
36using half_t = __half;
37
51{
52public:
53 TypedArray() noexcept
54 : mType(DataType::kFLOAT)
55 , mData(nullptr)
56 , mBufferSize(0U)
57 {
58 }
59 // Shallow copy of TypedArray is allowed
60 TypedArray(TypedArray const&) = default;
61 TypedArray(TypedArray&&) = default;
62 TypedArray& operator=(TypedArray const&) & = default;
64 ~TypedArray() noexcept = default;
65
67 TypedArray(float* ptr, uint64_t const bufferSize) noexcept
68 : mType(DataType::kFLOAT)
69 , mData(ptr)
70 , mBufferSize(bufferSize)
71 {
72 }
73
75 TypedArray(half_t* ptr, uint64_t const bufferSize) noexcept
76 : mType(DataType::kHALF)
77 , mData(ptr)
78 , mBufferSize(bufferSize)
79 {
80 }
81
83 TypedArray(int64_t* ptr, uint64_t const bufferSize) noexcept
84 : mType(DataType::kINT64)
85 , mData(ptr)
86 , mBufferSize(bufferSize)
87 {
88 }
89
91 TypedArray(int32_t* ptr, uint64_t const bufferSize) noexcept
92 : mType(DataType::kINT32)
93 , mData(ptr)
94 , mBufferSize(bufferSize)
95 {
96 }
97
99 TypedArray(int8_t* ptr, uint64_t const bufferSize) noexcept
100 : mType(DataType::kINT8)
101 , mData(ptr)
102 , mBufferSize(bufferSize)
103 {
104 }
105
107 TypedArray(uint8_t* ptr, uint64_t const bufferSize) noexcept
108 : mType(DataType::kUINT8)
109 , mData(ptr)
110 , mBufferSize(bufferSize)
111 {
112 }
114 TypedArray(bool* ptr, uint64_t const bufferSize) noexcept
115 : mType(DataType::kBOOL)
116 , mData(ptr)
117 , mBufferSize(bufferSize)
118 {
119 }
120
121 // NOLINTBEGIN to avoid clang-tidy requiring [[nodiscard]]
123 DataType getType() const noexcept
124 {
125 return mType;
126 }
127
130 float* getFloat() const noexcept
131 {
132 if (mType == DataType::kFLOAT)
133 {
134 return static_cast<float*>(mData);
135 }
136 return nullptr;
137 }
138
141 half_t* getHalf() const noexcept
142 {
143 if (mType == DataType::kHALF)
144 {
145 return static_cast<half_t*>(mData);
146 }
147 return nullptr;
148 }
149
152 int64_t* getInt64() const noexcept
153 {
154 if (mType == DataType::kINT64)
155 {
156 return static_cast<int64_t*>(mData);
157 }
158 return nullptr;
159 }
160
163 int32_t* getInt32() const noexcept
164 {
165 if (mType == DataType::kINT32)
166 {
167 return static_cast<int32_t*>(mData);
168 }
169 return nullptr;
170 }
171
174 int8_t* getInt8() const noexcept
175 {
176 if (mType == DataType::kINT8)
177 {
178 return static_cast<int8_t*>(mData);
179 }
180 return nullptr;
181 }
182
185 uint8_t* getUint8() const noexcept
186 {
187 if (mType == DataType::kUINT8)
188 {
189 return static_cast<uint8_t*>(mData);
190 }
191 return nullptr;
192 }
193
196 bool* getBool() const noexcept
197 {
198 if (mType == DataType::kBOOL)
199 {
200 return static_cast<bool*>(mData);
201 }
202 return nullptr;
203 }
204
206 uint64_t getSize() const noexcept
207 {
208 return mBufferSize;
209 }
210
213 void* getData() const noexcept
214 {
215 return mData;
216 }
217 // NOLINTEND
218
219private:
220 DataType mType; // This is the current type of the data.
221 void* mData; // This is the pointer that holds the data.
222 uint64_t mBufferSize; // This is the size of the buffer in bytes that holds the data.
223};
224
239{
240public:
243 static constexpr int32_t MAX_DIMS{9};
244
246 int32_t nbDims;
247
249 int64_t d[MAX_DIMS];
250};
251
252inline bool operator==(PhysicalDims const& d0, PhysicalDims const& d1) noexcept
253{
254 return d0.nbDims == d1.nbDims && (d0.nbDims <= 0 || std::equal(d0.d, d0.d + d0.nbDims, d1.d));
255}
256
257inline bool operator!=(PhysicalDims const& d0, PhysicalDims const& d1) noexcept
258{
259 return !(d0 == d1);
260}
261
285{
287 AsciiChar const* tensorName{nullptr};
295 DataType dataType{DataType::kFLOAT};
297 uint64_t bytesPerComponent{0U};
301 int64_t vectorizedDim{-1};
303 uint64_t sizeInBytes{0U};
305 TensorIOMode ioMode{TensorIOMode::kNONE};
314};
315
329{
330public:
331 ITRTGraph(ITRTGraph const&) = delete;
332 ITRTGraph(ITRTGraph&&) = delete;
333 ITRTGraph& operator=(ITRTGraph const&) & = delete;
335
339 virtual ~ITRTGraph() noexcept = default;
340
359 virtual ErrorCode clone(ITRTGraph*& graph, ISafeRecorder& recorder) noexcept = 0;
360
374 virtual ErrorCode getScratchMemorySize(size_t& size) const noexcept = 0;
375
386 virtual ErrorCode getTRTManagedScratch(bool& flag) const noexcept = 0;
387
403 virtual ErrorCode setScratchMemory(void* memory) noexcept = 0;
404
418 virtual ErrorCode getScratchMemory(void*& memory) noexcept = 0;
419
428 virtual ErrorCode getNbIOTensors(int64_t& nb) const noexcept = 0;
429
439 virtual ErrorCode getIOTensorName(AsciiChar const*& name, size_t const index) const noexcept = 0;
440
454 TensorDescriptor& desc, AsciiChar const* const tensorName) const noexcept = 0;
455
468 virtual ErrorCode getIOTensorDescriptor(TensorDescriptor& desc, int32_t const index) const noexcept = 0;
469
480 virtual ErrorCode setIOTensorAddress(AsciiChar const* const tensorName, TypedArray const& tensor) noexcept = 0;
481
492 virtual ErrorCode setIOTensorAddress(int32_t const index, TypedArray const& tensor) noexcept = 0;
493
504 virtual ErrorCode getIOTensorAddress(AsciiChar const* const tensorName, TypedArray& tensor) noexcept = 0;
505
516 virtual ErrorCode getIOTensorAddress(int32_t const index, TypedArray& tensor) noexcept = 0;
517
527 virtual ErrorCode setInputConsumedEvent(cudaEvent_t event) noexcept = 0;
528
539 virtual ErrorCode getInputConsumedEvent(cudaEvent_t& event) const noexcept = 0;
540
552 virtual ErrorCode getErrorBuffer(RuntimeErrorInformation*& buffer) const noexcept = 0;
553
562 virtual ErrorCode getSafeRecorder(ISafeRecorder*& recorder) const noexcept = 0;
563
574 virtual ErrorCode getNbIOProfiles(int64_t& nb) const noexcept = 0;
575
590 virtual ErrorCode setIOProfile(int64_t profileIndex) noexcept = 0;
591
601 virtual ErrorCode getIOProfile(int64_t& profileIndex) const noexcept = 0;
602
613 virtual ErrorCode getNbAuxStreams(int32_t& nbStreams) const noexcept = 0;
614
638 virtual ErrorCode setAuxStreams(cudaStream_t* auxStreams, int32_t nbStreams) noexcept = 0;
639
652 virtual ErrorCode executeAsync(cudaStream_t stream) noexcept = 0;
653
664 virtual ErrorCode sync() noexcept = 0;
665
666protected:
667 ITRTGraph() = default;
668};
669
694extern "C" ErrorCode createTRTGraph(ITRTGraph*& graph, void const* buffer, int64_t bufferSize, ISafeRecorder& recorder,
695 bool trtManagedScratch = true, ISafeMemAllocator* allocator = nullptr) noexcept;
696
707extern "C" ErrorCode destroyTRTGraph(ITRTGraph*& graph) noexcept;
708
709} // namespace safe
710} // namespace nvinfer2
711#endif /* NV_INFER_SAFE_RUNTIME_H */
Definition: NvInferRuntimeBase.h:219
Application-implemented class for controlling memory allocation on the GPU/CPU.
Definition: NvInferSafeMemAllocator.h:86
Interface for extended recorder which allows error, warn, debug, or info messages to be recorded.
Definition: NvInferSafeRecorder.h:76
Abstract Interface for a functionally safe graph for executing inference on a built network.
Definition: NvInferSafeRuntime.h:329
virtual ErrorCode getNbIOTensors(int64_t &nb) const noexcept=0
This function returns the total number of input and output tensors for the current graph.
ITRTGraph & operator=(ITRTGraph const &) &=delete
virtual ErrorCode executeAsync(cudaStream_t stream) noexcept=0
Execute one inference of this graph.
virtual ErrorCode getSafeRecorder(ISafeRecorder *&recorder) const noexcept=0
This function retrieves the ISafeRecorder for the current graph.
ITRTGraph(ITRTGraph const &)=delete
virtual ErrorCode setIOProfile(int64_t profileIndex) noexcept=0
This function selects the active IOProfile for the graph. If this function is not called,...
virtual ErrorCode setScratchMemory(void *memory) noexcept=0
This function sets the scratch memory for the graph. This should only be called if scratch memory is ...
virtual ErrorCode setInputConsumedEvent(cudaEvent_t event) noexcept=0
This function sets a cudaEvent on the current graph that triggers when the input is consumed....
virtual ErrorCode setIOTensorAddress(AsciiChar const *const tensorName, TypedArray const &tensor) noexcept=0
This function assigns a user-allocated device memory block for an input tensor to the graph based on ...
virtual ErrorCode getIOProfile(int64_t &profileIndex) const noexcept=0
This function retrieves the index of the current active IOProfile for the graph.
virtual ErrorCode setAuxStreams(cudaStream_t *auxStreams, int32_t nbStreams) noexcept=0
Set the auxiliary streams that TensorRT should use to run kernels on.
virtual ErrorCode getErrorBuffer(RuntimeErrorInformation *&buffer) const noexcept=0
This function retrieves the RuntimeErrorInformation (for async error) buffer for the current graph....
ITRTGraph & operator=(ITRTGraph &&) &=delete
virtual ErrorCode getScratchMemorySize(size_t &size) const noexcept=0
This function returns the scratch memory size (in bytes) needed to store all the intermediate tensors...
virtual ErrorCode getTRTManagedScratch(bool &flag) const noexcept=0
This function returns the trtManagedScratch flag provided in createTRTGraph call.
ITRTGraph(ITRTGraph &&)=delete
virtual ErrorCode sync() noexcept=0
Synchronize one inference of this graph.
virtual ErrorCode getNbAuxStreams(int32_t &nbStreams) const noexcept=0
Return the number of auxiliary streams used by this graph.
virtual ErrorCode getScratchMemory(void *&memory) noexcept=0
This function gets the scratch memory for the graph. This should only be called if scratch memory is ...
virtual ErrorCode getIOTensorName(AsciiChar const *&name, size_t const index) const noexcept=0
This function returns the name of a tensor for a given index.
virtual ErrorCode getIOTensorAddress(AsciiChar const *const tensorName, TypedArray &tensor) noexcept=0
This function gets the memory address for a user-provided input tensor to the graph based on its nam...
virtual ~ITRTGraph() noexcept=default
A shallow destructor of ITRTGraph.
virtual ErrorCode getNbIOProfiles(int64_t &nb) const noexcept=0
This function returns the total number of IO tensor profiles for the current graph.
virtual ErrorCode getInputConsumedEvent(cudaEvent_t &event) const noexcept=0
This function retrieves the cudaEvent on the current graph that triggers when the input is fully cons...
virtual ErrorCode getIOTensorDescriptor(TensorDescriptor &desc, AsciiChar const *const tensorName) const noexcept=0
This function should return a TensorDescriptor which contains all the information about the tensor ba...
virtual ErrorCode clone(ITRTGraph *&graph, ISafeRecorder &recorder) noexcept=0
Specialized Graph shallow copy.
Structure to define the physical dimensions of a tensor with support for up to 9 dimensions.
Definition: NvInferSafeRuntime.h:239
int32_t nbDims
The rank (number of dimensions).
Definition: NvInferSafeRuntime.h:246
static constexpr int32_t MAX_DIMS
Definition: NvInferSafeRuntime.h:243
int64_t d[MAX_DIMS]
The extent of each dimension.
Definition: NvInferSafeRuntime.h:249
A standard_layout and trivially_copyable typed array that knows the data type and size it is holding.
Definition: NvInferSafeRuntime.h:51
TypedArray() noexcept
Definition: NvInferSafeRuntime.h:53
DataType getType() const noexcept
This method returns the current type of the data.
Definition: NvInferSafeRuntime.h:123
TypedArray & operator=(TypedArray const &) &=default
int32_t * getInt32() const noexcept
Retrieves the data as int32_t*. It should only be called when the current type is kINT32....
Definition: NvInferSafeRuntime.h:163
int8_t * getInt8() const noexcept
Retrieves the data as int8_t*. It should only be called when the current type is kINT8....
Definition: NvInferSafeRuntime.h:174
TypedArray(bool *ptr, uint64_t const bufferSize) noexcept
sets the data to a bool ptr. It also sets the current type to kBOOL
Definition: NvInferSafeRuntime.h:114
TypedArray(TypedArray &&)=default
bool * getBool() const noexcept
Retrieves the data as bool*. It should only be called when the current type is kBOOL....
Definition: NvInferSafeRuntime.h:196
half_t * getHalf() const noexcept
Retrieves the data as half_t*. It should only be called when the current type is kHALF....
Definition: NvInferSafeRuntime.h:141
void * getData() const noexcept
Retrieves the data regardless of the type.
Definition: NvInferSafeRuntime.h:213
TypedArray(half_t *ptr, uint64_t const bufferSize) noexcept
sets the data to a half_t ptr. It also sets the current type to kHALF
Definition: NvInferSafeRuntime.h:75
TypedArray(int32_t *ptr, uint64_t const bufferSize) noexcept
sets the data to an int32_t ptr. It also sets the current type to kINT32
Definition: NvInferSafeRuntime.h:91
uint8_t * getUint8() const noexcept
Retrieves the data as uint8_t*. It should only be called when the current type is kUINT8....
Definition: NvInferSafeRuntime.h:185
TypedArray(TypedArray const &)=default
~TypedArray() noexcept=default
uint64_t getSize() const noexcept
Retrieves the size of the array in bytes.
Definition: NvInferSafeRuntime.h:206
TypedArray & operator=(TypedArray &&) &=default
TypedArray(int64_t *ptr, uint64_t const bufferSize) noexcept
sets the data to an int64_t ptr. It also sets the current type to kINT64
Definition: NvInferSafeRuntime.h:83
int64_t * getInt64() const noexcept
Retrieves the data as int64_t*. It should only be called when the current type is kINT64....
Definition: NvInferSafeRuntime.h:152
TypedArray(int8_t *ptr, uint64_t const bufferSize) noexcept
sets the data to an int8_t ptr. It also sets the current type to kINT8
Definition: NvInferSafeRuntime.h:99
TypedArray(uint8_t *ptr, uint64_t const bufferSize) noexcept
sets the data to a uint8_t ptr. It also sets the current type to kUINT8
Definition: NvInferSafeRuntime.h:107
float * getFloat() const noexcept
Retrieves the data as float*. It should only be called when the current type is kFLOAT....
Definition: NvInferSafeRuntime.h:130
ErrorCode
Error codes that can be returned by TensorRT during execution.
Definition: NvInferRuntimeBase.h:312
TensorIOMode
Definition of tensor IO Mode.
Definition: NvInferRuntimeBase.h:659
DataType
The type of weights and tensors. The datatypes other than kBOOL, kINT32, and kINT64 are "activation d...
Definition: NvInferRuntimeBase.h:146
MemoryPlacement
Enum to describe the placement of the memory region.
Definition: NvInferSafeMemAllocator.h:49
@ kNONE
Invalid or unspecified placement (used for error checking)
bool operator==(PhysicalDims const &d0, PhysicalDims const &d1) noexcept
Definition: NvInferSafeRuntime.h:252
nvinfer1::AsciiChar AsciiChar
Definition: NvInferForwardDecl.h:37
__half half_t
Definition: NvInferSafeRuntime.h:36
ErrorCode destroyTRTGraph(ITRTGraph *&graph) noexcept
Toplevel graph destructor.
ErrorCode createTRTGraph(ITRTGraph *&graph, void const *buffer, int64_t bufferSize, ISafeRecorder &recorder, bool trtManagedScratch=true, ISafeMemAllocator *allocator=nullptr) noexcept
The C factory function which serves as an entry point to TRT that creates an instance of an ITRTGraph ...
bool operator!=(PhysicalDims const &d0, PhysicalDims const &d1) noexcept
Definition: NvInferSafeRuntime.h:257
Definition: NvInferConsistency.h:25
Holds information about runtime errors that occur during asynchronous kernel execution.
Definition: NvInferSafeRecorder.h:221
A simple record summarizing various properties of a network IO Tensor.
Definition: NvInferSafeRuntime.h:285
MemoryPlacement memPlacement
Enum to denote whether the Tensor memory is allocated on the GPU, CPU, or CPU_PINNED.
Definition: NvInferSafeRuntime.h:307
AsciiChar const * tensorName
Name of the IO Tensor.
Definition: NvInferSafeRuntime.h:287
uint64_t bytesPerComponent
The size of the tensor data type in bytes (4 for float and int32, 2 for half, 1 for int8)
Definition: NvInferSafeRuntime.h:297
DataType dataType
Definition: NvInferSafeRuntime.h:295
PhysicalDims strideOrder
The order in which the dimensions are laid out in memory.
Definition: NvInferSafeRuntime.h:309
Dims userShape
Definition: NvInferSafeRuntime.h:313
uint64_t sizeInBytes
Total size in bytes for the IO Tensor.
Definition: NvInferSafeRuntime.h:303
uint64_t componentsPerVector
The vector length (in scalars) for a vectorized tensor, 1 if the tensor is not vectorized.
Definition: NvInferSafeRuntime.h:299
int64_t vectorizedDim
The dimension index along which the tensor is vectorized, -1 if the tensor is not vectorized.
Definition: NvInferSafeRuntime.h:301
PhysicalDims shape
Definition: NvInferSafeRuntime.h:290
TensorIOMode ioMode
Enum to denote whether the Tensor is for input or output.
Definition: NvInferSafeRuntime.h:305
PhysicalDims stride
Stride vector for each element of the IO Tensor.
Definition: NvInferSafeRuntime.h:292

  Copyright © 2024 NVIDIA Corporation
  Privacy Policy | Manage My Privacy | Do Not Sell or Share My Data | Terms of Service | Accessibility | Corporate Policies | Product Security | Contact