NVIDIA DeepStream SDK API Reference

7.0 Release
nvdsinfer_backend.h
Go to the documentation of this file.
1 /*
2  * SPDX-FileCopyrightText: Copyright (c) 2019-2020 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
3  * SPDX-License-Identifier: LicenseRef-NvidiaProprietary
4  *
5  * NVIDIA CORPORATION, its affiliates and licensors retain all intellectual
6  * property and proprietary rights in and to this material, related
7  * documentation and any modifications thereto. Any use, reproduction,
8  * disclosure or distribution of this material and related documentation
9  * without an express license agreement from NVIDIA CORPORATION or
10  * its affiliates is strictly prohibited.
11  */
12 
13 #ifndef __NVDSINFER_BACKEND_H__
14 #define __NVDSINFER_BACKEND_H__
15 
16 #include <stdarg.h>
17 #include <condition_variable>
18 #include <memory>
19 #include <mutex>
20 #include <queue>
21 
22 #include <cuda_runtime_api.h>
23 
24 #include <NvCaffeParser.h>
25 #include <NvInfer.h>
26 #include <NvInferRuntime.h>
27 
28 #include "nvdsinfer_func_utils.h"
29 
30 /* This file provides backend inference interface for abstracting implementation
31  * details in various cases like inferencing on implicit batch dims/full dims
32  * network, inferencing on DLA etc. This file also provides helper classes for
33  * managing the lifecycle of CUDA resources like streams, buffers, events. */
34 
35 namespace nvdsinfer {
36 
// NOTE(review): the extraction dropped the class-header line here; the index
// below identifies this as "class CudaStream — Helper class for managing Cuda
// Streams" (nvdsinfer_backend.h:40). RAII-style owner of a cudaStream_t.
41 {
42 public:
// Creation flags/priority mirror the CUDA stream-creation API; presumably
// forwards to cudaStreamCreateWithPriority — implementation not visible here.
43  explicit CudaStream(uint flag = cudaStreamDefault, int priority = 0);
// Presumably destroys the owned stream handle — implementation not visible here.
44  ~CudaStream();
// Implicit conversion so the wrapper can be passed straight to CUDA APIs.
45  operator cudaStream_t() { return m_Stream; }
// Mutable access to the raw handle (e.g. for out-parameter style creation).
46  cudaStream_t& ptr() { return m_Stream; }
// NOTE(review): original line 47 (per the index, SIMPLE_MOVE_COPY(CudaStream),
// infer_defines.h:29) was lost in extraction.
48 
49 private:
// Transfers ownership of the handle and nulls the source so the moved-from
// object's destructor will not release the stream twice.
50  void move_copy(CudaStream&& o)
51  {
52  m_Stream = o.m_Stream;
53  o.m_Stream = nullptr;
54  }
// Project macro; by its name, deletes copy construction/assignment.
55  DISABLE_CLASS_COPY(CudaStream);
56 
// Owned CUDA stream handle; nullptr when empty or moved-from.
57  cudaStream_t m_Stream = nullptr;
58 };
59 
// RAII-style owner of a cudaEvent_t ("Helper class for managing Cuda events"
// per the index). Move-only; mirrors the CudaStream wrapper above.
63 class CudaEvent
64 {
65 public:
// Creation flags mirror the CUDA event API; presumably forwards to
// cudaEventCreateWithFlags — implementation not visible here.
66  explicit CudaEvent(uint flag = cudaEventDefault);
// Presumably destroys the owned event handle — implementation not visible here.
67  ~CudaEvent();
// Implicit conversion so the wrapper can be passed straight to CUDA APIs.
68  operator cudaEvent_t() { return m_Event; }
// Mutable access to the raw handle.
69  cudaEvent_t& ptr() { return m_Event; }
// NOTE(review): original line 70 (likely SIMPLE_MOVE_COPY(CudaEvent)) was lost
// in extraction.
71 
72 private:
// Transfers ownership of the handle and nulls the source so the moved-from
// object's destructor will not release the event twice.
73  void move_copy(CudaEvent&& o)
74  {
75  m_Event = o.m_Event;
76  o.m_Event = nullptr;
77  }
// Project macro; by its name, deletes copy construction/assignment.
78  DISABLE_CLASS_COPY(CudaEvent);
79 
// Owned CUDA event handle; nullptr when empty or moved-from.
80  cudaEvent_t m_Event = nullptr;
81 };
82 
// NOTE(review): the extraction dropped the class-header line here; the index
// identifies this as "class CudaBuffer — Helper base class for managing Cuda
// allocated buffers" (nvdsinfer_backend.h:86). Owns a raw allocation (m_Buf)
// of m_Size bytes; concrete device/host subclasses follow below.
87 {
88 public:
89  virtual ~CudaBuffer() = default;
// Size of the allocation in bytes.
90  size_t bytes() const { return m_Size; }
91 
// Typed view of the buffer; caller asserts the element type — no checking done.
92  template <typename T>
93  T* ptr()
94  {
95  return (T*)m_Buf;
96  }
97 
// Untyped access to the raw allocation.
98  void* ptr() { return m_Buf; }
// NOTE(review): original line 99 (likely a move-copy macro) was lost in
// extraction.
100 
101 protected:
// Protected: only subclasses (which perform the actual allocation) construct.
102  explicit CudaBuffer(size_t s) : m_Size(s) {}
// NOTE(review): original line 103 — per the index, the signature
// "void move_copy(CudaBuffer&& o)" — was lost in extraction; this is its body.
// Transfers ownership of pointer and size, zeroing the source.
104  {
105  m_Buf = o.m_Buf;
106  o.m_Buf = nullptr;
107  m_Size = o.m_Size;
108  o.m_Size = 0;
109  }
// NOTE(review): original line 110 (per the index, DISABLE_CLASS_COPY(CudaBuffer))
// was lost in extraction.
// Owned allocation; nullptr when empty or moved-from.
111  void* m_Buf = nullptr;
112  size_t m_Size = 0;
113 };
114 
// NOTE(review): class-header line lost in extraction; the index identifies this
// as "class CudaDeviceBuffer — CUDA device buffers" (nvdsinfer_backend.h:118),
// presumably deriving from CudaBuffer above. Allocation is device memory
// (likely cudaMalloc) — implementation not visible here; confirm in the .cpp.
119 {
120 public:
121  explicit CudaDeviceBuffer(size_t size);
// NOTE(review): original line 122 (per the index, ~CudaDeviceBuffer()) was
// lost in extraction.
123 };
124 
// NOTE(review): class-header line lost in extraction; the index identifies this
// as "class CudaHostBuffer — CUDA host buffers" (nvdsinfer_backend.h:128),
// presumably deriving from CudaBuffer above. Allocation is host memory
// (likely pinned via cudaMallocHost) — implementation not visible; confirm.
129 {
130 public:
131  explicit CudaHostBuffer(size_t size);
132  ~CudaHostBuffer();
133 };
134 
// NOTE(review): class-header line lost in extraction; the index identifies this
// as "class InferBatchBuffer — Abstract interface to manage a batched buffer
// for inference" (nvdsinfer_backend.h:138). Non-copyable; purely virtual.
139 {
140 public:
141  InferBatchBuffer() = default;
142  virtual ~InferBatchBuffer() = default;
143 
144  /* Get device buffer pointers for bound layers associated with this batch. */
145  virtual std::vector<void*>& getDeviceBuffers() = 0;
146  /* Get the data type of the buffer(layer) for a bound layer having index
147  * `bindingIndex`. */
148  virtual NvDsInferDataType getDataType(int bindingIndex = 0) const = 0;
149  /* Get the batch dimensions for the buffer allocated for a bound layer having
150  * index `bindingIndex. */
151  virtual NvDsInferBatchDims getBatchDims(int bindingIndex = 0) const = 0;
152 
153 private:
// Project macro; by its name, deletes copy construction/assignment.
154  DISABLE_CLASS_COPY(InferBatchBuffer);
155 };
156 
// NOTE(review): class-header line lost in extraction; the index identifies this
// as "class BackendContext — Abstract interface for managing the actual
// inferencing implementation" (nvdsinfer_backend.h:168). Concrete TensorRT
// implementations (implicit-batch / full-dims / DLA variants) follow below.
169 {
170 public:
171  BackendContext() = default;
172  virtual ~BackendContext() = default;
173 
174  /* Initialize the backend context. */
175  virtual NvDsInferStatus initialize() = 0;
176  /* Get the number of bound layers for the engine. */
177  virtual int getNumBoundLayers() = 0;
178 
179  /* Get information for a bound layer with index `bindingIdx`. */
180  virtual const NvDsInferBatchDimsLayerInfo& getLayerInfo(int bindingIdx) = 0;
181  /* Get binding index for a bound layer with name `bindingName`. */
182  virtual int getLayerIdx(const std::string& bindingName) = 0;
183 
184  /* Returns if the bound layer at index `bindingIdx` can support the
185  * provided batch dimensions. */
186  virtual bool canSupportBatchDims(
187  int bindingIdx, const NvDsInferBatchDims& batchDims) = 0;
188 
189  /* Get the min/max/optimal batch dimensions for a bound layer. */
190  virtual NvDsInferBatchDims getMaxBatchDims(int bindingIdx) = 0;
191  virtual NvDsInferBatchDims getMinBatchDims(int bindingIdx) = 0;
192  virtual NvDsInferBatchDims getOptBatchDims(int bindingIdx) = 0;
193 
194  /* Enqueue a batched buffer for inference. */
// NOTE(review): original line 195 — per the index, the declaration start
// "virtual NvDsInferStatus enqueueBuffer(" — was lost in extraction; these are
// its parameter lines. `consumeEvent` may be null per the pointer type; the
// exact consume semantics are not visible here — confirm in implementations.
196  const std::shared_ptr<InferBatchBuffer>& buffer, CudaStream& stream,
197  CudaEvent* consumeEvent) = 0;
198 
199 private:
// Project macro; by its name, deletes copy construction/assignment.
200  DISABLE_CLASS_COPY(BackendContext);
201 };
202 
203 class TrtEngine;
204 
// NOTE(review): class-header line lost in extraction; the index identifies this
// as "class TrtBackendContext — Base class for implementations of the
// BackendContext interface" (nvdsinfer_backend.h:209). Wraps a TensorRT
// IExecutionContext plus its engine and cached per-binding layer info.
210 {
211 public:
// NOTE(review): original line 212 (per the index, ~TrtBackendContext()) was
// lost in extraction.
213 
214 protected:
// NOTE(review): original line 215 — per the index, the constructor start
// "TrtBackendContext(UniquePtrWDestroy<nvinfer1::IExecutionContext>&& ctx," —
// was lost in extraction; this is its second parameter line.
216  std::shared_ptr<TrtEngine> engine);
217 
218  int getLayerIdx(const std::string& bindingName) override;
219  int getNumBoundLayers() override;
220 
// Returns cached layer info; asserts (debug-only) that the index is in range.
221  const NvDsInferBatchDimsLayerInfo& getLayerInfo(int bindingIdx) override
222  {
223  assert(bindingIdx < (int)m_AllLayers.size());
224  return m_AllLayers[bindingIdx];
225  }
226 
227  bool canSupportBatchDims(
228  int bindingIdx, const NvDsInferBatchDims& batchDims) override;
229 
// Min/max/opt lookups below read the optimization-profile dims cached in
// m_AllLayers; index validity is only assert-checked (debug builds).
230  virtual NvDsInferBatchDims getMaxBatchDims(int bindingIdx) override
231  {
232  assert(bindingIdx < (int)m_AllLayers.size());
233  return m_AllLayers[bindingIdx].profileDims[kSELECTOR_MAX];
234  }
235  virtual NvDsInferBatchDims getMinBatchDims(int bindingIdx) override
236  {
237  assert(bindingIdx < (int)m_AllLayers.size());
238  return m_AllLayers[bindingIdx].profileDims[kSELECTOR_MIN];
239  }
240  virtual NvDsInferBatchDims getOptBatchDims(int bindingIdx) override
241  {
242  assert(bindingIdx < (int)m_AllLayers.size());
243  return m_AllLayers[bindingIdx].profileDims[kSELECTOR_OPT];
244  }
245 
246 protected:
// NOTE(review): original line 247 — per the index, the member
// "UniquePtrWDestroy<nvinfer1::IExecutionContext> m_Context;" — was lost in
// extraction.
// Shared engine wrapper; kept alive for the lifetime of the context.
248  std::shared_ptr<TrtEngine> m_CudaEngine;
// Per-binding layer info cached at initialization.
249  std::vector<NvDsInferBatchDimsLayerInfo> m_AllLayers;
250 
// GPU device id; -1 indicates not yet assigned.
251  int m_GpuId = -1;
252 
// Serializes DLA execution across contexts (see DLA subclasses below).
253  static std::mutex sDLAExecutionMutex;
254 };
255 
// NOTE(review): class-header line lost in extraction; the index identifies this
// as "class ImplicitTrtBackendContext — Backend context for implicit batch
// dimension network" (nvdsinfer_backend.h:259), derived from TrtBackendContext.
260 {
261 public:
// NOTE(review): original lines 262-263 — per the index, the constructor start
// "ImplicitTrtBackendContext(UniquePtrWDestroy<nvinfer1::IExecutionContext>&&
// ctx," — were lost in extraction; this is its second parameter line.
264  std::shared_ptr<TrtEngine> engine);
265 
266 private:
267  NvDsInferStatus initialize() override;
268 
// Enqueue one batched buffer on `stream`; `consumeEvent` may be null (pointer).
269  NvDsInferStatus enqueueBuffer(
270  const std::shared_ptr<InferBatchBuffer>& buffer, CudaStream& stream,
271  CudaEvent* consumeEvent) override;
272 
273 protected:
274  bool canSupportBatchDims(
275  int bindingIdx, const NvDsInferBatchDims& batchDims) override;
276 
// Max batch size of the implicit-batch engine; 0 until initialize() runs.
277  int m_MaxBatchSize = 0;
278 };
279 
// NOTE(review): class-header line lost in extraction; the index identifies this
// as "class FullDimTrtBackendContext — Backend context for full dimensions
// network" (nvdsinfer_backend.h:283), derived from TrtBackendContext.
284 {
285 public:
// NOTE(review): original lines 286-287 — per the index, the constructor start
// "FullDimTrtBackendContext(UniquePtrWDestroy<nvinfer1::IExecutionContext>&&
// ctx," — were lost in extraction; these are the remaining parameter lines.
288  std::shared_ptr<TrtEngine> engine, int profile = 0);
289 
290 private:
291  NvDsInferStatus initialize() override;
292 
// Enqueue one batched buffer on `stream`; `consumeEvent` may be null (pointer).
293  NvDsInferStatus enqueueBuffer(
294  const std::shared_ptr<InferBatchBuffer>& buffer, CudaStream& stream,
295  CudaEvent* consumeEvent) override;
296 
297 protected:
298  // Only idx 0 profile supported.
299  const int m_ProfileIndex = 0;
300 };
301 
// NOTE(review): class-header line lost in extraction; the index identifies this
// as "class DlaImplicitTrtBackendContext — Backend context for implicit batch
// dimension network inferencing on DLA" (nvdsinfer_backend.h:305).
306 {
307 public:
// NOTE(review): original line 308 — per the index, the constructor start
// "DlaImplicitTrtBackendContext(UniquePtrWDestroy<nvinfer1::IExecutionContext>&&
// ctx," — was lost in extraction; this is its second parameter line.
309  std::shared_ptr<TrtEngine> engine);
310 
// NOTE(review): original line 311 — per the index, the override start
// "NvDsInferStatus enqueueBuffer(" — was lost in extraction; these are its
// parameter lines.
312  const std::shared_ptr<InferBatchBuffer>& buffer, CudaStream& stream,
313  CudaEvent* consumeEvent) override;
314 };
315 
// NOTE(review): class-header line lost in extraction; the index identifies this
// as "class DlaFullDimTrtBackendContext" — per its index description, a backend
// context for inferencing on DLA (nvdsinfer_backend.h:319).
320 {
321 public:
// NOTE(review): original line 322 — per the index, the constructor start
// "DlaFullDimTrtBackendContext(UniquePtrWDestroy<nvinfer1::IExecutionContext>&&
// ctx," — was lost in extraction; this is its remaining parameter line.
323  std::shared_ptr<TrtEngine> engine, int profile = 0);
324 
// NOTE(review): original line 325 — per the index, the override start
// "NvDsInferStatus enqueueBuffer(" — was lost in extraction; these are its
// parameter lines.
326  const std::shared_ptr<InferBatchBuffer>& buffer, CudaStream& stream,
327  CudaEvent* consumeEvent) override;
328 
329 private:
// Serializes enqueue across instances of this class.
330  static std::mutex sExecutionMutex;
331 };
332 
// Factory: creates the appropriate TrtBackendContext subclass for `engine`
// ("Create an instance of a BackendContext" per the index). Selection logic
// (implicit vs. full-dims vs. DLA) is not visible here — see the .cpp.
342 std::unique_ptr<TrtBackendContext> createBackendContext(
343  const std::shared_ptr<TrtEngine>& engine);
344 
345 } // end of namespace nvdsinfer
346 
347 #endif
nvdsinfer::FullDimTrtBackendContext::m_ProfileIndex
const int m_ProfileIndex
Definition: nvdsinfer_backend.h:299
nvdsinfer::FullDimTrtBackendContext
Backend context for full dimensions network.
Definition: nvdsinfer_backend.h:283
nvdsinfer::BackendContext::enqueueBuffer
virtual NvDsInferStatus enqueueBuffer(const std::shared_ptr< InferBatchBuffer > &buffer, CudaStream &stream, CudaEvent *consumeEvent)=0
nvdsinfer::CudaHostBuffer::~CudaHostBuffer
~CudaHostBuffer()
nvdsinfer::CudaBuffer
Helper base class for managing Cuda allocated buffers.
Definition: nvdsinfer_backend.h:86
nvdsinfer::CudaDeviceBuffer::CudaDeviceBuffer
CudaDeviceBuffer(size_t size)
nvdsinfer::InferBatchBuffer::getBatchDims
virtual NvDsInferBatchDims getBatchDims(int bindingIndex=0) const =0
nvdsinfer::TrtEngine
Helper class written on top of nvinfer1::ICudaEngine.
Definition: nvdsinfer_model_builder.h:256
nvdsinfer::BackendContext::getMaxBatchDims
virtual NvDsInferBatchDims getMaxBatchDims(int bindingIdx)=0
nvdsinfer::InferBatchBuffer::InferBatchBuffer
InferBatchBuffer()=default
nvdsinfer::TrtBackendContext::m_GpuId
int m_GpuId
Definition: nvdsinfer_backend.h:251
SIMPLE_MOVE_COPY
#define SIMPLE_MOVE_COPY(Cls)
Definition: infer_defines.h:29
nvdsinfer::TrtBackendContext::getLayerInfo
const NvDsInferBatchDimsLayerInfo & getLayerInfo(int bindingIdx) override
Definition: nvdsinfer_backend.h:221
nvdsinfer::BackendContext
Abstract interface for managing the actual inferencing implementation.
Definition: nvdsinfer_backend.h:168
nvdsinfer::BackendContext::getOptBatchDims
virtual NvDsInferBatchDims getOptBatchDims(int bindingIdx)=0
nvdsinfer::CudaStream
Helper class for managing Cuda Streams.
Definition: nvdsinfer_backend.h:40
nvdsinfer::BackendContext::~BackendContext
virtual ~BackendContext()=default
nvdsinfer::DlaFullDimTrtBackendContext::DlaFullDimTrtBackendContext
DlaFullDimTrtBackendContext(UniquePtrWDestroy< nvinfer1::IExecutionContext > &&ctx, std::shared_ptr< TrtEngine > engine, int profile=0)
nvdsinfer::TrtBackendContext::canSupportBatchDims
bool canSupportBatchDims(int bindingIdx, const NvDsInferBatchDims &batchDims) override
nvdsinfer::ImplicitTrtBackendContext
Backend context for implicit batch dimension network.
Definition: nvdsinfer_backend.h:259
nvdsinfer::CudaStream::ptr
cudaStream_t & ptr()
Definition: nvdsinfer_backend.h:46
nvdsinfer::CudaEvent::~CudaEvent
~CudaEvent()
nvdsinfer::CudaBuffer::CudaBuffer
CudaBuffer(size_t s)
Definition: nvdsinfer_backend.h:102
nvdsinfer::CudaDeviceBuffer::~CudaDeviceBuffer
~CudaDeviceBuffer()
nvdsinfer::InferBatchBuffer
Abstract interface to manage a batched buffer for inference.
Definition: nvdsinfer_backend.h:138
nvdsinfer::BackendContext::getMinBatchDims
virtual NvDsInferBatchDims getMinBatchDims(int bindingIdx)=0
NvDsInferDataType
NvDsInferDataType
Specifies the data type of a layer.
Definition: nvdsinfer.h:72
nvdsinfer::InferBatchBuffer::getDeviceBuffers
virtual std::vector< void * > & getDeviceBuffers()=0
nvdsinfer
Definition: nvdsinfer_model_builder.h:42
nvdsinfer::BackendContext::initialize
virtual NvDsInferStatus initialize()=0
CudaStream
Helper class for managing Cuda Streams.
Definition: nvdspreprocess_impl.h:97
nvdsinfer::BackendContext::getLayerInfo
virtual const NvDsInferBatchDimsLayerInfo & getLayerInfo(int bindingIdx)=0
nvdsinfer::TrtBackendContext::getNumBoundLayers
int getNumBoundLayers() override
nvdsinfer::TrtBackendContext
Base class for implementations of the BackendContext interface.
Definition: nvdsinfer_backend.h:209
nvdsinfer::FullDimTrtBackendContext::FullDimTrtBackendContext
FullDimTrtBackendContext(UniquePtrWDestroy< nvinfer1::IExecutionContext > &&ctx, std::shared_ptr< TrtEngine > engine, int profile=0)
nvdsinfer::CudaBuffer::ptr
T * ptr()
Definition: nvdsinfer_backend.h:93
nvdsinfer::InferBatchBuffer::getDataType
virtual NvDsInferDataType getDataType(int bindingIndex=0) const =0
nvdsinfer::CudaBuffer::move_copy
void move_copy(CudaBuffer &&o)
Definition: nvdsinfer_backend.h:103
nvdsinfer::TrtBackendContext::m_AllLayers
std::vector< NvDsInferBatchDimsLayerInfo > m_AllLayers
Definition: nvdsinfer_backend.h:249
cudaStream_t
struct CUstream_st * cudaStream_t
Forward declaration of cudaStream_t.
Definition: nvbufsurftransform.h:34
nvdsinfer::TrtBackendContext::getLayerIdx
int getLayerIdx(const std::string &bindingName) override
nvdsinfer::CudaEvent::CudaEvent
CudaEvent(uint flag=cudaEventDefault)
nvdsinfer::CudaBuffer::DISABLE_CLASS_COPY
DISABLE_CLASS_COPY(CudaBuffer)
nvdsinfer::BackendContext::getNumBoundLayers
virtual int getNumBoundLayers()=0
nvdsinfer::BackendContext::BackendContext
BackendContext()=default
nvdsinfer::TrtBackendContext::TrtBackendContext
TrtBackendContext(UniquePtrWDestroy< nvinfer1::IExecutionContext > &&ctx, std::shared_ptr< TrtEngine > engine)
nvdsinfer::TrtBackendContext::getOptBatchDims
virtual NvDsInferBatchDims getOptBatchDims(int bindingIdx) override
Definition: nvdsinfer_backend.h:240
nvdsinfer::CudaBuffer::bytes
size_t bytes() const
Definition: nvdsinfer_backend.h:90
nvdsinfer::DlaFullDimTrtBackendContext::enqueueBuffer
NvDsInferStatus enqueueBuffer(const std::shared_ptr< InferBatchBuffer > &buffer, CudaStream &stream, CudaEvent *consumeEvent) override
nvdsinfer::UniquePtrWDestroy< nvinfer1::IExecutionContext >
nvdsinfer::CudaEvent::ptr
cudaEvent_t & ptr()
Definition: nvdsinfer_backend.h:69
nvdsinfer::CudaDeviceBuffer
CUDA device buffers.
Definition: nvdsinfer_backend.h:118
nvdsinfer::DlaFullDimTrtBackendContext
Backend context for implicit batch dimension network inferencing on DLA.
Definition: nvdsinfer_backend.h:319
nvdsinfer::CudaHostBuffer::CudaHostBuffer
CudaHostBuffer(size_t size)
nvdsinfer::BackendContext::canSupportBatchDims
virtual bool canSupportBatchDims(int bindingIdx, const NvDsInferBatchDims &batchDims)=0
nvdsinfer::ImplicitTrtBackendContext::ImplicitTrtBackendContext
ImplicitTrtBackendContext(UniquePtrWDestroy< nvinfer1::IExecutionContext > &&ctx, std::shared_ptr< TrtEngine > engine)
nvdsinfer::CudaStream::CudaStream
CudaStream(uint flag=cudaStreamDefault, int priority=0)
nvdsinfer::CudaBuffer::m_Size
size_t m_Size
Definition: nvdsinfer_backend.h:112
nvdsinfer::DlaImplicitTrtBackendContext
Backend context for implicit batch dimension network inferencing on DLA.
Definition: nvdsinfer_backend.h:305
nvdsinfer::TrtBackendContext::sDLAExecutionMutex
static std::mutex sDLAExecutionMutex
Definition: nvdsinfer_backend.h:253
nvdsinfer::CudaBuffer::m_Buf
void * m_Buf
Definition: nvdsinfer_backend.h:111
nvdsinfer::TrtBackendContext::m_CudaEngine
std::shared_ptr< TrtEngine > m_CudaEngine
Definition: nvdsinfer_backend.h:248
nvdsinfer::TrtBackendContext::getMaxBatchDims
virtual NvDsInferBatchDims getMaxBatchDims(int bindingIdx) override
Definition: nvdsinfer_backend.h:230
nvdsinfer::CudaBuffer::~CudaBuffer
virtual ~CudaBuffer()=default
nvdsinfer::DlaImplicitTrtBackendContext::DlaImplicitTrtBackendContext
DlaImplicitTrtBackendContext(UniquePtrWDestroy< nvinfer1::IExecutionContext > &&ctx, std::shared_ptr< TrtEngine > engine)
nvdsinfer::ImplicitTrtBackendContext::m_MaxBatchSize
int m_MaxBatchSize
Definition: nvdsinfer_backend.h:277
nvdsinfer::CudaBuffer::ptr
void * ptr()
Definition: nvdsinfer_backend.h:98
nvdsinfer::TrtBackendContext::getMinBatchDims
virtual NvDsInferBatchDims getMinBatchDims(int bindingIdx) override
Definition: nvdsinfer_backend.h:235
nvdsinfer::CudaHostBuffer
CUDA host buffers.
Definition: nvdsinfer_backend.h:128
nvdsinfer::DlaImplicitTrtBackendContext::enqueueBuffer
NvDsInferStatus enqueueBuffer(const std::shared_ptr< InferBatchBuffer > &buffer, CudaStream &stream, CudaEvent *consumeEvent) override
nvdsinfer::InferBatchBuffer::~InferBatchBuffer
virtual ~InferBatchBuffer()=default
nvdsinfer::BackendContext::getLayerIdx
virtual int getLayerIdx(const std::string &bindingName)=0
nvdsinfer::TrtBackendContext::m_Context
UniquePtrWDestroy< nvinfer1::IExecutionContext > m_Context
Definition: nvdsinfer_backend.h:247
nvdsinfer::CudaEvent
Helper class for managing Cuda events.
Definition: nvdsinfer_backend.h:63
nvdsinfer::ImplicitTrtBackendContext::canSupportBatchDims
bool canSupportBatchDims(int bindingIdx, const NvDsInferBatchDims &batchDims) override
nvdsinfer::createBackendContext
std::unique_ptr< TrtBackendContext > createBackendContext(const std::shared_ptr< TrtEngine > &engine)
Create an instance of a BackendContext.
nvdsinfer::TrtBackendContext::~TrtBackendContext
~TrtBackendContext()
nvdsinfer::CudaStream::~CudaStream
~CudaStream()
NvDsInferStatus
NvDsInferStatus
Enum for the status codes returned by NvDsInferContext.
Definition: nvdsinfer.h:218
nvdsinfer_func_utils.h