NVIDIA DeepStream SDK API Reference

6.4 Release
nvdsinfer_backend.h
Go to the documentation of this file.
1 
12 #ifndef __NVDSINFER_BACKEND_H__
13 #define __NVDSINFER_BACKEND_H__
14 
15 #include <stdarg.h>
16 #include <condition_variable>
17 #include <memory>
18 #include <mutex>
19 #include <queue>
20 
21 #include <cuda_runtime_api.h>
22 
23 #include <NvCaffeParser.h>
24 #include <NvInfer.h>
25 #include <NvInferRuntime.h>
26 
27 #include "nvdsinfer_func_utils.h"
28 
29 /* This file provides backend inference interface for abstracting implementation
30  * details in various cases like inferencing on implicit batch dims/full dims
31  * network, inferencing on DLA etc. This file also provides helper classes for
32  * managing the lifecycle of CUDA resources like streams, buffers, events. */
33 
34 namespace nvdsinfer {
35 
/* RAII helper owning a cudaStream_t; the handle is uniquely owned (copying
 * disabled below). NOTE(review): the "class CudaStream" declaration line
 * (hdr line 39) was dropped when this Doxygen page was scraped; the tooltip
 * index below confirms this block is nvdsinfer::CudaStream. */
40 {
41 public:
42  explicit CudaStream(uint flag = cudaStreamDefault, int priority = 0);
43  ~CudaStream();
/* Implicit conversion so the object can be passed wherever a cudaStream_t
 * is expected. */
44  operator cudaStream_t() { return m_Stream; }
45  cudaStream_t& ptr() { return m_Stream; }
/* NOTE(review): hdr line 46 lost in extraction — presumably
 * SIMPLE_MOVE_COPY(CudaStream); confirm against the SDK header. */
47 
48 private:
/* Move helper: steal the raw handle and null the source so only one object
 * ever destroys the stream. */
49  void move_copy(CudaStream&& o)
50  {
51  m_Stream = o.m_Stream;
52  o.m_Stream = nullptr;
53  }
54  DISABLE_CLASS_COPY(CudaStream);
55 
56  cudaStream_t m_Stream = nullptr;
57 };
58 
/* RAII helper owning a cudaEvent_t; the handle is uniquely owned (copying
 * disabled below). */
62 class CudaEvent
63 {
64 public:
65  explicit CudaEvent(uint flag = cudaEventDefault);
66  ~CudaEvent();
/* Implicit conversion so the object can be passed wherever a cudaEvent_t
 * is expected. */
67  operator cudaEvent_t() { return m_Event; }
68  cudaEvent_t& ptr() { return m_Event; }
/* NOTE(review): hdr line 69 lost in extraction — presumably
 * SIMPLE_MOVE_COPY(CudaEvent); confirm against the SDK header. */
70 
71 private:
/* Move helper: steal the raw handle and null the source so only one object
 * ever destroys the event. */
72  void move_copy(CudaEvent&& o)
73  {
74  m_Event = o.m_Event;
75  o.m_Event = nullptr;
76  }
77  DISABLE_CLASS_COPY(CudaEvent);
78 
79  cudaEvent_t m_Event = nullptr;
80 };
81 
/* Abstract base for CUDA-allocated buffers: tracks a raw pointer plus its
 * byte size; allocation/deallocation is left to derived classes.
 * NOTE(review): the "class CudaBuffer" declaration line (hdr line 85) was
 * dropped in extraction; the index below confirms this is
 * nvdsinfer::CudaBuffer ("Helper base class for managing Cuda allocated
 * buffers"). */
86 {
87 public:
88  virtual ~CudaBuffer() = default;
/* Size of the allocation in bytes. */
89  size_t bytes() const { return m_Size; }
90 
/* Typed view of the raw buffer pointer; no size/alignment check is done. */
91  template <typename T>
92  T* ptr()
93  {
94  return (T*)m_Buf;
95  }
96 
97  void* ptr() { return m_Buf; }
/* NOTE(review): hdr line 98 lost in extraction — presumably
 * SIMPLE_MOVE_COPY(CudaBuffer); confirm against the SDK header. */
99 
100 protected:
/* Constructible only by derived classes. */
101  explicit CudaBuffer(size_t s) : m_Size(s) {}
/* NOTE(review): hdr line 102 lost in extraction — the index confirms it is
 * "void move_copy(CudaBuffer &&o)"; the body below transfers ownership and
 * zeroes the source. */
103  {
104  m_Buf = o.m_Buf;
105  o.m_Buf = nullptr;
106  m_Size = o.m_Size;
107  o.m_Size = 0;
108  }
/* NOTE(review): hdr line 109 lost in extraction — the index confirms
 * DISABLE_CLASS_COPY(CudaBuffer). */
110  void* m_Buf = nullptr;
111  size_t m_Size = 0;
112 };
113 
/* CUDA device-memory buffer (index: "CUDA device buffers").
 * NOTE(review): the class declaration line (hdr 117) was lost in extraction —
 * presumably "class CudaDeviceBuffer : public CudaBuffer"; hdr line 121
 * (~CudaDeviceBuffer(), confirmed by the index) was also dropped. */
118 {
119 public:
120  explicit CudaDeviceBuffer(size_t size);
122 };
123 
/* CUDA host-memory buffer (index: "CUDA host buffers").
 * NOTE(review): the class declaration line (hdr 127) was lost in extraction —
 * presumably "class CudaHostBuffer : public CudaBuffer"; confirm against the
 * SDK header. */
128 {
129 public:
130  explicit CudaHostBuffer(size_t size);
131  ~CudaHostBuffer();
132 };
133 
/* Abstract interface to manage a batched buffer for inference (per index).
 * Implementations expose the per-layer device pointers plus the data type
 * and batch dimensions of each bound layer.
 * NOTE(review): the "class InferBatchBuffer" declaration line (hdr 137) was
 * lost in extraction. */
138 {
139 public:
140  InferBatchBuffer() = default;
141  virtual ~InferBatchBuffer() = default;
142 
143  /* Get device buffer pointers for bound layers associated with this batch. */
144  virtual std::vector<void*>& getDeviceBuffers() = 0;
145  /* Get the data type of the buffer(layer) for a bound layer having index
146  * `bindingIndex`. */
147  virtual NvDsInferDataType getDataType(int bindingIndex = 0) const = 0;
148  /* Get the batch dimensions for the buffer allocated for a bound layer having
149  * index `bindingIndex`. */
150  virtual NvDsInferBatchDims getBatchDims(int bindingIndex = 0) const = 0;
151 
152 private:
153  DISABLE_CLASS_COPY(InferBatchBuffer);
154 };
155 
/* Abstract interface for managing the actual inferencing implementation
 * (per index). Concrete subclasses below cover implicit-batch, full-dims
 * and DLA variants.
 * NOTE(review): the "class BackendContext" declaration line (hdr 167) was
 * lost in extraction. */
168 {
169 public:
170  BackendContext() = default;
171  virtual ~BackendContext() = default;
172 
173  /* Initialize the backend context. */
174  virtual NvDsInferStatus initialize() = 0;
175  /* Get the number of bound layers for the engine. */
176  virtual int getNumBoundLayers() = 0;
177 
178  /* Get information for a bound layer with index `bindingIdx`. */
179  virtual const NvDsInferBatchDimsLayerInfo& getLayerInfo(int bindingIdx) = 0;
180  /* Get binding index for a bound layer with name `bindingName`. */
181  virtual int getLayerIdx(const std::string& bindingName) = 0;
182 
183  /* Returns if the bound layer at index `bindingIdx` can support the
184  * provided batch dimensions. */
185  virtual bool canSupportBatchDims(
186  int bindingIdx, const NvDsInferBatchDims& batchDims) = 0;
187 
188  /* Get the min/max/optimal batch dimensions for a bound layer. */
189  virtual NvDsInferBatchDims getMaxBatchDims(int bindingIdx) = 0;
190  virtual NvDsInferBatchDims getMinBatchDims(int bindingIdx) = 0;
191  virtual NvDsInferBatchDims getOptBatchDims(int bindingIdx) = 0;
192 
193  /* Enqueue a batched buffer for inference. */
/* NOTE(review): hdr line 194 ("virtual NvDsInferStatus enqueueBuffer(") was
 * lost in extraction; the full signature is confirmed by the index entry
 * nvdsinfer::BackendContext::enqueueBuffer below. */
195  const std::shared_ptr<InferBatchBuffer>& buffer, CudaStream& stream,
196  CudaEvent* consumeEvent) = 0;
197 
198 private:
199  DISABLE_CLASS_COPY(BackendContext);
200 };
201 
202 class TrtEngine;
203 
/* Base class for implementations of the BackendContext interface (per index),
 * built on top of a TensorRT execution context and engine.
 * NOTE(review): several lines were lost in extraction and are confirmed by
 * the index: hdr 208 (class declaration, presumably
 * "class TrtBackendContext : public BackendContext"), hdr 211
 * (~TrtBackendContext()), hdr 214 (first line of the protected constructor
 * taking UniquePtrWDestroy<nvinfer1::IExecutionContext>&&) and hdr 246
 * (member "UniquePtrWDestroy<nvinfer1::IExecutionContext> m_Context"). */
209 {
210 public:
/* NOTE(review): hdr line 211 (~TrtBackendContext()) lost in extraction. */
212 
213 protected:
/* Constructor continuation; its first line (hdr 214) was lost in extraction. */
215  std::shared_ptr<TrtEngine> engine);
216 
217  int getLayerIdx(const std::string& bindingName) override;
218  int getNumBoundLayers() override;
219 
220  const NvDsInferBatchDimsLayerInfo& getLayerInfo(int bindingIdx) override
221  {
222  assert(bindingIdx < (int)m_AllLayers.size());
223  return m_AllLayers[bindingIdx];
224  }
225 
226  bool canSupportBatchDims(
227  int bindingIdx, const NvDsInferBatchDims& batchDims) override;
228 
/* Min/max/opt batch dims are served from the per-layer profileDims cache
 * populated for each binding. */
229  virtual NvDsInferBatchDims getMaxBatchDims(int bindingIdx) override
230  {
231  assert(bindingIdx < (int)m_AllLayers.size());
232  return m_AllLayers[bindingIdx].profileDims[kSELECTOR_MAX];
233  }
234  virtual NvDsInferBatchDims getMinBatchDims(int bindingIdx) override
235  {
236  assert(bindingIdx < (int)m_AllLayers.size());
237  return m_AllLayers[bindingIdx].profileDims[kSELECTOR_MIN];
238  }
239  virtual NvDsInferBatchDims getOptBatchDims(int bindingIdx) override
240  {
241  assert(bindingIdx < (int)m_AllLayers.size());
242  return m_AllLayers[bindingIdx].profileDims[kSELECTOR_OPT];
243  }
244 
245 protected:
/* NOTE(review): hdr line 246 (m_Context member, per the index) lost in
 * extraction. */
247  std::shared_ptr<TrtEngine> m_CudaEngine;
248  std::vector<NvDsInferBatchDimsLayerInfo> m_AllLayers;
249 
250  int m_GpuId = -1;
251 
/* Shared across all instances; presumably serializes DLA execution — confirm
 * usage in the implementation file. */
252  static std::mutex sDLAExecutionMutex;
253 };
254 
/* Backend context for implicit batch dimension networks (per index).
 * NOTE(review): the class declaration line (hdr 258) and the first line of
 * the constructor (hdr 261-262) were lost in extraction; the index confirms
 * the constructor takes UniquePtrWDestroy<nvinfer1::IExecutionContext>&&
 * plus the engine. */
259 {
260 public:
/* Constructor continuation; its first line was lost in extraction. */
263  std::shared_ptr<TrtEngine> engine);
264 
265 private:
266  NvDsInferStatus initialize() override;
267 
268  NvDsInferStatus enqueueBuffer(
269  const std::shared_ptr<InferBatchBuffer>& buffer, CudaStream& stream,
270  CudaEvent* consumeEvent) override;
271 
272 protected:
273  bool canSupportBatchDims(
274  int bindingIdx, const NvDsInferBatchDims& batchDims) override;
275 
/* Defaults to 0; presumably set during initialize() from the engine's max
 * batch size — confirm in the implementation file. */
276  int m_MaxBatchSize = 0;
277 };
278 
/* Backend context for full dimensions networks (per index).
 * NOTE(review): the class declaration line (hdr 282) and the first line of
 * the constructor (hdr 285-286) were lost in extraction; the index confirms
 * the constructor takes UniquePtrWDestroy<nvinfer1::IExecutionContext>&&,
 * the engine, and an optional profile index. */
283 {
284 public:
/* Constructor continuation; its first line was lost in extraction. */
287  std::shared_ptr<TrtEngine> engine, int profile = 0);
288 
289 private:
290  NvDsInferStatus initialize() override;
291 
292  NvDsInferStatus enqueueBuffer(
293  const std::shared_ptr<InferBatchBuffer>& buffer, CudaStream& stream,
294  CudaEvent* consumeEvent) override;
295 
296 protected:
297  // Only idx 0 profile supported.
298  const int m_ProfileIndex = 0;
299 };
300 
/* Backend context for implicit batch dimension network inferencing on DLA
 * (per index).
 * NOTE(review): the class declaration line (hdr 304), the constructor's
 * first line (hdr 307) and enqueueBuffer's first line (hdr 310) were lost
 * in extraction; both signatures are confirmed by the index entries below. */
305 {
306 public:
/* Constructor continuation; its first line was lost in extraction. */
308  std::shared_ptr<TrtEngine> engine);
309 
/* enqueueBuffer continuation; its first line was lost in extraction. */
311  const std::shared_ptr<InferBatchBuffer>& buffer, CudaStream& stream,
312  CudaEvent* consumeEvent) override;
313 };
314 
/* Backend context for full dimensions network inferencing on DLA.
 * NOTE(review): the class declaration line (hdr 318), the constructor's
 * first line (hdr 321) and enqueueBuffer's first line (hdr 324) were lost
 * in extraction; both signatures are confirmed by the index entries below. */
319 {
320 public:
/* Constructor continuation; its first line was lost in extraction. */
322  std::shared_ptr<TrtEngine> engine, int profile = 0);
323 
/* enqueueBuffer continuation; its first line was lost in extraction. */
325  const std::shared_ptr<InferBatchBuffer>& buffer, CudaStream& stream,
326  CudaEvent* consumeEvent) override;
327 
328 private:
/* Shared across all instances; presumably serializes execution — confirm
 * usage in the implementation file. */
329  static std::mutex sExecutionMutex;
330 };
331 
/* Factory: create a BackendContext instance appropriate for the given
 * engine (per the index: "Create an instance of a BackendContext"). */
341 std::unique_ptr<TrtBackendContext> createBackendContext(
342  const std::shared_ptr<TrtEngine>& engine);
343 
344 } // end of namespace nvdsinfer
345 
346 #endif
nvdsinfer::FullDimTrtBackendContext::m_ProfileIndex
const int m_ProfileIndex
Definition: nvdsinfer_backend.h:298
nvdsinfer::FullDimTrtBackendContext
Backend context for full dimensions network.
Definition: nvdsinfer_backend.h:282
nvdsinfer::BackendContext::enqueueBuffer
virtual NvDsInferStatus enqueueBuffer(const std::shared_ptr< InferBatchBuffer > &buffer, CudaStream &stream, CudaEvent *consumeEvent)=0
nvdsinfer::CudaHostBuffer::~CudaHostBuffer
~CudaHostBuffer()
nvdsinfer::CudaBuffer
Helper base class for managing Cuda allocated buffers.
Definition: nvdsinfer_backend.h:85
nvdsinfer::CudaDeviceBuffer::CudaDeviceBuffer
CudaDeviceBuffer(size_t size)
nvdsinfer::InferBatchBuffer::getBatchDims
virtual NvDsInferBatchDims getBatchDims(int bindingIndex=0) const =0
nvdsinfer::TrtEngine
Helper class written on top of nvinfer1::ICudaEngine.
Definition: nvdsinfer_model_builder.h:255
nvdsinfer::BackendContext::getMaxBatchDims
virtual NvDsInferBatchDims getMaxBatchDims(int bindingIdx)=0
nvdsinfer::InferBatchBuffer::InferBatchBuffer
InferBatchBuffer()=default
nvdsinfer::TrtBackendContext::m_GpuId
int m_GpuId
Definition: nvdsinfer_backend.h:250
nvdsinfer::TrtBackendContext::getLayerInfo
const NvDsInferBatchDimsLayerInfo & getLayerInfo(int bindingIdx) override
Definition: nvdsinfer_backend.h:220
nvdsinfer::BackendContext
Abstract interface for managing the actual inferencing implementation.
Definition: nvdsinfer_backend.h:167
nvdsinfer::BackendContext::getOptBatchDims
virtual NvDsInferBatchDims getOptBatchDims(int bindingIdx)=0
nvdsinfer::CudaStream
Helper class for managing Cuda Streams.
Definition: nvdsinfer_backend.h:39
nvdsinfer::BackendContext::~BackendContext
virtual ~BackendContext()=default
nvdsinfer::DlaFullDimTrtBackendContext::DlaFullDimTrtBackendContext
DlaFullDimTrtBackendContext(UniquePtrWDestroy< nvinfer1::IExecutionContext > &&ctx, std::shared_ptr< TrtEngine > engine, int profile=0)
nvdsinfer::TrtBackendContext::canSupportBatchDims
bool canSupportBatchDims(int bindingIdx, const NvDsInferBatchDims &batchDims) override
nvdsinfer::ImplicitTrtBackendContext
Backend context for implicit batch dimension network.
Definition: nvdsinfer_backend.h:258
nvdsinfer::CudaStream::ptr
cudaStream_t & ptr()
Definition: nvdsinfer_backend.h:45
nvdsinfer::CudaEvent::~CudaEvent
~CudaEvent()
nvdsinfer::CudaBuffer::CudaBuffer
CudaBuffer(size_t s)
Definition: nvdsinfer_backend.h:101
nvdsinfer::CudaDeviceBuffer::~CudaDeviceBuffer
~CudaDeviceBuffer()
nvdsinfer::InferBatchBuffer
Abstract interface to manage a batched buffer for inference.
Definition: nvdsinfer_backend.h:137
nvdsinfer::BackendContext::getMinBatchDims
virtual NvDsInferBatchDims getMinBatchDims(int bindingIdx)=0
NvDsInferDataType
NvDsInferDataType
Specifies the data type of a layer.
Definition: nvdsinfer.h:72
nvdsinfer::InferBatchBuffer::getDeviceBuffers
virtual std::vector< void * > & getDeviceBuffers()=0
nvdsinfer
Copyright (c) 2019-2021, NVIDIA CORPORATION.
Definition: nvdsinfer_model_builder.h:41
nvdsinfer::BackendContext::initialize
virtual NvDsInferStatus initialize()=0
CudaStream
Helper class for managing Cuda Streams.
Definition: nvdspreprocess_impl.h:107
nvdsinfer::BackendContext::getLayerInfo
virtual const NvDsInferBatchDimsLayerInfo & getLayerInfo(int bindingIdx)=0
nvdsinfer::TrtBackendContext::getNumBoundLayers
int getNumBoundLayers() override
nvdsinfer::TrtBackendContext
Base class for implementations of the BackendContext interface.
Definition: nvdsinfer_backend.h:208
nvdsinfer::FullDimTrtBackendContext::FullDimTrtBackendContext
FullDimTrtBackendContext(UniquePtrWDestroy< nvinfer1::IExecutionContext > &&ctx, std::shared_ptr< TrtEngine > engine, int profile=0)
nvdsinfer::CudaBuffer::ptr
T * ptr()
Definition: nvdsinfer_backend.h:92
nvdsinfer::InferBatchBuffer::getDataType
virtual NvDsInferDataType getDataType(int bindingIndex=0) const =0
nvdsinfer::CudaBuffer::move_copy
void move_copy(CudaBuffer &&o)
Definition: nvdsinfer_backend.h:102
nvdsinfer::TrtBackendContext::m_AllLayers
std::vector< NvDsInferBatchDimsLayerInfo > m_AllLayers
Definition: nvdsinfer_backend.h:248
cudaStream_t
struct CUstream_st * cudaStream_t
Forward declaration of cudaStream_t.
Definition: nvbufsurftransform.h:29
nvdsinfer::TrtBackendContext::getLayerIdx
int getLayerIdx(const std::string &bindingName) override
nvdsinfer::CudaEvent::CudaEvent
CudaEvent(uint flag=cudaEventDefault)
nvdsinfer::CudaBuffer::DISABLE_CLASS_COPY
DISABLE_CLASS_COPY(CudaBuffer)
nvdsinfer::BackendContext::getNumBoundLayers
virtual int getNumBoundLayers()=0
nvdsinfer::BackendContext::BackendContext
BackendContext()=default
nvdsinfer::TrtBackendContext::TrtBackendContext
TrtBackendContext(UniquePtrWDestroy< nvinfer1::IExecutionContext > &&ctx, std::shared_ptr< TrtEngine > engine)
nvdsinfer::TrtBackendContext::getOptBatchDims
virtual NvDsInferBatchDims getOptBatchDims(int bindingIdx) override
Definition: nvdsinfer_backend.h:239
nvdsinfer::CudaBuffer::bytes
size_t bytes() const
Definition: nvdsinfer_backend.h:89
nvdsinfer::DlaFullDimTrtBackendContext::enqueueBuffer
NvDsInferStatus enqueueBuffer(const std::shared_ptr< InferBatchBuffer > &buffer, CudaStream &stream, CudaEvent *consumeEvent) override
nvdsinfer::UniquePtrWDestroy< nvinfer1::IExecutionContext >
nvdsinfer::CudaEvent::ptr
cudaEvent_t & ptr()
Definition: nvdsinfer_backend.h:68
nvdsinfer::CudaDeviceBuffer
CUDA device buffers.
Definition: nvdsinfer_backend.h:117
nvdsinfer::DlaFullDimTrtBackendContext
Backend context for full dimensions network inferencing on DLA.
Definition: nvdsinfer_backend.h:318
nvdsinfer::CudaHostBuffer::CudaHostBuffer
CudaHostBuffer(size_t size)
nvdsinfer::BackendContext::canSupportBatchDims
virtual bool canSupportBatchDims(int bindingIdx, const NvDsInferBatchDims &batchDims)=0
nvdsinfer::ImplicitTrtBackendContext::ImplicitTrtBackendContext
ImplicitTrtBackendContext(UniquePtrWDestroy< nvinfer1::IExecutionContext > &&ctx, std::shared_ptr< TrtEngine > engine)
nvdsinfer::CudaStream::CudaStream
CudaStream(uint flag=cudaStreamDefault, int priority=0)
nvdsinfer::CudaBuffer::m_Size
size_t m_Size
Definition: nvdsinfer_backend.h:111
nvdsinfer::DlaImplicitTrtBackendContext
Backend context for implicit batch dimension network inferencing on DLA.
Definition: nvdsinfer_backend.h:304
SIMPLE_MOVE_COPY
#define SIMPLE_MOVE_COPY(Cls)
Definition: infer_defines.h:34
nvdsinfer::TrtBackendContext::sDLAExecutionMutex
static std::mutex sDLAExecutionMutex
Definition: nvdsinfer_backend.h:252
nvdsinfer::CudaBuffer::m_Buf
void * m_Buf
Definition: nvdsinfer_backend.h:110
nvdsinfer::TrtBackendContext::m_CudaEngine
std::shared_ptr< TrtEngine > m_CudaEngine
Definition: nvdsinfer_backend.h:247
nvdsinfer::TrtBackendContext::getMaxBatchDims
virtual NvDsInferBatchDims getMaxBatchDims(int bindingIdx) override
Definition: nvdsinfer_backend.h:229
nvdsinfer::CudaBuffer::~CudaBuffer
virtual ~CudaBuffer()=default
nvdsinfer::DlaImplicitTrtBackendContext::DlaImplicitTrtBackendContext
DlaImplicitTrtBackendContext(UniquePtrWDestroy< nvinfer1::IExecutionContext > &&ctx, std::shared_ptr< TrtEngine > engine)
nvdsinfer::ImplicitTrtBackendContext::m_MaxBatchSize
int m_MaxBatchSize
Definition: nvdsinfer_backend.h:276
nvdsinfer::CudaBuffer::ptr
void * ptr()
Definition: nvdsinfer_backend.h:97
nvdsinfer::TrtBackendContext::getMinBatchDims
virtual NvDsInferBatchDims getMinBatchDims(int bindingIdx) override
Definition: nvdsinfer_backend.h:234
nvdsinfer::CudaHostBuffer
CUDA host buffers.
Definition: nvdsinfer_backend.h:127
nvdsinfer::DlaImplicitTrtBackendContext::enqueueBuffer
NvDsInferStatus enqueueBuffer(const std::shared_ptr< InferBatchBuffer > &buffer, CudaStream &stream, CudaEvent *consumeEvent) override
nvdsinfer::InferBatchBuffer::~InferBatchBuffer
virtual ~InferBatchBuffer()=default
nvdsinfer::BackendContext::getLayerIdx
virtual int getLayerIdx(const std::string &bindingName)=0
nvdsinfer::TrtBackendContext::m_Context
UniquePtrWDestroy< nvinfer1::IExecutionContext > m_Context
Definition: nvdsinfer_backend.h:246
nvdsinfer::CudaEvent
Helper class for managing Cuda events.
Definition: nvdsinfer_backend.h:62
nvdsinfer::ImplicitTrtBackendContext::canSupportBatchDims
bool canSupportBatchDims(int bindingIdx, const NvDsInferBatchDims &batchDims) override
nvdsinfer::createBackendContext
std::unique_ptr< TrtBackendContext > createBackendContext(const std::shared_ptr< TrtEngine > &engine)
Create an instance of a BackendContext.
nvdsinfer::TrtBackendContext::~TrtBackendContext
~TrtBackendContext()
nvdsinfer::CudaStream::~CudaStream
~CudaStream()
NvDsInferStatus
NvDsInferStatus
Enum for the status codes returned by NvDsInferContext.
Definition: nvdsinfer.h:218
nvdsinfer_func_utils.h