NVIDIA DeepStream SDK API Reference

7.0 Release
infer_cuda_utils.h
Go to the documentation of this file.
1 /*
2  * SPDX-FileCopyrightText: Copyright (c) 2019-2022 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
3  * SPDX-License-Identifier: LicenseRef-NvidiaProprietary
4  *
5  * NVIDIA CORPORATION, its affiliates and licensors retain all intellectual
6  * property and proprietary rights in and to this material, related
7  * documentation and any modifications thereto. Any use, reproduction,
8  * disclosure or distribution of this material and related documentation
9  * without an express license agreement from NVIDIA CORPORATION or
10  * its affiliates is strictly prohibited.
11  */
12 
20 #ifndef __NVDSINFER_CUDA_UTILS_H__
21 #define __NVDSINFER_CUDA_UTILS_H__
22 
23 #include <stdarg.h>
24 #include <condition_variable>
25 #include <memory>
26 #include <mutex>
27 #include <queue>
28 
29 #include <cuda.h>
30 #include <cuda_runtime_api.h>
31 #include "infer_batch_buffer.h"
32 #include "infer_common.h"
33 
34 namespace nvdsinferserver {
35 
// Wrapper class for CUDA streams: owns a cudaStream_t handle created on a
// given GPU and exposes it via implicit conversion for direct use in CUDA
// runtime API calls.
// NOTE(review): the leading numbers ("39 ", "40 ", ...) are Doxygen
// source-listing artifacts of this text dump, not part of the real header.
// Gaps in the numbering are lines Doxygen folded out; per the symbol index
// at the bottom of this page, line 47 here is likely SIMPLE_MOVE_COPY(CudaStream).
39 class CudaStream {
40 public:
// Creates a stream with the given creation flag on GPU `gpuId` with the
// given scheduling priority (cudaStreamCreateWithPriority-style arguments).
41  explicit CudaStream(
42  uint flag = cudaStreamDefault, int gpuId = 0, int priority = 0);
// Presumably destroys the owned stream — TODO confirm in the .cpp.
43  ~CudaStream();
// Implicit conversion so the wrapper can be passed wherever a raw
// cudaStream_t is expected.
44  operator cudaStream_t() { return m_Stream; }
// GPU device id the stream is associated with.
45  int devId() const { return m_GpuId; }
// Mutable reference to the raw handle (e.g. for out-parameter creation).
46  cudaStream_t& ptr() { return m_Stream; }
48 
49 private:
// Move helper: steals the raw handle and nulls the source so only one
// wrapper ever owns (and eventually destroys) the stream.
50  void move_copy(CudaStream&& o) {
51  m_Stream = o.m_Stream;
52  o.m_Stream = nullptr;
53  }
// Copy construction/assignment disabled; move is provided via move_copy.
54  DISABLE_CLASS_COPY(CudaStream);
55 
56  cudaStream_t m_Stream = nullptr;
57  int m_GpuId = 0;
58 };
59 
// Wrapper class for CUDA events: owns a cudaEvent_t created with the given
// flag on the given GPU, convertible to the raw handle for CUDA API calls.
// NOTE(review): leading numbers are Doxygen listing artifacts; the folded
// line 70 is likely SIMPLE_MOVE_COPY(CudaEvent), mirroring CudaStream above.
63 class CudaEvent {
64 public:
// Creates an event with cudaEventCreateWithFlags-style arguments.
65  explicit CudaEvent(uint flag = cudaEventDefault, int gpuId = 0);
// Virtual: the class is designed as a base; presumably destroys the event —
// TODO confirm in the .cpp.
66  virtual ~CudaEvent();
// Implicit conversion for passing directly to CUDA runtime APIs.
67  operator cudaEvent_t() { return m_Event; }
// GPU device id the event is associated with.
68  int devId() const { return m_GpuId; }
// Mutable reference to the raw handle (e.g. for out-parameter creation).
69  cudaEvent_t& ptr() { return m_Event; }
71 
72 private:
// Move helper: transfers ownership of the raw handle; source is left null.
73  void move_copy(CudaEvent&& o) {
74  m_Event = o.m_Event;
75  o.m_Event = nullptr;
76  }
77 
// Copy construction/assignment disabled; move is provided via move_copy.
78  DISABLE_CLASS_COPY(CudaEvent);
79 
80  cudaEvent_t m_Event = nullptr;
81  int m_GpuId = 0;
82 };
83 
// Base class for managing a single raw memory allocation (m_Buf of m_Size
// bytes on device m_DevId). Concrete subclasses below (CudaDeviceMem,
// CudaHostMem, CpuMem) decide how the buffer is allocated/grown.
// NOTE(review): Doxygen folded out lines 103, 118 and 123 of the original
// header; per the symbol index, line 123 declares `InferMemType m_Type`
// (read by type() and reset in move_copy below) and line 118 is likely
// SIMPLE_MOVE_COPY / DISABLE_CLASS_COPY boilerplate.
87 class SysMem {
88 public:
89  virtual ~SysMem() = default;
// Size of the allocation in bytes.
90  size_t bytes() const { return m_Size; }
91 
// Typed view of the buffer; no type checking — caller must know the layout.
92  template <typename T>
93  T* ptr() const {
94  return (T*)m_Buf;
95  }
96 
// Raw, untyped pointer to the buffer.
97  void* ptr() const { return m_Buf; }
// Device id the memory belongs to.
98  int devId() const { return m_DevId; }
// Memory type tag (device / pinned / cpu ...), stored in m_Type.
99  InferMemType type() const { return m_Type; }
// No-op at this level; kept so pooled buffers can be "reused" uniformly.
100  void reuse() {}
// Reallocate to at least `bytes`; implemented by each subclass.
101  virtual void grow(size_t bytes) = 0;
102 
104 
105 protected:
// Constructible only by subclasses: records size and device id, allocation
// of m_Buf itself is left to the subclass.
106  SysMem(size_t s, int devId) : m_Size(s), m_DevId(devId) {}
// Move helper: transfers buffer ownership and all metadata; the source is
// reset to an empty (kNone) state so it will not free the buffer.
107  void move_copy(SysMem&& o) {
108  m_Buf = o.m_Buf;
109  o.m_Buf = nullptr;
110  m_Size = o.m_Size;
111  o.m_Size = 0;
112  m_DevId = o.m_DevId;
113  o.m_DevId = 0;
114  m_Type = o.m_Type;
115  o.m_Type = InferMemType::kNone;
116  }
117 
119 
120  void* m_Buf = nullptr;
121  size_t m_Size = 0;
122  int m_DevId = 0;
124 };
125 
// SysMem backed by CUDA device (GPU global) memory on the given GPU.
// Allocation details live in the private _allocate helper (defined in .cpp,
// presumably via cudaMalloc — TODO confirm).
129 class CudaDeviceMem : public SysMem {
130 public:
131  CudaDeviceMem(size_t size, int gpuId = 0);
132  ~CudaDeviceMem() override;
// Reallocates the buffer to hold at least `bytes`.
133  void grow(size_t bytes) override;
134 
135 private:
// Performs the actual device allocation of `bytes`.
136  void _allocate(size_t bytes);
137 };
138 
// SysMem backed by CUDA pinned (page-locked) host memory, per the Doxygen
// index ("Allocates and manages CUDA pinned memory"). The gpuId parameter
// records which device the pinned buffer is associated with.
142 class CudaHostMem : public SysMem {
143 public:
144  CudaHostMem(size_t size, int gpuId = 0);
145  ~CudaHostMem() override;
// Reallocates the buffer to hold at least `bytes`.
146  void grow(size_t bytes) override;
147 
148 private:
// Performs the actual pinned-host allocation of `bytes`.
149  void _allocate(size_t bytes);
150 };
151 
// SysMem backed by ordinary (pageable) host memory held in a std::vector;
// no device id parameter since the buffer is not tied to a GPU.
155 class CpuMem : public SysMem {
156 public:
157  CpuMem(size_t size);
158  ~CpuMem() override;
// Reallocates the backing vector to hold at least `bytes`.
159  void grow(size_t bytes) override;
160 
161 private:
// Owning storage; base-class m_Buf presumably points into this vector —
// TODO confirm in the .cpp.
162  std::vector<uint8_t> m_Data;
163 };
164 
// A batch buffer with CUDA memory allocation (per the Doxygen index:
// `class CudaTensorBuf : public BaseBatchBuffer` — the class header at
// original line 168 was folded out of this listing, as was line 177 inside
// setBatchSize, which presumably forwards to BaseBatchBuffer::setBatchSize —
// TODO confirm against the full header).
169 public:
// Allocates a tensor of `dims` x `batchSize` elements of type `dt`, named
// `name`, in memory type `mt` on device `devId`; optionally creates an
// associated CUDA event when `initCuEvent` is set.
170  CudaTensorBuf(const InferDims& dims, InferDataType dt, int batchSize,
171  const std::string& name, InferMemType mt, int devId, bool initCuEvent);
172 
173  ~CudaTensorBuf() override;
174 
// Sets the current batch size; must not exceed the allocated capacity.
175  void setBatchSize(uint32_t size) override {
176  assert(size <= m_MaxBatchSize);
178  }
179 
// Updates the buffer-description name in place.
180  void setName(const std::string& name) { mutableBufDesc().name = name; }
// Pointer to the start of batch element `batchIdx` within the allocation.
181  void* getBufPtr(uint32_t batchIdx) const final;
// Resets the buffer for reuse from a pool: restore full capacity and drop
// any attached buffers (BaseBatchBuffer::detach).
182  void reuse()
183  {
184  setBatchSize(m_MaxBatchSize);
185  detach();
186  }
187 
188 private:
// Owning allocation backing the tensor data.
189  UniqSysMem m_CudaMem;
// Capacity in batch elements fixed at construction.
190  uint32_t m_MaxBatchSize = 0;
191 };
192 
// Factory: create a tensor buffer of the specified memory type `mt`,
// dimensions `dims` x `batchSize`, data type `dt`, on device `devId`.
207 UniqCudaTensorBuf createTensorBuf(const InferDims& dims, InferDataType dt,
208  int batchSize, const std::string& name, InferMemType mt, int devId,
209  bool initCuEvent);
210 
// Convenience factory: CUDA device-memory tensor buffer (per the Doxygen
// index); same parameters as createTensorBuf with the memory type fixed.
224 UniqCudaTensorBuf createGpuTensorBuf(const InferDims& dims,
225  InferDataType dt, int batchSize, const std::string& name = "",
226  int devId = 0, bool initCuEvent = false);
227 
// Convenience factory: CUDA pinned host-memory tensor buffer (per the
// Doxygen index).
// NOTE(review): original lines 245-250 were folded out of this listing;
// per the index they declare
//   NvDsInferStatus syncAllCudaEvents(const SharedBatchArray& bufList);
241 UniqCudaTensorBuf createCpuTensorBuf(const InferDims& dims,
242  InferDataType dt, int batchSize, const std::string& name = "",
243  int devId = 0, bool initCuEvent = false);
244 
251 
252 } // namespace nvdsinferserver
253 
254 #endif
nvdsinferserver
This is a header file for pre-processing CUDA kernels with normalization and mean subtraction required by nvdsinfer.
Definition: infer_custom_process.h:24
nvdsinferserver::InferDataType
InferDataType
Datatype of the tensor buffer.
Definition: infer_datatypes.h:83
nvdsinferserver::CudaHostMem
Allocates and manages CUDA pinned memory.
Definition: infer_cuda_utils.h:142
nvdsinferserver::SysMem::m_Buf
void * m_Buf
Definition: infer_cuda_utils.h:120
nvdsinferserver::CpuMem::CpuMem
CpuMem(size_t size)
nvdsinferserver::CudaEvent::~CudaEvent
virtual ~CudaEvent()
SIMPLE_MOVE_COPY
#define SIMPLE_MOVE_COPY(Cls)
Definition: infer_defines.h:29
nvdsinferserver::CudaHostMem::grow
void grow(size_t bytes) override
nvdsinferserver::createTensorBuf
UniqCudaTensorBuf createTensorBuf(const InferDims &dims, InferDataType dt, int batchSize, const std::string &name, InferMemType mt, int devId, bool initCuEvent)
Create a tensor buffer of the specified memory type, dimensions on the given device.
nvdsinferserver::SysMem::devId
int devId() const
Definition: infer_cuda_utils.h:98
nvdsinferserver::CudaEvent::CudaEvent
CudaEvent(uint flag=cudaEventDefault, int gpuId=0)
nvdsinferserver::SysMem::~SysMem
virtual ~SysMem()=default
infer_batch_buffer.h
Header file of batch buffer related class declarations.
nvdsinferserver::CudaStream::ptr
cudaStream_t & ptr()
Definition: infer_cuda_utils.h:46
nvdsinferserver::CudaEvent::ptr
cudaEvent_t & ptr()
Definition: infer_cuda_utils.h:69
nvdsinferserver::CpuMem::grow
void grow(size_t bytes) override
nvdsinferserver::SysMem::m_Size
size_t m_Size
Definition: infer_cuda_utils.h:121
nvdsinferserver::BaseBatchBuffer::detach
void detach()
Definition: infer_batch_buffer.h:72
nvdsinferserver::SysMem::ptr
void * ptr() const
Definition: infer_cuda_utils.h:97
nvdsinferserver::CudaStream::devId
int devId() const
Definition: infer_cuda_utils.h:45
nvdsinferserver::SysMem::type
InferMemType type() const
Definition: infer_cuda_utils.h:99
nvdsinferserver::CudaEvent
Wrapper class for CUDA events.
Definition: infer_cuda_utils.h:63
nvdsinferserver::BaseBatchBuffer::setBatchSize
virtual void setBatchSize(uint32_t size)
Definition: infer_batch_buffer.h:51
nvdsinferserver::CudaTensorBuf::setName
void setName(const std::string &name)
Definition: infer_cuda_utils.h:180
nvdsinferserver::UniqCudaTensorBuf
std::unique_ptr< CudaTensorBuf > UniqCudaTensorBuf
Definition: infer_common.h:90
nvdsinferserver::CudaStream
Wrapper class for CUDA streams.
Definition: infer_cuda_utils.h:39
infer_common.h
Header file of the common declarations for the nvinferserver library.
CudaStream
Helper class for managing Cuda Streams.
Definition: nvdspreprocess_impl.h:97
nvdsinferserver::SysMem::grow
virtual void grow(size_t bytes)=0
nvdsinferserver::SysMem::move_copy
void move_copy(SysMem &&o)
Definition: infer_cuda_utils.h:107
nvdsinferserver::CudaTensorBuf::reuse
void reuse()
Definition: infer_cuda_utils.h:182
nvdsinferserver::CudaStream::~CudaStream
~CudaStream()
nvdsinferserver::SysMem::ptr
T * ptr() const
Definition: infer_cuda_utils.h:93
cudaStream_t
struct CUstream_st * cudaStream_t
Forward declaration of cudaStream_t.
Definition: nvbufsurftransform.h:34
nvdsinferserver::CudaTensorBuf::~CudaTensorBuf
~CudaTensorBuf() override
nvdsinferserver::createGpuTensorBuf
UniqCudaTensorBuf createGpuTensorBuf(const InferDims &dims, InferDataType dt, int batchSize, const std::string &name="", int devId=0, bool initCuEvent=false)
Create a CUDA device memory tensor buffer of specified dimensions on the given device.
nvdsinferserver::createCpuTensorBuf
UniqCudaTensorBuf createCpuTensorBuf(const InferDims &dims, InferDataType dt, int batchSize, const std::string &name="", int devId=0, bool initCuEvent=false)
Create a CUDA pinned memory tensor buffer of specified dimensions on the given device.
nvdsinferserver::CpuMem::~CpuMem
~CpuMem() override
nvdsinferserver::syncAllCudaEvents
NvDsInferStatus syncAllCudaEvents(const SharedBatchArray &bufList)
Synchronize on all events associated with the batch buffer array.
nvdsinferserver::CudaTensorBuf::setBatchSize
void setBatchSize(uint32_t size) override
Definition: infer_cuda_utils.h:175
nvdsinferserver::CpuMem
Allocates and manages host memory.
Definition: infer_cuda_utils.h:155
nvdsinferserver::CudaDeviceMem::~CudaDeviceMem
~CudaDeviceMem() override
nvdsinferserver::SysMem
Base class for managing memory allocation.
Definition: infer_cuda_utils.h:87
nvdsinferserver::SysMem::m_DevId
int m_DevId
Definition: infer_cuda_utils.h:122
nvdsinferserver::CudaHostMem::~CudaHostMem
~CudaHostMem() override
nvdsinferserver::CudaTensorBuf::getBufPtr
void * getBufPtr(uint32_t batchIdx) const final
nvdsinferserver::InferMemType
InferMemType
The memory types of inference buffers.
Definition: infer_datatypes.h:56
nvdsinferserver::CudaEvent::devId
int devId() const
Definition: infer_cuda_utils.h:68
nvdsinferserver::InferDims
Holds the information about the dimensions of a neural network layer.
Definition: infer_datatypes.h:146
nvdsinferserver::CudaDeviceMem::CudaDeviceMem
CudaDeviceMem(size_t size, int gpuId=0)
nvdsinferserver::SysMem::bytes
size_t bytes() const
Definition: infer_cuda_utils.h:90
nvdsinferserver::CudaDeviceMem::grow
void grow(size_t bytes) override
nvdsinferserver::CudaTensorBuf::CudaTensorBuf
CudaTensorBuf(const InferDims &dims, InferDataType dt, int batchSize, const std::string &name, InferMemType mt, int devId, bool initCuEvent)
nvdsinferserver::BaseBatchBuffer
The base class for batch buffers.
Definition: infer_batch_buffer.h:34
nvdsinferserver::CudaStream::CudaStream
CudaStream(uint flag=cudaStreamDefault, int gpuId=0, int priority=0)
nvdsinferserver::CudaTensorBuf
A batch buffer with CUDA memory allocation.
Definition: infer_cuda_utils.h:168
nvdsinferserver::BaseBatchBuffer::mutableBufDesc
InferBufferDescription & mutableBufDesc()
Definition: infer_batch_buffer.h:50
nvdsinferserver::CudaDeviceMem
Allocates and manages CUDA device memory.
Definition: infer_cuda_utils.h:129
nvdsinferserver::CudaHostMem::CudaHostMem
CudaHostMem(size_t size, int gpuId=0)
nvdsinferserver::SysMem::DISABLE_CLASS_COPY
DISABLE_CLASS_COPY(SysMem)
nvdsinferserver::SharedBatchArray
std::shared_ptr< BaseBatchArray > SharedBatchArray
Definition: infer_common.h:75
nvdsinferserver::InferMemType::kNone
@ kNone
nvdsinferserver::SysMem::SysMem
SysMem(size_t s, int devId)
Definition: infer_cuda_utils.h:106
nvdsinferserver::UniqSysMem
std::unique_ptr< SysMem > UniqSysMem
Definition: infer_common.h:89
nvdsinferserver::InferBufferDescription::name
std::string name
Name of the buffer.
Definition: infer_datatypes.h:192
NvDsInferStatus
NvDsInferStatus
Enum for the status codes returned by NvDsInferContext.
Definition: nvdsinfer.h:218
nvdsinferserver::SysMem::reuse
void reuse()
Definition: infer_cuda_utils.h:100
nvdsinferserver::SysMem::m_Type
InferMemType m_Type
Definition: infer_cuda_utils.h:123