NVIDIA DeepStream SDK API Reference

6.4 Release
infer_trtis_backend.h
1 /*
2  * SPDX-FileCopyrightText: Copyright (c) 2020-2022 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
3  * SPDX-License-Identifier: LicenseRef-NvidiaProprietary
4  *
5  * NVIDIA CORPORATION, its affiliates and licensors retain all intellectual
6  * property and proprietary rights in and to this material, related
7  * documentation and any modifications thereto. Any use, reproduction,
8  * disclosure or distribution of this material and related documentation
9  * without an express license agreement from NVIDIA CORPORATION or
10  * its affiliates is strictly prohibited.
11  */
12 
21 #ifndef __NVDSINFER_TRTIS_BACKEND_H__
22 #define __NVDSINFER_TRTIS_BACKEND_H__
23 
24 #include "infer_base_backend.h"
25 #include "infer_common.h"
26 #include "infer_post_datatypes.h"
27 #include "infer_utils.h"
28 
29 namespace nvdsinferserver {
30 
31 class TrtServerAllocator;
32 class TrtISServer;
33 class TrtServerRequest;
34 class TrtServerResponse;
35 
39 class TrtISBackend : public BaseBackend {
40 public:
47  TrtISBackend(
48  const std::string& name, int64_t version, TrtServerPtr ptr = nullptr);
49 
53  ~TrtISBackend() override;
54 
58  void addClassifyParams(const TritonClassParams& c) {
59  m_ClassifyParams.emplace_back(c); }
60 
65  void setOutputPoolSize(int size) { m_PerPoolSize = size; }
66  int outputPoolSize() const { return m_PerPoolSize; }
67  void setOutputMemType(InferMemType memType) { m_OutputMemType = memType; }
68  InferMemType outputMemType() const { return m_OutputMemType; }
69  void setOutputDevId(int64_t devId) { m_OutputDevId = devId; }
70  int64_t outputDevId() const { return m_OutputDevId; }
71  std::vector<TritonClassParams> getClassifyParams() {
72  return m_ClassifyParams;}
73  const std::string& model() const { return m_Model; }
74  int64_t version() const { return m_ModelVersion; }
82  NvDsInferStatus initialize() override;
83 
89  NvDsInferStatus specifyInputDims(const InputShapes& shapes) override;
90 
99  NvDsInferStatus enqueue(
100  SharedBatchArray inputs, SharedCuStream stream,
101  InputsConsumed bufConsumed, InferenceDone inferenceDone) override;
102 
110  void setTensorMaxBytes(const std::string& name, size_t maxBytes)
111  {
112  size_t& bytes = m_TensorMaxBytes[name];
113  bytes = std::max<size_t>(maxBytes, bytes);
114  bytes = INFER_ROUND_UP(bytes, INFER_MEM_ALIGNMENT);
115  }
116 
117 protected:
118  // interface for derived class
119 
124  virtual void requestTritonOutputNames(std::set<std::string>& outNames);
125 
130  virtual NvDsInferStatus ensureServerReady();
131 
136  virtual NvDsInferStatus ensureModelReady();
137 
143  virtual NvDsInferStatus setupLayersInfo();
144 
148  void setAllocator(UniqTritonAllocator allocator)
149  {
150  m_ResponseAllocator = std::move(allocator);
151  }
152 
160 
164  TrtServerPtr& server() { return m_Server; }
165 
169  using AsyncDone = std::function<void(NvDsInferStatus, SharedBatchArray)>;
180  virtual NvDsInferStatus Run(
181  SharedBatchArray inputs, InputsConsumed bufConsumed,
182  AsyncDone asyncDone);
183 
188  NvDsInferStatus ensureInputs(SharedBatchArray& inputs);
189 
199  SharedSysMem allocateResponseBuf(
200  const std::string& tensor, size_t bytes, InferMemType memType, int64_t devId);
201 
207  void releaseResponseBuf(const std::string& tensor, SharedSysMem mem);
208 
216 
220  enum { kName, kGpuId, kMemType };
224  using PoolKey = std::tuple<std::string, int64_t, InferMemType>;
229  using PoolValue = SharedBufPool<UniqSysMem>;
230 
234  PoolValue findResponsePool(PoolKey& key);
235 
242  PoolValue createResponsePool(PoolKey& key, size_t bytes);
243 
254  void serverInferCompleted(
255  std::shared_ptr<TrtServerRequest> request,
256  std::unique_ptr<TrtServerResponse> uniqResponse,
257  InputsConsumed inputsConsumed, AsyncDone asyncDone);
258 
262  struct ReorderItem {
266  NvDsInferStatus status;
271  SharedBatchArray inputs;
275  SharedBatchArray outputs;
276 
281  std::promise<void> promise;
282  std::future<void> future;
288  InferenceDone inferenceDone;
289  };
290  /*
291  * @brief Pointer to the reorder thread task.
292  */
293  using ReorderItemPtr = std::shared_ptr<ReorderItem>;
301 
308  bool debatchingOutput(SharedBatchArray& outputs, SharedBatchArray& inputs);
309 
310 private:
314  std::string m_Model;
318  int64_t m_ModelVersion = -1;
322  TrtServerPtr m_Server;
327  bool m_NeedUnload = false;
331  std::vector<TritonClassParams> m_ClassifyParams;
335  ShrTritonAllocator m_ResponseAllocator;
339  InferMemType m_OutputMemType = InferMemType::kNone;
343  int64_t m_OutputDevId = -1;
347  int m_PerPoolSize = 2;
351  std::map<PoolKey, PoolValue> m_ResponsePool;
355  using SharedMutex = std::shared_timed_mutex;
359  SharedMutex m_ResponseMutex;
363  std::unordered_map<std::string, size_t> m_TensorMaxBytes;
364 
368  using ReorderThread = QueueThread<std::vector<ReorderItemPtr>>;
372  std::unique_ptr<ReorderThread> m_ReorderThread;
373 };
374 
375 } // namespace nvdsinferserver
376 
377 #endif
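
For orientation, below is a minimal usage sketch of the public interface declared in this header. It is not taken from the SDK documentation: the model name, the helper arguments (server handle, input batch, CUDA stream), and the choice of InferMemType::kGpuCuda (declared in infer_datatypes.h) are assumptions for illustration only.

// Hedged usage sketch: drives TrtISBackend through its public interface.
#include <utility>

#include "infer_trtis_backend.h"

using namespace nvdsinferserver;

NvDsInferStatus runOnce(
    TrtServerPtr server, SharedBatchArray inputBatch, SharedCuStream stream)
{
    // Bind a model to an existing Triton server handle. "my_model" is a
    // placeholder; -1 matches the header's default model version.
    TrtISBackend backend("my_model", -1, server);

    // Output buffers are drawn from per-tensor pools keyed by
    // (tensor name, GPU id, memory type); see PoolKey above.
    backend.setOutputMemType(InferMemType::kGpuCuda);  // assumed enum value from infer_datatypes.h
    backend.setOutputDevId(0);
    backend.setOutputPoolSize(4);

    // Checks server/model readiness, reads layer info and starts the reorder thread.
    NvDsInferStatus status = backend.initialize();
    if (status != NVDSINFER_SUCCESS) {
        return status;
    }

    // Asynchronous inference: the first callback fires when the input buffers
    // may be reused, the second with the final status and output buffers.
    return backend.enqueue(
        std::move(inputBatch), stream,
        [](SharedBatchArray /*consumed*/) { /* recycle input buffers here */ },
        [](NvDsInferStatus s, SharedBatchArray outputs) {
            (void)s;
            (void)outputs;  // hand results to downstream processing
        });
}
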
nvdsinferserver
Copyright (c) 2021, NVIDIA CORPORATION.
Definition: infer_custom_process.h:28
nvdsinferserver::TrtISBackend::PoolValue
SharedBufPool< UniqSysMem > PoolValue
The buffer pool for the specified tensor, GPU and memory type combination.
Definition: infer_trtis_backend.h:229
nvdsinferserver::TrtISBackend::findResponsePool
PoolValue findResponsePool(PoolKey &key)
Find the buffer pool for the given key.
nvdsinferserver::TrtISBackend::initialize
NvDsInferStatus initialize() override
Check that the server and model are ready, get the information of layers, set up the reorder thread and out...
nvdsinferserver::TrtISBackend::model
const std::string & model() const
Definition: infer_trtis_backend.h:73
TritonClassParams
Definition: infer_post_datatypes.h:96
nvdsinferserver::TrtISBackend::ensureModelReady
virtual NvDsInferStatus ensureModelReady()
Check that the model is ready, load the model if it is not.
nvdsinferserver::IBackend::InputShapes
std::vector< InputShapeTuple > InputShapes
Definition: infer_ibackend.h:84
nvdsinferserver::SharedSysMem
std::shared_ptr< SysMem > SharedSysMem
Definition: infer_common.h:88
nvdsinferserver::IBackend::InputsConsumed
std::function< void(SharedBatchArray)> InputsConsumed
Function wrapper called after the input buffer is consumed.
Definition: infer_ibackend.h:70
nvdsinferserver::TrtISBackend::ReorderItem::outputs
SharedBatchArray outputs
Array of output batch buffers.
Definition: infer_trtis_backend.h:275
nvdsinferserver::TrtISBackend::specifyInputDims
NvDsInferStatus specifyInputDims(const InputShapes &shapes) override
Specify the input layers for the backend.
nvdsinferserver::TrtISBackend
Triton backend processing class.
Definition: infer_trtis_backend.h:39
nvdsinferserver::TrtISBackend::kName
@ kName
Definition: infer_trtis_backend.h:220
nvdsinferserver::TrtISBackend::kMemType
@ kMemType
Definition: infer_trtis_backend.h:220
NVDSINFER_SUCCESS
@ NVDSINFER_SUCCESS
NvDsInferContext operation succeeded.
Definition: nvdsinfer.h:220
nvdsinferserver::TrtISBackend::ReorderItem::future
std::future< void > future
Definition: infer_trtis_backend.h:282
nvdsinferserver::TrtISBackend::setAllocator
void setAllocator(UniqTritonAllocator allocator)
Set the output tensor allocator.
Definition: infer_trtis_backend.h:148
nvdsinferserver::TrtISBackend::outputMemType
InferMemType outputMemType() const
Definition: infer_trtis_backend.h:68
nvdsinferserver::UniqTritonAllocator
std::unique_ptr< TrtServerAllocator > UniqTritonAllocator
Definition: infer_common.h:123
nvdsinferserver::ShrTritonAllocator
std::shared_ptr< TrtServerAllocator > ShrTritonAllocator
Definition: infer_common.h:124
nvdsinferserver::TrtISBackend::createResponsePool
PoolValue createResponsePool(PoolKey &key, size_t bytes)
Create a new buffer pool for the key.
nvdsinferserver::TrtServerPtr
std::shared_ptr< TrtISServer > TrtServerPtr
Definition: infer_common.h:121
infer_utils.h
Header file containing utility functions and classes used by the nvinferserver low level library.
nvdsinferserver::TrtISBackend::AsyncDone
std::function< void(NvDsInferStatus, SharedBatchArray)> AsyncDone
Asynchronous inference done function: AsyncDone(Status, outputs).
Definition: infer_trtis_backend.h:169
nvdsinferserver::TrtISBackend::debatchingOutput
bool debatchingOutput(SharedBatchArray &outputs, SharedBatchArray &inputs)
Separate the batch dimension from the output buffer descriptors.
infer_common.h
Header file of the common declarations for the nvinferserver library.
nvdsinferserver::TrtISBackend::ReorderItem
Reorder thread task.
Definition: infer_trtis_backend.h:262
nvdsinferserver::IBackend::InferenceDone
std::function< void(NvDsInferStatus, SharedBatchArray)> InferenceDone
Function wrapper for post inference processing.
Definition: infer_ibackend.h:66
nvdsinferserver::TrtISBackend::ReorderItem::status
NvDsInferStatus status
Status of processing.
Definition: infer_trtis_backend.h:266
nvdsinferserver::TrtISBackend::outputDevId
int64_t outputDevId() const
Definition: infer_trtis_backend.h:70
nvdsinferserver::TrtISBackend::version
int64_t version() const
Definition: infer_trtis_backend.h:74
nvdsinferserver::TrtISBackend::ensureInputs
NvDsInferStatus ensureInputs(SharedBatchArray &inputs)
Ensure that the array of input buffers is expected by the model and reshape the input buffers if req...
nvdsinferserver::TrtISBackend::kGpuId
@ kGpuId
Definition: infer_trtis_backend.h:220
nvdsinferserver::TrtISBackend::enqueue
NvDsInferStatus enqueue(SharedBatchArray inputs, SharedCuStream stream, InputsConsumed bufConsumed, InferenceDone inferenceDone) override
Enqueue an input for an inference request by calling Run() and adding the corresponding task to the reorder ...
nvdsinferserver::TrtISBackend::setTensorMaxBytes
void setTensorMaxBytes(const std::string &name, size_t maxBytes)
Set the maximum size for the tensor; the maximum of the existing size and the new input size is used.
Definition: infer_trtis_backend.h:110
nvdsinferserver::TrtISBackend::setOutputPoolSize
void setOutputPoolSize(int size)
Helper function to access the member variables.
Definition: infer_trtis_backend.h:65
nvdsinferserver::TrtISBackend::ReorderItem::inferenceDone
InferenceDone inferenceDone
Inference done callback function.
Definition: infer_trtis_backend.h:288
nvdsinferserver::TrtISBackend::ensureServerReady
virtual NvDsInferStatus ensureServerReady()
Check that the Triton inference server is live.
nvdsinferserver::TrtISBackend::requestTritonOutputNames
virtual void requestTritonOutputNames(std::set< std::string > &outNames)
Get the list of output tensor names.
INFER_ROUND_UP
#define INFER_ROUND_UP(value, align)
Definition: infer_defines.h:127
nvdsinferserver::TrtISBackend::Run
virtual NvDsInferStatus Run(SharedBatchArray inputs, InputsConsumed bufConsumed, AsyncDone asyncDone)
Create an inference request and trigger asynchronous inference.
nvdsinferserver::SharedCuStream
std::shared_ptr< CudaStream > SharedCuStream
Cuda based pointers.
Definition: infer_common.h:84
infer_post_datatypes.h
nvdsinferserver::TrtISBackend::allocateResponseBuf
SharedSysMem allocateResponseBuf(const std::string &tensor, size_t bytes, InferMemType memType, int64_t devId)
Acquire a buffer from the output buffer pool associated with the device ID and memory type.
nvdsinferserver::TrtISBackend::getClassifyParams
std::vector< TritonClassParams > getClassifyParams()
Definition: infer_trtis_backend.h:71
nvdsinferserver::InferMemType
InferMemType
The memory types of inference buffers.
Definition: infer_datatypes.h:56
nvdsinferserver::TrtISBackend::PoolKey
std::tuple< std::string, int64_t, InferMemType > PoolKey
Tuple holding tensor name, GPU ID, memory type.
Definition: infer_trtis_backend.h:224
nvdsinferserver::TrtISBackend::setOutputMemType
void setOutputMemType(InferMemType memType)
Definition: infer_trtis_backend.h:67
nvdsinferserver::TrtISBackend::releaseResponseBuf
void releaseResponseBuf(const std::string &tensor, SharedSysMem mem)
Release the output tensor buffer.
nvdsinferserver::TrtISBackend::setOutputDevId
void setOutputDevId(int64_t devId)
Definition: infer_trtis_backend.h:69
nvdsinferserver::TrtISBackend::inferenceDoneReorderLoop
bool inferenceDoneReorderLoop(ReorderItemPtr item)
Add input buffers to the output buffer list if required.
nvdsinferserver::TrtISBackend::~TrtISBackend
~TrtISBackend() override
Destructor.
nvdsinferserver::TrtISBackend::outputPoolSize
int outputPoolSize() const
Definition: infer_trtis_backend.h:66
INFER_MEM_ALIGNMENT
#define INFER_MEM_ALIGNMENT
Definition: infer_defines.h:130
nvdsinferserver::BaseBackend
Base class of inference backend processing.
Definition: infer_base_backend.h:40
infer_base_backend.h
Header file for inference processing backend base class.
nvdsinferserver::TrtISBackend::serverInferCompleted
void serverInferCompleted(std::shared_ptr< TrtServerRequest > request, std::unique_ptr< TrtServerResponse > uniqResponse, InputsConsumed inputsConsumed, AsyncDone asyncDone)
Call the inputs consumed function and parse the inference response to form the array of output batch ...
nvdsinferserver::TrtISBackend::ReorderItem::promise
std::promise< void > promise
Synchronization objects.
Definition: infer_trtis_backend.h:281
nvdsinferserver::TrtISBackend::fixateDims
NvDsInferStatus fixateDims(const SharedBatchArray &bufs)
Extend the dimensions to include the batch size for the buffers in the input array.
nvdsinferserver::TrtISBackend::setupReorderThread
NvDsInferStatus setupReorderThread()
Create a loop thread that calls inferenceDoneReorderLoop on the queued items.
nvdsinferserver::TrtISBackend::ReorderItem::inputs
SharedBatchArray inputs
Array of input batch buffers.
Definition: infer_trtis_backend.h:271
nvdsinferserver::TrtISBackend::server
TrtServerPtr & server()
Get the Triton server handle.
Definition: infer_trtis_backend.h:164
nvdsinferserver::TrtISBackend::TrtISBackend
TrtISBackend(const std::string &name, int64_t version, TrtServerPtr ptr=nullptr)
Constructor.
nvdsinferserver::TrtISBackend::setupLayersInfo
virtual NvDsInferStatus setupLayersInfo()
Get the model configuration from the server and populate layer information.
nvdsinferserver::SharedBatchArray
std::shared_ptr< BaseBatchArray > SharedBatchArray
Definition: infer_common.h:75
nvdsinferserver::InferMemType::kNone
@ kNone
nvdsinferserver::TrtISBackend::ReorderItemPtr
std::shared_ptr< ReorderItem > ReorderItemPtr
Definition: infer_trtis_backend.h:293
nvdsinferserver::TrtISBackend::addClassifyParams
void addClassifyParams(const TritonClassParams &c)
Add Triton Classification parameters to the list.
Definition: infer_trtis_backend.h:58
NvDsInferStatus
NvDsInferStatus
Enum for the status codes returned by NvDsInferContext.
Definition: nvdsinfer.h:218
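
The protected methods above (requestTritonOutputNames, Run, ensureServerReady and the related hooks) form the customization surface for derived backends. As an illustration only, and not a class shipped with the SDK, a subclass could narrow the set of output tensors requested from Triton by overriding requestTritonOutputNames(); the tensor name used below is hypothetical.

#include <set>
#include <string>

#include "infer_trtis_backend.h"

namespace nvdsinferserver {

// Hypothetical subclass: asks Triton only for one named output tensor
// instead of relying on the base class's default output selection.
class SingleOutputBackend : public TrtISBackend {
public:
    using TrtISBackend::TrtISBackend;  // reuse the (name, version, server) constructor

protected:
    void requestTritonOutputNames(std::set<std::string>& outNames) override
    {
        outNames.insert("detection_boxes");  // placeholder tensor name
    }
};

} // namespace nvdsinferserver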