|
|
NVIDIA DeepStream SDK API Reference
|
9.0 Release
|
Go to the documentation of this file.
21 #ifndef __NVDSINFER_TRTIS_BACKEND_H__
22 #define __NVDSINFER_TRTIS_BACKEND_H__
26 #include "infer_post_datatypes.h"
31 class TrtServerAllocator;
33 class TrtServerRequest;
34 class TrtServerResponse;
39 class TrtISBackend :
public BaseBackend {
59 m_ClassifyParams.emplace_back(c); }
72 return m_ClassifyParams;}
73 const std::string&
model()
const {
return m_Model; }
74 int64_t
version()
const {
return m_ModelVersion; }
112 size_t& bytes = m_TensorMaxBytes[name];
113 bytes = std::max<size_t>(maxBytes, bytes);
150 m_ResponseAllocator = std::move(allocator);
200 const std::string& tensor,
size_t bytes,
InferMemType memType, int64_t devId);
224 using PoolKey = std::tuple<std::string, int64_t, InferMemType>;
255 std::shared_ptr<TrtServerRequest> request,
256 std::unique_ptr<TrtServerResponse> uniqResponse,
318 int64_t m_ModelVersion = -1;
327 bool m_NeedUnload =
false;
331 std::vector<TritonClassParams> m_ClassifyParams;
343 int64_t m_OutputDevId = -1;
347 int m_PerPoolSize = 2;
351 std::map<PoolKey, PoolValue> m_ResponsePool;
355 using SharedMutex = std::shared_timed_mutex;
359 SharedMutex m_ResponseMutex;
363 std::unordered_map<std::string, size_t> m_TensorMaxBytes;
368 using ReorderThread = QueueThread<std::vector<ReorderItemPtr>>;
372 std::unique_ptr<ReorderThread> m_ReorderThread;
This is a header file for pre-processing cuda kernels with normalization and mean subtraction require...
SharedBufPool< UniqSysMem > PoolValue
The buffer pool for the specified tensor, GPU and memory type combination.
PoolValue findResponsePool(PoolKey &key)
Find the buffer pool for the given key.
#define INFER_MEM_ALIGNMENT
std::unique_ptr< TrtServerAllocator > UniqTritonAllocator
NvDsInferStatus initialize() override
Check that the server and the model are ready, get the information of layers, set up the reorder thread and out...
const std::string & model() const
Header file of the common declarations for the nvinferserver library.
virtual NvDsInferStatus ensureModelReady()
Check that the model is ready; load the model if it is not.
std::vector< InputShapeTuple > InputShapes
Header file containing utility functions and classes used by the nvinferserver low level library.
std::function< void(SharedBatchArray)> InputsConsumed
Function wrapper called after the input buffer is consumed.
SharedBatchArray outputs
Array of output batch buffers.
NvDsInferStatus specifyInputDims(const InputShapes &shapes) override
Specify the input layers for the backend.
std::shared_ptr< BaseBatchArray > SharedBatchArray
@ NVDSINFER_SUCCESS
NvDsInferContext operation succeeded.
void setAllocator(UniqTritonAllocator allocator)
Set the output tensor allocator.
InferMemType outputMemType() const
PoolValue createResponsePool(PoolKey &key, size_t bytes)
Create a new buffer pool for the key.
std::function< void(NvDsInferStatus, SharedBatchArray)> AsyncDone
Asynchronous inference done function: AsyncDone(Status, outputs).
bool debatchingOutput(SharedBatchArray &outputs, SharedBatchArray &inputs)
Separate the batch dimension from the output buffer descriptors.
std::function< void(NvDsInferStatus, SharedBatchArray)> InferenceDone
Function wrapper for post inference processing.
NvDsInferStatus status
Status of processing.
int64_t outputDevId() const
NvDsInferStatus ensureInputs(SharedBatchArray &inputs)
Ensure that the input buffers in the array are those expected by the model, and reshape the input buffers if req...
std::shared_ptr< CudaStream > SharedCuStream
Cuda based pointers.
std::shared_ptr< TrtServerAllocator > ShrTritonAllocator
NvDsInferStatus enqueue(SharedBatchArray inputs, SharedCuStream stream, InputsConsumed bufConsumed, InferenceDone inferenceDone) override
Enqueue an input for an inference request by calling Run() and adding the corresponding task to the reorder ...
void setTensorMaxBytes(const std::string &name, size_t maxBytes)
Set the maximum size for the tensor; the maximum of the existing size and the new input size is used.
void setOutputPoolSize(int size)
Set the size of the output buffer pool.
InferenceDone inferenceDone
Inference done callback function.
virtual NvDsInferStatus ensureServerReady()
Check that the Triton inference server is live.
virtual void requestTritonOutputNames(std::set< std::string > &outNames)
Get the list of output tensor names.
virtual NvDsInferStatus Run(SharedBatchArray inputs, InputsConsumed bufConsumed, AsyncDone asyncDone)
Create an inference request and trigger asynchronous inference.
std::shared_ptr< SysMem > SharedSysMem
std::shared_ptr< TrtISServer > TrtServerPtr
#define INFER_ROUND_UP(value, align)
SharedSysMem allocateResponseBuf(const std::string &tensor, size_t bytes, InferMemType memType, int64_t devId)
Acquire a buffer from the output buffer pool associated with the device ID and memory type.
std::vector< TritonClassParams > getClassifyParams()
InferMemType
The memory types of inference buffers.
std::tuple< std::string, int64_t, InferMemType > PoolKey
Tuple holding tensor name, GPU ID, memory type.
void setOutputMemType(InferMemType memType)
void releaseResponseBuf(const std::string &tensor, SharedSysMem mem)
Release the output tensor buffer.
void setOutputDevId(int64_t devId)
bool inferenceDoneReorderLoop(ReorderItemPtr item)
Add input buffers to the output buffer list if required.
~TrtISBackend() override
Destructor.
int outputPoolSize() const
void serverInferCompleted(std::shared_ptr< TrtServerRequest > request, std::unique_ptr< TrtServerResponse > uniqResponse, InputsConsumed inputsConsumed, AsyncDone asyncDone)
Call the inputs consumed function and parse the inference response to form the array of output batch ...
NvDsInferStatus fixateDims(const SharedBatchArray &bufs)
Extend the dimensions to include batch size for the buffers in input array.
NvDsInferStatus setupReorderThread()
Create a loop thread that calls inferenceDoneReorderLoop on the queued items.
SharedBatchArray inputs
Array of input batch buffers.
TrtServerPtr & server()
Get the Triton server handle.
std::promise< void > promise
Synchronization objects.
std::future< void > future
TrtISBackend(const std::string &name, int64_t version, TrtServerPtr ptr=nullptr)
Constructor.
virtual NvDsInferStatus setupLayersInfo()
Get the model configuration from the server and populate layer information.
std::shared_ptr< ReorderItem > ReorderItemPtr
void addClassifyParams(const TritonClassParams &c)
Add Triton Classification parameters to the list.
Header file for inference processing backend base class.
NvDsInferStatus
Enum for the status codes returned by NvDsInferContext.