NVIDIA DeepStream SDK API Reference
7.1 Release
infer_grpc_client.h
#ifndef __INFER_GRPC_CLIENT_H__
#define __INFER_GRPC_CLIENT_H__

#include <condition_variable>
// ...
#include "grpc_client.h"

namespace tc = triton::client;

typedef std::map<std::string, std::string> Headers;

// Triton gRPC inference request class holding data associated with one inference request.
class TritonGrpcRequest {
public:
    // Helper functions to access the member variables.
    std::vector<std::shared_ptr<tc::InferInput>> inputs() { return m_InferInputs; }
    std::vector<std::shared_ptr<tc::InferRequestedOutput>> outputs() { return m_RequestOutputs; }
    std::shared_ptr<tc::InferOptions> getOption() { return m_InferOptions; }
    std::vector<std::string> getOutNames() { return m_OutputNames; }
    void setOutNames(std::vector<std::string> outnames) { m_OutputNames = outnames; }
    // Append to the array of host memory allocations.
    void attachData(void *data) { m_CpuData.push_back(data); }

private:
    std::vector<std::shared_ptr<tc::InferInput>> m_InferInputs;
    std::vector<std::shared_ptr<tc::InferRequestedOutput>> m_RequestOutputs;
    std::shared_ptr<tc::InferOptions> m_InferOptions;
    std::vector<std::string> m_OutputNames;
    std::vector<void*> m_CpuData;
    std::vector<std::string> m_InputCudaBufNames;
};

// Wrapper class for the gRPC client of the Triton Inference Server.
class InferGrpcClient {
public:
    // ...
    NvDsInferStatus getModelMetadata(inference::ModelMetadataResponse *model_metadata,
        std::string &model_name, std::string &model_version);
    NvDsInferStatus getModelConfig(inference::ModelConfigResponse *config,
        const std::string &name, const std::string &version = "",
        const Headers &headers = Headers());
    bool isModelReady(const std::string &model, const std::string version = "");
    // ...
    SharedGrpcRequest createRequest(const std::string &model, const std::string &version,
        SharedIBatchArray input, const std::vector<std::string> &outputs,
        const std::vector<TritonClassParams> &classList = std::vector<TritonClassParams>());

private:
    tc::Error SetInputCudaSharedMemory(tc::InferInput *inferInput, /* ... */);

    bool m_EnableCudaBufferSharing;
    std::unique_ptr<tc::InferenceServerGrpcClient> m_GrpcClient;
    std::atomic<uint64_t> m_LastRequestId{UINT64_C(0)};
    std::unordered_map<std::string, std::string> m_RegisteredBuffersHash;
};

#endif /* __INFER_GRPC_CLIENT_H__ */
void setInputBatchArray(SharedIBatchArray inputBatch)
std::vector< std::string > getInputCudaBufNames()
This is a header file for pre-processing cuda kernels with normalization and mean subtraction required by nvdsinfer.
NvDsInferStatus inferAsync(SharedGrpcRequest request, TritonGrpcAsyncDone done)
Get the inference input and output list from the request and trigger the asynchronous inference request.
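A minimal sketch of issuing an asynchronous request with this API (assuming an initialized InferGrpcClient named client, a prepared SharedIBatchArray named inputBatch, and placeholder model/tensor names; none of these names is prescribed by the library):

// Build the request for model "my_model", version "1", asking for one output tensor.
SharedGrpcRequest request = client.createRequest(
    "my_model", "1", inputBatch, {"output_tensor"});
if (request) {
    // The callback runs once the gRPC response arrives; the returned status only
    // reports whether the request could be submitted.
    NvDsInferStatus status = client.inferAsync(request,
        [](NvDsInferStatus s, SharedBatchArray outputs) {
            if (s != NVDSINFER_SUCCESS) {
                // Handle the failed inference here.
            }
        });
}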
std::shared_ptr< BaseBatchBuffer > SharedBatchBuf
Common buffer interfaces (internal).
bool isServerLive()
Check if the Triton Inference Server is live.
SharedIBatchArray inputBatchArray()
InferGrpcClient(std::string url, bool enableCudaBufferSharing)
Constructor, save the server URL and CUDA buffer sharing flag.
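A minimal bring-up sketch (the URL and model name are placeholders; error handling is only indicative):

// Connect to a Triton server over gRPC, without CUDA shared-memory buffer sharing.
InferGrpcClient client("localhost:8001", false);
if (client.Initialize() != NVDSINFER_SUCCESS) {
    // Creating the underlying tc::InferenceServerGrpcClient failed.
}
if (client.isServerLive() && client.isServerReady()) {
    bool ready = client.isModelReady("my_model");  // version defaults to ""
}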
~InferGrpcClient()
Destructor, default.
NvDsInferStatus LoadModel(const std::string &model_name, const Headers &headers=Headers())
Request to load the given model using the Triton client library.
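A hedged sketch of an explicit model load with extra gRPC headers (the header key/value and model name are illustrative; Triton typically has to run in explicit model-control mode for load/unload requests to succeed):

Headers headers;
headers["my-custom-header"] = "example-value";  // illustrative per-request gRPC header
NvDsInferStatus status = client.LoadModel("my_model", headers);
// UnloadModel("my_model", headers) is the matching reverse operation (declared further below).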
std::vector< std::string > getOutNames()
void attachData(void *data)
Append to the array of host memory allocations.
bool isServerReady()
Check if the Triton Inference Server is ready.
std::map< std::string, std::string > Headers
bool isModelReady(const std::string &model, const std::string version="")
Check if the specified model is ready for inference.
Header file of the common declarations for the nvinferserver library.
std::shared_ptr< tc::InferOptions > getOption()
~TritonGrpcRequest()
Destructor, free the host memory allocated for the request.
std::shared_ptr< IBatchArray > SharedIBatchArray
std::vector< std::shared_ptr< tc::InferRequestedOutput > > outputs()
void setOutNames(std::vector< std::string > outnames)
Inference context library interface header file.
NvDsInferStatus getModelMetadata(inference::ModelMetadataResponse *model_metadata, std::string &model_name, std::string &model_version)
Get the model metadata from the Triton Inference Server.
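A small sketch of querying model metadata (placeholder names; the response is the Triton protobuf message):

inference::ModelMetadataResponse metadata;
std::string name = "my_model";
std::string version = "1";
if (client.getModelMetadata(&metadata, name, version) == NVDSINFER_SUCCESS) {
    // Inspect e.g. metadata.inputs() and metadata.outputs() for tensor names and shapes.
}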
NvDsInferStatus appendInput(const std::shared_ptr< tc::InferInput > &input)
Append to the array of Triton client library inference input objects.
std::vector< std::shared_ptr< tc::InferInput > > inputs()
NvDsInferStatus setOutput(const std::vector< std::shared_ptr< tc::InferRequestedOutput >> &output)
Helper functions to access the member variables.
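A minimal read-back sketch using these helpers (assuming a SharedGrpcRequest named request, for example one returned by createRequest()):

size_t numInputs = request->inputs().size();
std::shared_ptr<tc::InferOptions> options = request->getOption();
for (const std::string &outName : request->getOutNames()) {
    // outName is one of the output tensor names the request asks the server to return.
}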
Triton gRPC inference request class holding data associated with one inference request.
NvDsInferStatus UnloadModel(const std::string &model_name, const Headers &headers=Headers())
Request to unload the given model using the Triton client library.
Wrapper class for the gRPC client of the Triton Inference Server, interfaces with the Triton client library.
NvDsInferStatus Initialize()
Create the gRPC client instance of the Triton Client library.
std::shared_ptr< TritonGrpcRequest > SharedGrpcRequest
NvDsInferStatus getModelConfig(inference::ModelConfigResponse *config, const std::string &name, const std::string &version="", const Headers &headers=Headers())
Get the model configuration from the Triton Inference Server.
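A similar sketch for the model configuration (placeholder model name; the version and headers arguments fall back to their defaults):

inference::ModelConfigResponse configResponse;
if (client.getModelConfig(&configResponse, "my_model") == NVDSINFER_SUCCESS) {
    const inference::ModelConfig &config = configResponse.config();
    // Inspect e.g. config.max_batch_size() from the Triton model configuration.
}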
NvDsInferStatus setOption(std::shared_ptr< tc::InferOptions > &option)
SharedGrpcRequest createRequest(const std::string &model, const std::string &version, SharedIBatchArray input, const std::vector< std::string > &outputs, const std::vector< TritonClassParams > &classList=std::vector< TritonClassParams >())
Create a new gRPC inference request.
std::shared_ptr< BaseBatchArray > SharedBatchArray
std::function< void(NvDsInferStatus, SharedBatchArray)> TritonGrpcAsyncDone
NvDsInferStatus
Enum for the status codes returned by NvDsInferContext.