|
|
NVIDIA DeepStream SDK API Reference
|
8.0 Release
|
Go to the documentation of this file.
21 #ifndef __INFER_GRPC_CLIENT_H__
22 #define __INFER_GRPC_CLIENT_H__
25 #include <condition_variable>
32 #include "infer_icontext.h"
33 #include "infer_post_datatypes.h"
35 #include "grpc_client.h"
38 namespace tc = triton::client;
42 typedef std::map<std::string, std::string>
Headers;
71 std::vector<std::shared_ptr<tc::InferInput>>
inputs() {
return m_InferInputs; }
72 std::vector<std::shared_ptr<tc::InferRequestedOutput>>
outputs() {
return m_RequestOutputs; }
73 std::shared_ptr<tc::InferOptions>
getOption() {
return m_InferOptions;}
76 std::vector<std::string>
getOutNames() {
return m_OutputNames;}
78 void setOutNames(std::vector<std::string> outnames) {m_OutputNames = outnames;}
85 m_CpuData.push_back(data);
92 m_InputCudaBufNames.push_back(bufName);
99 std::vector<std::shared_ptr<tc::InferInput>> m_InferInputs;
103 std::vector<std::shared_ptr<tc::InferRequestedOutput>> m_RequestOutputs;
107 std::shared_ptr<tc::InferOptions> m_InferOptions;
111 std::vector<std::string> m_OutputNames;
119 std::vector<void*> m_CpuData;
123 std::vector<std::string> m_InputCudaBufNames;
155 std::string &model_name, std::string &model_version);
165 const std::string &version =
"",
const Headers &headers =
Headers());
177 bool isModelReady(
const std::string &model,
const std::string version =
"");
199 const std::vector<std::string> &outputs,
200 const std::vector<TritonClassParams>& classList = std::vector<TritonClassParams>());
239 tc::Error SetInputCudaSharedMemory(tc::InferInput *inferInput,
250 bool m_EnableCudaBufferSharing;
254 std::unique_ptr<tc::InferenceServerGrpcClient> m_GrpcClient;
258 std::atomic<uint64_t> m_LastRequestId{UINT64_C(0)};
void setInputBatchArray(SharedIBatchArray inputBatch)
std::vector< std::string > getInputCudaBufNames()
This is a header file for pre-processing cuda kernels with normalization and mean subtraction required by nvdsinfer.
NvDsInferStatus inferAsync(SharedGrpcRequest request, TritonGrpcAsyncDone done)
Get the inference input and output list from the request and trigger the asynchronous inference request.
std::function< void(NvDsInferStatus, SharedBatchArray)> TritonGrpcAsyncDone
bool isServerLive()
Check if the Triton Inference Server is live.
SharedIBatchArray inputBatchArray()
std::map< std::string, std::string > Headers
InferGrpcClient(std::string url, bool enableCudaBufferSharing)
Constructor, save the server URL and CUDA sharing flag.
~InferGrpcClient()
Destructor, default.
std::shared_ptr< BaseBatchBuffer > SharedBatchBuf
Common buffer interfaces (internal).
std::shared_ptr< BaseBatchArray > SharedBatchArray
NvDsInferStatus LoadModel(const std::string &model_name, const Headers &headers=Headers())
Request to load the given model using the Triton client library.
std::vector< std::string > getOutNames()
void attachData(void *data)
Append the array of host memory allocations.
bool isServerReady()
Check if the Triton Inference Server is ready.
bool isModelReady(const std::string &model, const std::string version="")
Check if the specified model is ready for inference.
Header file of the common declarations for the nvinferserver library.
std::shared_ptr< tc::InferOptions > getOption()
~TritonGrpcRequest()
Destructor, free the host memory allocated for the request.
void attachInputCudaBuffer(std::string bufName)
Append the list of shared CUDA input buffers.
std::vector< std::shared_ptr< tc::InferRequestedOutput > > outputs()
void setOutNames(std::vector< std::string > outnames)
NvDsInferStatus getModelMetadata(inference::ModelMetadataResponse *model_metadata, std::string &model_name, std::string &model_version)
Get the model metadata from the Triton Inference server.
NvDsInferStatus appendInput(const std::shared_ptr< tc::InferInput > &input)
Append the array of Triton client library inference input objects.
std::vector< std::shared_ptr< tc::InferInput > > inputs()
NvDsInferStatus setOutput(const std::vector< std::shared_ptr< tc::InferRequestedOutput >> &output)
Helper functions to access the member variables.
Triton gRPC inference request class holding data associated with one inference request.
NvDsInferStatus UnloadModel(const std::string &model_name, const Headers &headers=Headers())
Request to unload the given model using the Triton client library.
Wrapper class for the gRPC client of the Triton Inference Server, interfaces with the Triton client library.
NvDsInferStatus Initialize()
Create the gRPC client instance of the Triton Client library.
NvDsInferStatus getModelConfig(inference::ModelConfigResponse *config, const std::string &name, const std::string &version="", const Headers &headers=Headers())
Get the model configuration from the Triton Inference Server.
std::shared_ptr< TritonGrpcRequest > SharedGrpcRequest
std::shared_ptr< IBatchArray > SharedIBatchArray
NvDsInferStatus setOption(std::shared_ptr< tc::InferOptions > &option)
SharedGrpcRequest createRequest(const std::string &model, const std::string &version, SharedIBatchArray input, const std::vector< std::string > &outputs, const std::vector< TritonClassParams > &classList=std::vector< TritonClassParams >())
Create a new gRPC inference request.
NvDsInferStatus
Enum for the status codes returned by NvDsInferContext.