NVIDIA DeepStream SDK API Reference

6.4 Release
infer_grpc_client.h
/*
 * SPDX-FileCopyrightText: Copyright (c) 2021-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 * SPDX-License-Identifier: LicenseRef-NvidiaProprietary
 *
 * NVIDIA CORPORATION, its affiliates and licensors retain all intellectual
 * property and proprietary rights in and to this material, related
 * documentation and any modifications thereto. Any use, reproduction,
 * disclosure or distribution of this material and related documentation
 * without an express license agreement from NVIDIA CORPORATION or
 * its affiliates is strictly prohibited.
 */

#ifndef __INFER_GRPC_CLIENT_H__
#define __INFER_GRPC_CLIENT_H__

#include <stdarg.h>
#include <condition_variable>
#include <functional>
#include <list>
#include <memory>
#include <mutex>
#include <queue>

#include "infer_icontext.h"
#include "infer_post_datatypes.h"
#include "infer_common.h"
#include "grpc_client.h"

namespace tc = triton::client;

namespace nvdsinferserver {

typedef std::map<std::string, std::string> Headers;

class TritonGrpcRequest;

using SharedGrpcRequest = std::shared_ptr<TritonGrpcRequest>;

/**
 * Callback invoked when an asynchronous inference request completes.
 */
using TritonGrpcAsyncDone =
    std::function<void(NvDsInferStatus, SharedBatchArray)>;

/**
 * Triton gRPC inference request class holding data associated with one
 * inference request.
 */
class TritonGrpcRequest {
public:
    /**
     * Destructor, free the host memory allocated for the request.
     */
    ~TritonGrpcRequest();
    /**
     * Append the array of Triton client library inference input objects.
     */
    NvDsInferStatus appendInput(const std::shared_ptr<tc::InferInput> &input);
    /**
     * Helper functions to access the member variables.
     */
    NvDsInferStatus setOutput(
        const std::vector<std::shared_ptr<tc::InferRequestedOutput>> &output);
    NvDsInferStatus setOption(std::shared_ptr<tc::InferOptions> &option);
    std::vector<std::shared_ptr<tc::InferInput>> inputs() { return m_InferInputs; }
    std::vector<std::shared_ptr<tc::InferRequestedOutput>> outputs() { return m_RequestOutputs; }
    std::shared_ptr<tc::InferOptions> getOption() { return m_InferOptions; }
    SharedIBatchArray inputBatchArray() { return m_InputBatchArray; }
    void setInputBatchArray(SharedIBatchArray inputBatch) { m_InputBatchArray = inputBatch; }
    std::vector<std::string> getOutNames() { return m_OutputNames; }
    std::vector<std::string> getInputCudaBufNames() { return m_InputCudaBufNames; }
    void setOutNames(std::vector<std::string> outnames) { m_OutputNames = outnames; }

    /**
     * Append the array of host memory allocations.
     */
    void attachData(void *data) {
        m_CpuData.push_back(data);
    }

    /**
     * Append the list of shared CUDA input buffers.
     */
    void attachInputCudaBuffer(std::string bufName) {
        m_InputCudaBufNames.push_back(bufName);
    }

private:
    std::vector<std::shared_ptr<tc::InferInput>> m_InferInputs;
    std::vector<std::shared_ptr<tc::InferRequestedOutput>> m_RequestOutputs;
    std::shared_ptr<tc::InferOptions> m_InferOptions;
    std::vector<std::string> m_OutputNames;
    SharedIBatchArray m_InputBatchArray;
    std::vector<void*> m_CpuData;
    std::vector<std::string> m_InputCudaBufNames;
};

/**
 * Wrapper class for the gRPC client of the Triton Inference Server,
 * interfaces with the Triton client library.
 */
class InferGrpcClient {
public:
    /**
     * Constructor, save the server URL and CUDA buffer sharing flag.
     */
    InferGrpcClient(std::string url, bool enableCudaBufferSharing);
    /**
     * Destructor, default.
     */
    ~InferGrpcClient();
    /**
     * Create the gRPC client instance of the Triton client library.
     */
    NvDsInferStatus Initialize();
    /**
     * Get the model metadata from the Triton Inference Server.
     */
    NvDsInferStatus getModelMetadata(inference::ModelMetadataResponse *model_metadata,
        std::string &model_name, std::string &model_version);
    /**
     * Get the model configuration from the Triton Inference Server.
     */
    NvDsInferStatus getModelConfig(inference::ModelConfigResponse *config, const std::string &name,
        const std::string &version = "", const Headers &headers = Headers());
    /**
     * Check if the Triton Inference Server is live.
     */
    bool isServerLive();
    /**
     * Check if the Triton Inference Server is ready.
     */
    bool isServerReady();
    /**
     * Check if the specified model is ready for inference.
     */
    bool isModelReady(const std::string &model, const std::string version = "");
    /**
     * Request to load the given model using the Triton client library.
     */
    NvDsInferStatus LoadModel(const std::string &model_name, const Headers &headers = Headers());
    /**
     * Request to unload the given model using the Triton client library.
     */
    NvDsInferStatus UnloadModel(const std::string &model_name, const Headers &headers = Headers());
    /**
     * Create a new gRPC inference request.
     */
    SharedGrpcRequest createRequest(const std::string &model, const std::string &version,
        SharedIBatchArray input, const std::vector<std::string> &outputs,
        const std::vector<TritonClassParams> &classList = std::vector<TritonClassParams>());

    /**
     * Get the inference input and output list from the request and trigger
     * the asynchronous inference request.
     */
    NvDsInferStatus inferAsync(SharedGrpcRequest request, TritonGrpcAsyncDone done);

private:
    /**
     * Completion callback handed to the Triton client library for
     * asynchronous inference requests.
     */
    void InferComplete(tc::InferResult *result, SharedGrpcRequest request,
        TritonGrpcAsyncDone done);
    NvDsInferStatus parseOptions(tc::InferOptions *outOpt, const IOptions *inOpt);
    tc::Error SetInputCudaSharedMemory(tc::InferInput *inferInput,
        const SharedBatchBuf &inbuf, SharedGrpcRequest request, uint64_t bufId);

private:
    std::string m_Url;
    bool m_EnableCudaBufferSharing;
    std::unique_ptr<tc::InferenceServerGrpcClient> m_GrpcClient;
    std::atomic<uint64_t> m_LastRequestId{UINT64_C(0)};
};

} // namespace nvdsinferserver

#endif
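
The header above is declaration-only. As a rough usage sketch (not part of the SDK), the typical call sequence is: construct InferGrpcClient with the server URL, call Initialize(), check server and model state, build a request with createRequest(), and trigger inferAsync() with a TritonGrpcAsyncDone callback. The snippet below assumes a Triton server reachable at "localhost:8001", a deployed model named "my_model", and an input SharedIBatchArray prepared elsewhere by the nvinferserver pipeline; treating an empty outputs list as "request all model outputs" is also an assumption of this sketch.

// Minimal usage sketch for InferGrpcClient; names "localhost:8001" and
// "my_model" are placeholders, and the input batch is prepared elsewhere.
#include "infer_grpc_client.h"

using namespace nvdsinferserver;

NvDsInferStatus runInference(SharedIBatchArray inputBatch)
{
    // Construct the wrapper with the server URL and CUDA buffer sharing
    // disabled, then create the underlying Triton gRPC client instance.
    InferGrpcClient client("localhost:8001", false /* enableCudaBufferSharing */);
    NvDsInferStatus status = client.Initialize();
    if (status != NVDSINFER_SUCCESS)
        return status;

    // Probe server and model state before issuing any request.
    if (!client.isServerLive() || !client.isServerReady() ||
        !client.isModelReady("my_model"))
        return NVDSINFER_TRITON_ERROR;

    // Build a request against the model's default version; the empty
    // outputs list asking for all outputs is an assumption of this sketch.
    SharedGrpcRequest request =
        client.createRequest("my_model", "" /* version */, inputBatch,
            {} /* outputs */);
    if (!request)
        return NVDSINFER_TRITON_ERROR;

    // Trigger the asynchronous inference; the lambda matches the
    // TritonGrpcAsyncDone signature and runs when the request completes.
    return client.inferAsync(request,
        [](NvDsInferStatus s, SharedBatchArray outputs) {
            // Consume `outputs` here, e.g. hand them to postprocessing.
        });
}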