#pragma once

#include "morpheus/messages/multi_inference.hpp"
#include "morpheus/messages/multi_response_probs.hpp"
#include "morpheus/objects/triton_in_out.hpp"

#include <http_client.h>
#include <mrc/channel/status.hpp>          // for Status
#include <mrc/node/sink_properties.hpp>    // for SinkProperties<>::sink_type_t
#include <mrc/node/source_properties.hpp>  // for SourceProperties<>::source_type_t
#include <mrc/segment/builder.hpp>
#include <mrc/segment/object.hpp>  // for Object
#include <pymrc/node.hpp>
#include <rxcpp/rx.hpp>  // for apply, make_subscriber, observable_member, is_on_error<>::not_void, is_on_next_of<>::not_void, from

#include <map>
#include <memory>
#include <string>
#include <vector>
namespace morpheus {

/****** Component public implementations *******************/
/****** InferenceClientStage ********************************/

#pragma GCC visibility push(default)
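/**
 * @brief Perform inference against a Triton inference server.
 *
 * Receives batches of MultiInferenceMessage, submits them to the configured model on
 * the server, and emits the results as MultiResponseProbsMessage. (Summary inferred
 * from the declared sink/source types and member names.)
 */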
class InferenceClientStage
  : public mrc::pymrc::PythonNode<std::shared_ptr<MultiInferenceMessage>, std::shared_ptr<MultiResponseProbsMessage>>
{
  public:
    using base_t =
        mrc::pymrc::PythonNode<std::shared_ptr<MultiInferenceMessage>, std::shared_ptr<MultiResponseProbsMessage>>;
    using typename base_t::sink_type_t;
    using typename base_t::source_type_t;
    using typename base_t::subscribe_fn_t;

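    /**
     * @brief Construct a new InferenceClientStage.
     *
     * (Parameter descriptions are inferred from the parameter names; consult the
     * implementation for exact semantics.)
     *
     * @param model_name Name of the model on the Triton server to send inference requests to.
     * @param server_url URL of the Triton inference server.
     * @param force_convert_inputs Whether to force-convert incoming tensors to the types the model expects.
     * @param use_shared_memory Whether to use shared memory when transferring tensors to/from the server.
     * @param needs_logits Whether the model outputs logits.
     * @param inout_mapping Optional mapping of pipeline tensor names to the model's input/output names.
     */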
    InferenceClientStage(std::string model_name,
                         std::string server_url,
                         bool force_convert_inputs,
                         bool use_shared_memory,
                         bool needs_logits,
                         std::map<std::string, std::string> inout_mapping = {});

  private:
    // True when `server_url` targets Triton's default gRPC port (behavior inferred from the name).
    bool is_default_grpc_port(std::string& server_url);

    // Handshakes with the server and populates the model settings stored below.
    void connect_with_server();

    subscribe_fn_t build_operator();

    std::string m_model_name;
    std::string m_server_url;
    bool m_force_convert_inputs;
    bool m_use_shared_memory;
    bool m_needs_logits{true};
    std::map<std::string, std::string> m_inout_mapping;

    // Below are settings created during handshake with server
    // std::shared_ptr<triton::client::InferenceServerHttpClient> m_client;
    std::vector<TritonInOut> m_model_inputs;
    std::vector<TritonInOut> m_model_outputs;
    triton::client::InferOptions m_options;
    int m_max_batch_size{-1};
};
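
// Illustrative usage sketch (not part of this header). Assumes an
// mrc::segment::Builder named `builder`; the node name, model name, and server
// URL below are placeholder values:
//
//   auto stage = builder.construct_object<InferenceClientStage>(
//       "inference",       // node name (placeholder)
//       "my_model",        // model_name (placeholder)
//       "localhost:8001",  // server_url (placeholder; 8001 is Triton's default gRPC port)
//       true,              // force_convert_inputs
//       false,             // use_shared_memory
//       true);             // needs_logits (inout_mapping left defaulted)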
/****** InferenceClientStageInterfaceProxy *****************/
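/**
 * @brief Interface proxy, used to insulate the Python bindings from the C++ implementation.
 */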
struct InferenceClientStageInterfaceProxy
{
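    /**
     * @brief Create and initialize an InferenceClientStage within the given segment, returning it
     * wrapped as a segment object. (Summary inferred from the signature.)
     */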
    static std::shared_ptr<mrc::segment::Object<InferenceClientStage>> init(
        mrc::segment::Builder& builder,
        const std::string& name,
        std::string model_name,
        std::string server_url,
        bool force_convert_inputs,
        bool use_shared_memory,
        bool needs_logits,
        std::map<std::string, std::string> inout_mapping);
};
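
// Illustrative binding sketch (not part of this header). Shows the typical
// pybind11 role of a proxy like this; `module` is a placeholder py::module_:
//
//   py::class_<mrc::segment::Object<InferenceClientStage>,
//              mrc::segment::ObjectProperties,
//              std::shared_ptr<mrc::segment::Object<InferenceClientStage>>>(
//       module, "InferenceClientStage", py::multiple_inheritance())
//       .def(py::init(&InferenceClientStageInterfaceProxy::init));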
#pragma GCC visibility pop  // end of group
} // namespace morpheus