Class InferenceClientStage#
Defined in File inference_client_stage.hpp
Inheritance Relationships#
Base Type#
public mrc::pymrc::AsyncioRunnable< std::shared_ptr< ControlMessage >, std::shared_ptr< ControlMessage > >
Class Documentation#
-
class InferenceClientStage : public mrc::pymrc::AsyncioRunnable<std::shared_ptr<ControlMessage>, std::shared_ptr<ControlMessage>>#
Perform inference with the Triton Inference Server. This class specifies which inference implementation category (e.g. NLP or FIL) is needed to process the inference requests.
Public Types
-
using sink_type_t = std::shared_ptr<ControlMessage>#
-
using source_type_t = std::shared_ptr<ControlMessage>#
Public Functions
-
InferenceClientStage(
std::unique_ptr<IInferenceClient> &&client,
std::string model_name,
bool needs_logits,
std::vector<TensorModelMapping> input_mapping,
std::vector<TensorModelMapping> output_mapping
)#
Construct a new Inference Client Stage object (a construction sketch follows the parameter list).
- Parameters:
client – : Inference client instance used to issue the inference requests.
model_name – : Name of the model that handles the inference requests sent to the Triton Inference Server.
needs_logits – : Determines whether logits are required.
input_mapping – : Mappings from pipeline tensor names to the model's input names. Use this if the Morpheus names do not match the model.
output_mapping – : Mappings from the model's output names to pipeline tensor names. Use this if the Morpheus names do not match the model.
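A minimal construction sketch is shown below. It is not taken from the Morpheus sources: MyTritonClient is a hypothetical IInferenceClient implementation, the header path, namespace, tensor/model names, and the TensorModelMapping field order are all assumptions for illustration.

// Hedged construction sketch (illustrative only, not the Morpheus implementation).
#include <memory>
#include <string>
#include <utility>
#include <vector>

#include "morpheus/stages/inference_client_stage.hpp"  // assumed header path

using namespace morpheus;  // assuming these types live in the morpheus namespace

std::shared_ptr<InferenceClientStage> make_stage()
{
    // MyTritonClient is a hypothetical IInferenceClient implementation.
    std::unique_ptr<IInferenceClient> client = std::make_unique<MyTritonClient>("localhost:8001");

    // Map pipeline tensor names to the model's tensor names where they differ.
    // Field order of TensorModelMapping is an assumption here.
    std::vector<TensorModelMapping> input_mapping{{"input_ids", "INPUT_IDS"}};
    std::vector<TensorModelMapping> output_mapping{{"OUTPUT__0", "probs"}};

    return std::make_shared<InferenceClientStage>(std::move(client),
                                                  "sid-minibert-onnx",  // model_name (illustrative)
                                                  true,                 // needs_logits
                                                  std::move(input_mapping),
                                                  std::move(output_mapping));
}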
-
mrc::coroutines::AsyncGenerator<std::shared_ptr<ControlMessage>> on_data(
std::shared_ptr<ControlMessage> &&data,
std::shared_ptr<mrc::coroutines::Scheduler> on
) override#
Processes a single InputT by running the constructor-provided inference client against its tensors, and yields the result as an OutputT.
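The yield-based shape of this coroutine can be pictured with a short sketch of an AsyncioRunnable-style on_data override. The free function below, its header paths, namespace assumption, and the elided processing body are illustrative only, not the stage's actual implementation.

// Sketch of the on_data coroutine pattern (illustrative only).
#include <memory>
#include <utility>

#include <mrc/coroutines/async_generator.hpp>  // assumed header path
#include <mrc/coroutines/scheduler.hpp>        // assumed header path

#include "morpheus/messages/control.hpp"       // assumed header path

using morpheus::ControlMessage;  // assuming ControlMessage lives in the morpheus namespace

mrc::coroutines::AsyncGenerator<std::shared_ptr<ControlMessage>> example_on_data(
    std::shared_ptr<ControlMessage>&& data,
    std::shared_ptr<mrc::coroutines::Scheduler> on)
{
    // A real override would resume work on the provided scheduler `on` and run
    // the inference client against the message's tensors here (omitted).
    (void)on;

    // Yield the (updated) message downstream; one output per input in this stage.
    co_yield std::move(data);
}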