Program Listing for File triton_inference.hpp
↰ Return to documentation for file (morpheus/_lib/include/morpheus/stages/triton_inference.hpp)
/*
* SPDX-FileCopyrightText: Copyright (c) 2021-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
* SPDX-License-Identifier: Apache-2.0
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#pragma once
#include "morpheus/messages/multi_inference.hpp"
#include "morpheus/messages/multi_response.hpp" // for MultiResponseMessage
#include "morpheus/objects/triton_in_out.hpp"
#include "morpheus/types.hpp"
#include <boost/fiber/future/future.hpp>
#include <http_client.h>
#include <mrc/node/rx_sink_base.hpp>
#include <mrc/node/rx_source_base.hpp>
#include <mrc/node/sink_properties.hpp>
#include <mrc/node/source_properties.hpp>
#include <mrc/segment/builder.hpp>
#include <mrc/segment/object.hpp>
#include <mrc/types.hpp>
#include <pymrc/node.hpp>
#include <rxcpp/rx.hpp> // for apply, make_subscriber, observable_member, is_on_error<>::not_void, is_on_next_of<>::not_void, from
// IWYU pragma: no_include "rxcpp/sources/rx-iterate.hpp"
#include <map>
#include <memory>
#include <string>
#include <vector>
namespace morpheus {
/****** Component public implementations *******************/
/****** InferenceClientStage********************************/
#pragma GCC visibility push(default)
/**
 * @brief Stage that sends inference requests to a Triton Inference Server and emits the responses.
 *
 * Receives MultiInferenceMessage inputs and produces MultiResponseMessage outputs
 * (see the PythonNode base alias below). Connection details and model metadata are
 * resolved during a handshake with the server (connect_with_server).
 */
class InferenceClientStage
  : public mrc::pymrc::PythonNode<std::shared_ptr<MultiInferenceMessage>, std::shared_ptr<MultiResponseMessage>>
{
  public:
    using base_t =
        mrc::pymrc::PythonNode<std::shared_ptr<MultiInferenceMessage>, std::shared_ptr<MultiResponseMessage>>;
    using typename base_t::sink_type_t;
    using typename base_t::source_type_t;
    using typename base_t::subscribe_fn_t;

    /**
     * @brief Construct a new InferenceClientStage.
     *
     * @param model_name Name of the model on the Triton server that will handle the requests.
     * @param server_url URL of the Triton server (host:port).
     * @param force_convert_inputs Whether inputs may be converted to the model's expected dtype.
     * @param use_shared_memory Whether to exchange tensors with the server via shared memory.
     * @param needs_logits Whether the model output should be treated as logits.
     * @param inout_mapping Optional mapping of Morpheus tensor names to Triton input/output names.
     */
    InferenceClientStage(std::string model_name,
                         std::string server_url,
                         bool force_convert_inputs,
                         bool use_shared_memory,
                         bool needs_logits,
                         std::map<std::string, std::string> inout_mapping = {});

  private:
    // Presumably checks whether server_url targets Triton's default gRPC port — TODO confirm against the .cpp.
    bool is_default_grpc_port(std::string& server_url);

    // Performs the server handshake; populates the members in the "settings" section below.
    void connect_with_server();

    subscribe_fn_t build_operator();

    std::string m_model_name;
    std::string m_server_url;
    bool m_force_convert_inputs;
    bool m_use_shared_memory;
    bool m_needs_logits{true};
    std::map<std::string, std::string> m_inout_mapping;

    // Below are settings created during handshake with server.
    // NOTE(review): the template argument list of this member was lost during doc extraction
    // (angle brackets stripped as an HTML tag); reconstructed here — confirm against upstream source.
    std::shared_ptr<triton::client::InferenceServerGrpcClient> m_client;
    std::vector<TritonInOut> m_model_inputs;
    std::vector<TritonInOut> m_model_outputs;
    triton::client::InferOptions m_options;
    TensorIndex m_max_batch_size{-1};
};
/****** InferenceClientStageInterfaceProxy******************/
/**
 * @brief Python interface proxy for InferenceClientStage: wraps stage construction
 * so it can be registered on an MRC segment from Python bindings.
 */
struct InferenceClientStageInterfaceProxy
{
/**
 * @brief Create and register an InferenceClientStage on the given segment builder.
 *
 * @param builder Segment builder the stage node is attached to.
 * @param name Name of the stage node within the segment.
 * @param model_name Name of the model on the Triton server that will handle the requests.
 * @param server_url URL of the Triton server (host:port).
 * @param force_convert_inputs Whether inputs may be converted to the model's expected dtype.
 * @param use_shared_memory Whether to exchange tensors with the server via shared memory.
 * @param needs_logits Whether the model output should be treated as logits.
 * @param inout_mapping Mapping of Morpheus tensor names to Triton input/output names.
 * @return Shared pointer to the constructed segment object wrapping the stage.
 */
static std::shared_ptr<mrc::segment::Object<InferenceClientStage>> init(
mrc::segment::Builder& builder,
const std::string& name,
std::string model_name,
std::string server_url,
bool force_convert_inputs,
bool use_shared_memory,
bool needs_logits,
std::map<std::string, std::string> inout_mapping);
};
#pragma GCC visibility pop  // end of group
} // namespace morpheus