// Program listing for file gpu_resident_inference.hpp
// (include/holoscan/operators/gpu_resident_inference/gpu_resident_inference.hpp)
/*
* SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
* SPDX-License-Identifier: Apache-2.0
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef HOLOSCAN_OPERATORS_GPU_RESIDENT_INFERENCE_GPU_RESIDENT_INFERENCE_HPP
#define HOLOSCAN_OPERATORS_GPU_RESIDENT_INFERENCE_GPU_RESIDENT_INFERENCE_HPP
#include <map>
#include <memory>
#include <string>
#include <vector>
#include "holoscan/core/gpu_resident_operator.hpp"
#include "holoscan/core/io_context.hpp"
#include "holoscan/core/io_spec.hpp"
#include "holoscan/core/operator.hpp"
#include "holoscan/core/operator_spec.hpp"
#include <holoinfer.hpp>
#include <holoinfer_buffer.hpp>
namespace HoloInfer = holoscan::inference;
namespace holoscan::ops {
/**
 * @brief Inference operator declared on top of holoscan::GPUResidentOperator.
 *
 * This header only declares the operator; the lifecycle hooks (setup, start,
 * compute, stop) are defined elsewhere. The private members hold the
 * configuration values and the HoloInfer objects used by the operator.
 *
 * NOTE(review): `int32_t` and `size_t` are used below, but this header does
 * not include <cstdint> / <cstddef> directly - presumably they arrive through
 * one of the other includes; confirm.
 */
class GPUResidentInferenceOp : public holoscan::GPUResidentOperator {
 public:
  HOLOSCAN_OPERATOR_FORWARD_ARGS_SUPER(GPUResidentInferenceOp, holoscan::GPUResidentOperator)

  /// Default constructor; every member keeps its in-class default.
  GPUResidentInferenceOp() = default;

  /**
   * @brief Construct the operator from a configuration file.
   *
   * @param config_file Configuration file for the operator (presumably stored
   *        in `config_file_` - confirm against the definition).
   */
  explicit GPUResidentInferenceOp(const std::string& config_file);

  /// Operator setup hook, overriding the base-class implementation.
  void setup(OperatorSpec& spec) override;

  /// Operator start hook, overriding the base-class implementation.
  void start() override;

  /**
   * @brief Operator compute hook, overriding the base-class implementation.
   *
   * All three parameters are annotated `[[maybe_unused]]`.
   */
  void compute([[maybe_unused]] InputContext& op_input,
               [[maybe_unused]] OutputContext& op_output,
               [[maybe_unused]] ExecutionContext& context) override;

  /// Operator stop hook, overriding the base-class implementation.
  void stop() override;

 private:
  /// Backend identifier; defaults to "trt".
  std::string backend_{"trt"};

  /// Configuration file value (see the single-argument constructor).
  std::string config_file_;

  /// Names of the input tensors.
  std::vector<std::string> in_tensor_names_;

  /// Names of the output tensors.
  std::vector<std::string> out_tensor_names_;

  /// Batch sizes as string lists, keyed by string
  /// (presumably keyed by model name - TODO confirm).
  std::map<std::string, std::vector<std::string>> batch_sizes_;

  /// String-to-string map of model paths
  /// (presumably model name -> model file path - TODO confirm).
  std::map<std::string, std::string> model_path_map_;

  /// Pre-processor mapping: string key -> list of strings
  /// (presumably model name -> input tensor names - TODO confirm).
  std::map<std::string, std::vector<std::string>> pre_processor_map_;

  /// Inference mapping: string key -> list of strings
  /// (presumably model name -> output tensor names - TODO confirm).
  std::map<std::string, std::vector<std::string>> inference_map_;

  // NOTE 1: The parameters below are not tested in the current release.
  std::map<std::string, std::string> device_map_;
  std::map<std::string, std::string> dla_core_map_;
  std::map<std::string, std::string> temporal_map_;
  std::map<std::string, std::string> activation_map_;
  std::map<std::string, std::string> backend_map_;
  bool parallel_inference_{true};
  bool infer_on_cpu_{false};
  bool enable_fp16_{false};
  bool input_on_cuda_{true};
  bool output_on_cuda_{true};
  bool is_engine_path_{false};
  bool use_cuda_graphs_{false};
  int32_t dla_core_{-1};
  bool dla_gpu_fallback_{true};
  bool dynamic_input_dims_{false};
  // NOTE 1 ends

  /// Buffer size per tensor, keyed by string (presumably tensor name; units not
  /// visible in this header - TODO confirm).
  std::map<std::string, size_t> tensor_to_buffersize_;

  /// HoloInfer data type per tensor, keyed by string (presumably tensor name).
  std::map<std::string, HoloInfer::holoinfer_datatype> tensor_to_datatype_;

  /// Uniquely-owned HoloInfer inference context.
  std::unique_ptr<HoloInfer::InferContext> holoscan_infer_context_;

  /// Shared HoloInfer inference specifications.
  std::shared_ptr<HoloInfer::InferenceSpecs> inference_specs_;

  /// Dimensions per tensor, keyed by string (presumably tensor name).
  std::map<std::string, std::vector<int>> dims_per_tensor_;

  /// Human-readable module name of this operator.
  const std::string module_{"GPU Resident Inference Operator"};
};
} // namespace holoscan::ops
#endif/* HOLOSCAN_OPERATORS_GPU_RESIDENT_INFERENCE_GPU_RESIDENT_INFERENCE_HPP */