// Program listing for file gpu_resident_executor.hpp (date: 2025-12-01)
/*
* SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
* SPDX-License-Identifier: Apache-2.0
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef HOLOSCAN_CORE_EXECUTORS_GPU_RESIDENT_GPU_RESIDENT_EXECUTOR_HPP
#define HOLOSCAN_CORE_EXECUTORS_GPU_RESIDENT_GPU_RESIDENT_EXECUTOR_HPP
#include <cuda_runtime.h>
#include <fmt/format.h>

#include <future>
#include <memory>
#include <stdexcept>
#include <string>
#include <unordered_map>
#include <utility>
#include <vector>

#include "gpu_resident_deck.hpp"
#include "holoscan/core/execution_context.hpp"
#include "holoscan/core/executor.hpp"
#include "holoscan/core/gpu_resident_operator.hpp"
#include "holoscan/utils/cuda/buffer.hpp"
namespace holoscan {
class GPUResidentExecutor : public Executor {
public:
GPUResidentExecutor() = delete;
explicit GPUResidentExecutor(Fragment* fragment) : Executor(fragment) {
gpu_resident_deck_ = std::make_shared<GPUResidentDeck>();
}
~GPUResidentExecutor();
void run(OperatorGraph& graph) override;
std::future<void> run_async(OperatorGraph& graph) override;
void context([[maybe_unused]] void* context) override {
throw std::runtime_error("GPUResidentExecutor does not support context");
}
bool initialize_fragment() override;
bool initialize_operator([[maybe_unused]] Operator* op) override;
bool initialize_scheduler([[maybe_unused]] Scheduler* sch) override {
throw std::runtime_error("GPUResidentExecutor does not support any scheduler");
}
bool initialize_network_context([[maybe_unused]] NetworkContext* network_context) override {
throw std::runtime_error("GPUResidentExecutor does not support any network context");
}
bool initialize_fragment_services() override {
throw std::runtime_error("GPUResidentExecutor does not support any fragment services");
}
void prepare_data_flow(std::shared_ptr<OperatorGraph> graph);
void initialize_cuda();
void* device_memory(std::shared_ptr<Operator> op, const std::string& port_name);
virtual bool verify_graph_topology(OperatorGraph& graph);
void create_gpu_resident_cuda_graph();
void timeout_ms(unsigned long long timeout_ms);
void tear_down();
bool result_ready();
void data_ready();
bool is_launched();
std::shared_ptr<ExecutionContext> execution_context() { return exec_context_; }
std::shared_ptr<cudaStream_t> graph_capture_stream();
private:
void allocate_io_device_buffer(std::shared_ptr<Operator> downstream_op,
std::shared_ptr<Operator> upstream_op,
const std::string& source_port, const std::string& target_port,
size_t memory_block_size);
bool fragment_initialized_ = false;
std::unordered_map<std::string, std::shared_ptr<holoscan::utils::cuda::DeviceBuffer>>
io_device_buffers_;
std::vector<std::shared_ptr<Operator>> topo_ordered_operators_;
std::shared_ptr<ExecutionContext> exec_context_;
unsigned long long timeout_ms_ = 0;
std::shared_ptr<cudaStream_t> graph_capture_stream_;
cudaGraph_t workload_graph_ = nullptr;
cudaGraph_t gpu_resident_graph_ =
nullptr;
std::shared_ptr<GPUResidentDeck> gpu_resident_deck_;
};
} // namespace holoscan
#endif// HOLOSCAN_CORE_EXECUTORS_GPU_RESIDENT_GPU_RESIDENT_EXECUTOR_HPP