NVIDIA Holoscan SDK v3.9.0
NVIDIA Docs Hub Homepage  NVIDIA Holoscan  NVIDIA Holoscan SDK v3.9.0  Program Listing for File gpu_resident_executor.hpp

Program Listing for File gpu_resident_executor.hpp

Return to documentation for file (include/holoscan/core/executors/gpu_resident/gpu_resident_executor.hpp)

Copy
Copied!
            

            
/*
* SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
* SPDX-License-Identifier: Apache-2.0
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#ifndef HOLOSCAN_CORE_EXECUTORS_GPU_RESIDENT_GPU_RESIDENT_EXECUTOR_HPP
#define HOLOSCAN_CORE_EXECUTORS_GPU_RESIDENT_GPU_RESIDENT_EXECUTOR_HPP

#include <cuda_runtime.h>

#include <fmt/format.h>

#include <future>
#include <memory>
#include <stdexcept>
#include <string>
#include <unordered_map>
#include <utility>
#include <vector>

#include "gpu_resident_deck.hpp"
#include "holoscan/core/execution_context.hpp"
#include "holoscan/core/executor.hpp"
#include "holoscan/core/gpu_resident_operator.hpp"
#include "holoscan/utils/cuda/buffer.hpp"

namespace holoscan {

// Executor specialization declared for GPU-resident execution of a fragment.
//
// Only declarations are visible in this header; most member functions are
// defined out of line. The inline overrides below show that this executor
// rejects an external context, schedulers, network contexts and fragment
// services by throwing std::runtime_error.
class GPUResidentExecutor : public Executor {
 public:
  // An executor must always be constructed for a fragment.
  GPUResidentExecutor() = delete;

  // Forwards `fragment` to the Executor base and creates the GPUResidentDeck
  // held by this executor. The deck is built in the member initializer list so
  // the shared_ptr is initialized once instead of being default-constructed
  // and then assigned.
  explicit GPUResidentExecutor(Fragment* fragment)
      : Executor(fragment), gpu_resident_deck_(std::make_shared<GPUResidentDeck>()) {}

  // Marked `override` like the other overridden members so the compiler
  // verifies that the base class destructor is virtual. Defined out of line.
  ~GPUResidentExecutor() override;

  // Executor interface: run the given operator graph (defined out of line).
  void run(OperatorGraph& graph) override;

  // Executor interface: asynchronous variant of run(); the returned future is
  // produced by the out-of-line definition.
  std::future<void> run_async(OperatorGraph& graph) override;

  // Not supported: always throws std::runtime_error.
  void context([[maybe_unused]] void* context) override {
    throw std::runtime_error("GPUResidentExecutor does not support context");
  }

  // Executor interface: fragment initialization (defined out of line).
  bool initialize_fragment() override;

  // Executor interface: per-operator initialization (defined out of line).
  bool initialize_operator([[maybe_unused]] Operator* op) override;

  // Not supported: always throws std::runtime_error, never returns a value.
  bool initialize_scheduler([[maybe_unused]] Scheduler* sch) override {
    throw std::runtime_error("GPUResidentExecutor does not support any scheduler");
  }

  // Not supported: always throws std::runtime_error, never returns a value.
  bool initialize_network_context([[maybe_unused]] NetworkContext* network_context) override {
    throw std::runtime_error("GPUResidentExecutor does not support any network context");
  }

  // Not supported: always throws std::runtime_error, never returns a value.
  bool initialize_fragment_services() override {
    throw std::runtime_error("GPUResidentExecutor does not support any fragment services");
  }

  // Defined out of line.
  // NOTE(review): presumably sets up the connections between the operators of
  // `graph` (see allocate_io_device_buffer below) - confirm against the .cpp.
  void prepare_data_flow(std::shared_ptr<OperatorGraph> graph);

  // Defined out of line.
  void initialize_cuda();

  // Returns a raw pointer associated with port `port_name` of operator `op`.
  // NOTE(review): presumably the address of the device buffer backing that
  // port; ownership and null-on-miss behavior are not visible here - confirm.
  void* device_memory(std::shared_ptr<Operator> op, const std::string& port_name);

  // Virtual so that derived executors can replace the topology check.
  // `topo_ordered_operators` is a non-const reference, i.e. an in/out
  // parameter of the out-of-line definition.
  // NOTE(review): presumably filled with the operators of `graph` in
  // topological order when the check succeeds - confirm.
  virtual bool verify_graph_topology(
      std::shared_ptr<OperatorGraph> graph,
      std::vector<std::shared_ptr<Operator>>& topo_ordered_operators);

  // Setter counterpart of the timeout_ms_ member (defined out of line).
  // NOTE(review): unit is milliseconds judging by the name - confirm.
  void timeout_ms(unsigned long long timeout_ms);

  // The following four control/status functions are defined out of line.
  void tear_down();

  bool result_ready();

  void data_ready();

  bool is_launched();

  // Returns the execution context held by this executor. The pointer is empty
  // until something assigns exec_context_; no assignment is visible in this
  // header.
  std::shared_ptr<ExecutionContext> execution_context() { return exec_context_; }

  // Accessors for CUDA streams (defined out of line).
  // NOTE(review): presumably return graph_capture_stream_ and
  // drh_capture_stream_ respectively - confirm.
  std::shared_ptr<cudaStream_t> graph_capture_stream();

  std::shared_ptr<cudaStream_t> data_ready_handler_capture_stream();

  // Get the CUDA graph of the main workload. This function returns a clone of
  // the main workload graph because the original graph is owned and retained by
  // the executor. All the limitations of graph cloning apply here. Therefore, main workload
  // graphs containing memory allocation, memory free and conditional nodes are
  // not supported.
  // This is a utility helper function.
  cudaGraph_t workload_graph_clone() const;

  // Raw addresses for the data-ready, result-ready and tear-down signals
  // (defined out of line).
  // NOTE(review): "device address" per the names; which object owns the
  // memory is not visible here - confirm.
  void* data_ready_device_address();

  void* result_ready_device_address();

  void* tear_down_device_address();

  // Setter/getter pair for the fragment used as data-ready handler (defined
  // out of line).
  // NOTE(review): presumably backed by data_ready_handler_fragment_ - confirm.
  void data_ready_handler(std::shared_ptr<Fragment> fragment);

  std::shared_ptr<Fragment> data_ready_handler_fragment();

 private:
  // Defined out of line.
  // NOTE(review): presumably creates the buffer of `memory_block_size` bytes
  // for the source_port -> target_port connection between the two operators
  // and records it in io_device_buffers_ - confirm.
  void allocate_io_device_buffer(std::shared_ptr<Operator> downstream_op,
                                 std::shared_ptr<Operator> upstream_op,
                                 const std::string& source_port, const std::string& target_port,
                                 size_t memory_block_size);

  // Defined out of line.
  void create_gpu_resident_cuda_graph();

  // Defined out of line. `graph` is taken by non-const reference, so the
  // definition can write the resulting CUDA graph into it.
  void create_cuda_graph_from_operators(
      std::vector<std::shared_ptr<Operator>>& topo_ordered_operators, cudaGraph_t& graph,
      cudaStream_t capture_stream);

  // Defined out of line.
  bool verify_distinct_operator_names();

  bool fragment_initialized_ = false;

  // Device buffers keyed by string.
  // NOTE(review): the key format is defined in the .cpp - confirm.
  std::unordered_map<std::string, std::shared_ptr<holoscan::utils::cuda::DeviceBuffer>>
      io_device_buffers_;

  // Operator lists for the main workload and for the "drh" graph.
  // NOTE(review): "drh" presumably abbreviates "data ready handler" - confirm.
  std::vector<std::shared_ptr<Operator>> topo_ordered_main_operators_;
  std::vector<std::shared_ptr<Operator>> topo_ordered_drh_operators_;

  std::shared_ptr<ExecutionContext> exec_context_;
  unsigned long long timeout_ms_ = 0;

  std::shared_ptr<cudaStream_t> graph_capture_stream_;
  std::shared_ptr<cudaStream_t> drh_capture_stream_;

  // CUDA graph handles start out null; creation and destruction happen in the
  // out-of-line definitions.
  cudaGraph_t drh_graph_ = nullptr;
  cudaGraph_t workload_graph_ = nullptr;
  cudaGraph_t gpu_resident_graph_ = nullptr;

  std::shared_ptr<Fragment> data_ready_handler_fragment_;

  // Created in the constructor, so it is never null after construction unless
  // reset elsewhere.
  std::shared_ptr<GPUResidentDeck> gpu_resident_deck_;
};
}  // namespace holoscan

#endif// HOLOSCAN_CORE_EXECUTORS_GPU_RESIDENT_GPU_RESIDENT_EXECUTOR_HPP

© Copyright 2022-2025, NVIDIA. Last updated on Jan 8, 2026
content here