NVIDIA Docs Hub NVIDIA Holoscan NVIDIA Holoscan SDK v3.4.0 Program Listing for File holoinfer_buffer.hpp

Program Listing for File holoinfer_buffer.hpp

↰ Return to documentation for file (modules/holoinfer/src/include/holoinfer_buffer.hpp)

Copy
Copied!

            
            /*
* SPDX-FileCopyrightText: Copyright (c) 2022-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
* SPDX-License-Identifier: Apache-2.0
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef MODULES_HOLOINFER_SRC_INCLUDE_HOLOINFER_BUFFER_HPP
#define MODULES_HOLOINFER_SRC_INCLUDE_HOLOINFER_BUFFER_HPP

#include <cuda_runtime_api.h>
#include <sys/stat.h>

#include <algorithm>
#include <fstream>
#include <iostream>
#include <iterator>
#include <map>
#include <memory>
#include <mutex>
#include <numeric>
#include <string>
#include <utility>
#include <vector>

#include "holoinfer_constants.hpp"

// Expands to the GCC/Clang attribute that gives the annotated symbol default visibility.
// The macro is defined here but not used in the part of this header shown.
#define _HOLOSCAN_EXTERNAL_API_ __attribute__((visibility("default")))

namespace holoscan {
namespace inference {

/**
 * @brief Declaration only; the definition is not part of this header.
 *
 * Presumably reports the storage size of one element of datatype `t` - confirm against the
 * definition.
 *
 * @param t Datatype to query.
 * @return Element size as an unsigned 32-bit value (unit assumed to be bytes - TODO confirm).
 */
uint32_t get_element_size(holoinfer_datatype t) noexcept;

/**
 * @brief Functor with no data members, held by DeviceBuffer as its `allocator_` member.
 *
 * Only the call operator is declared here; its definition lives outside this header.
 */
class DeviceAllocator {
 public:
  /**
   * @param ptr  Out-parameter; presumably receives the address of the new allocation -
   *             TODO confirm against the definition.
   * @param size Requested size (presumably in bytes - TODO confirm).
   * @return Status flag (assumed to be true on success; verify against the definition).
   */
  bool operator()(void** ptr, size_t size) const;
};

/**
 * @brief Functor with no data members, held by DeviceBuffer as its `free_` member.
 *
 * Only the call operator is declared here; its definition lives outside this header.
 */
class DeviceFree {
 public:
  /**
   * @param ptr Pointer handed to the deleter (presumably memory obtained through
   *            DeviceAllocator - TODO confirm against the definition).
   */
  void operator()(void* ptr) const;
};

class Buffer {
 public:
  explicit Buffer(holoinfer_datatype type = holoinfer_datatype::h_Float32, int device_id = 0)
      : type_(type), device_id_(device_id) {}

  virtual ~Buffer() = default;

  virtual void* data() = 0;

  virtual size_t size() const = 0;

  virtual size_t get_bytes() const = 0;

  virtual void resize(size_t number_of_elements) = 0;

  holoinfer_datatype get_datatype() const { return type_; }

  int get_device() const { return device_id_; }

 protected:
  holoinfer_datatype type_;
  int device_id_;
};

/**
 * @brief Buffer implementation that holds a raw pointer together with an allocator/deleter
 * functor pair (DeviceAllocator / DeviceFree).
 *
 * All member functions are defined outside this header.
 *
 * NOTE(review): the class declares a destructor but no copy operations, so the implicitly
 * generated copy constructor/assignment duplicate the raw `buffer_` pointer. If the
 * destructor releases `buffer_` (its definition is not shown here), copying a DeviceBuffer
 * would release the same memory twice - confirm and consider deleting the copy operations.
 */
class DeviceBuffer : public Buffer {
 public:
  /**
   * @param type      Element datatype, defaults to `holoinfer_datatype::h_Float32`.
   * @param device_id Device id, defaults to 0.
   */
  explicit DeviceBuffer(holoinfer_datatype type = holoinfer_datatype::h_Float32, int device_id = 0);

  /**
   * @param size Initial size (presumably in elements - TODO confirm against the definition).
   * @param type Element datatype.
   */
  DeviceBuffer(size_t size, holoinfer_datatype type);

  ~DeviceBuffer();

  // Buffer interface; see the definitions for the exact semantics.
  void* data() override;
  size_t size() const override;
  size_t get_bytes() const override;
  void resize(size_t number_of_elements) override;

 private:
  // Current size and reserved capacity, both starting at zero.
  size_t size_{0}, capacity_{0};
  // Raw pointer to the storage; null until something is allocated.
  void* buffer_ = nullptr;
  // Functors used to obtain and release the storage.
  DeviceAllocator allocator_;
  DeviceFree free_;
};

/**
 * @brief Buffer implementation backed by a `std::vector<byte>`.
 *
 * Member functions other than the constructor are defined outside this header.
 */
class HostBuffer : public Buffer {
 public:
  /**
   * @brief Forwards the datatype to Buffer and records -1 as the device id.
   *
   * @param data_type Element datatype, defaults to `holoinfer_datatype::h_Float32`.
   */
  explicit HostBuffer(holoinfer_datatype data_type = holoinfer_datatype::h_Float32)
      : Buffer(data_type, -1) {}

  // Buffer interface; see the definitions for the exact semantics.
  void* data() override;
  size_t size() const override;
  size_t get_bytes() const override;
  void resize(size_t number_of_elements) override;

  /**
   * @param in_type New datatype for the buffer (effect on already-stored data is defined
   *                in the implementation - TODO confirm).
   */
  void set_type(holoinfer_datatype in_type);

 private:
  // Raw byte storage.
  std::vector<byte> buffer_;
  // Element count tracked separately from the byte storage; starts at zero.
  size_t number_of_elements_{0};
};

/**
 * @brief Bundles a device-side and a host-side Buffer, each held through a shared pointer.
 *
 * Besides the two shared pointers (`device_buffer_`, `host_buffer_`) the class exposes two
 * forwarding objects (`device_buffer`, `host_buffer`) that delegate to whatever the
 * corresponding shared pointer currently points to.
 *
 * NOTE(review): no copy operations are declared, so the implicit copy constructor copies
 * the forwarders as they are. A copy-constructed DataBuffer therefore ends up with
 * `host_buffer`/`device_buffer` still referring to the *source* object's shared pointers,
 * which dangle once the source is destroyed. Consider making the class non-copyable.
 */
class DataBuffer {
 public:
  /**
   * @param data_type Element datatype, defaults to `holoinfer_datatype::h_Float32`.
   * @param device_id Device id, defaults to 0.
   *
   * Defined outside this header; presumably creates both buffers - TODO confirm.
   */
  explicit DataBuffer(holoinfer_datatype data_type = holoinfer_datatype::h_Float32,
                      int device_id = 0);

  // Declared before the forwarders below, which bind references to these two members.
  std::shared_ptr<Buffer> device_buffer_;
  std::shared_ptr<Buffer> host_buffer_;

  /// @return Datatype reported by the host buffer. Dereferences `host_buffer_`, so it must
  /// be non-null when this is called.
  holoinfer_datatype get_datatype() const { return host_buffer_->get_datatype(); }

 private:
  /**
   * @brief Buffer adapter that forwards every virtual call to a shared pointer it references.
   *
   * The adapter stores a reference to the shared pointer (not a copy), so it always uses the
   * pointer's current target and must not outlive the shared pointer object itself.
   *
   * NOTE(review): only the virtual functions are forwarded. `get_datatype()` and
   * `get_device()` are non-virtual in Buffer, and this class default-constructs its Buffer
   * base, so on a forwarder they return the base defaults rather than the target's values.
   */
  class BufferForward : public Buffer {
   public:
    /// @param buffer Shared pointer to forward to; a reference to it is retained.
    explicit BufferForward(std::shared_ptr<Buffer>& buffer) : buffer_(buffer) {}
    BufferForward() = delete;

    void* data() override { return buffer_->data(); }
    size_t size() const override { return buffer_->size(); }
    size_t get_bytes() const override { return buffer_->get_bytes(); }
    void resize(size_t number_of_elements) override { buffer_->resize(number_of_elements); }

   private:
    // Reference to the owning DataBuffer's shared pointer member.
    const std::shared_ptr<Buffer>& buffer_;
  };

 public:
  // Forwarder for `device_buffer_`, exposed through a shared pointer.
  const std::shared_ptr<BufferForward> device_buffer{
      std::make_shared<BufferForward>(device_buffer_)};
  // Forwarder for `host_buffer_`, exposed by value.
  BufferForward host_buffer{host_buffer_};

 private:
  // Not read anywhere in this header; presumably set by the constructor - TODO confirm.
  holoinfer_datatype data_type_;
};

// Ordered map from a string key to a shared DataBuffer.
using DataMap = std::map<std::string, std::shared_ptr<DataBuffer>>;
// Ordered map from a string key to a string value.
using Mappings = std::map<std::string, std::string>;
// Ordered map from a string key to a list of int64 vectors (by its name, dimension lists).
using DimType = std::map<std::string, std::vector<std::vector<int64_t>>>;
// Ordered map from a string key to a list of strings.
using MultiMappings = std::map<std::string, std::vector<std::string>>;

/**
 * @brief Pairs a model name with a flag telling whether that model is active.
 */
struct ActivationSpec {
  /// Creates a spec with an empty model name and the active flag cleared.
  ActivationSpec() = default;

  /**
   * @param model_name Name of the model this spec refers to.
   * @param active     Initial state of the active flag, defaults to true.
   */
  explicit ActivationSpec(const std::string& model_name, bool active = true)
      : model_name_(model_name), active_(active) {}

  /// @return Current state of the active flag.
  bool is_active() const { return active_; }

  /// @return Copy of the model name.
  std::string model() const { return model_name_; }

  /// @param value New state of the active flag, defaults to true.
  void set_active(bool value = true) { active_ = value; }

  /// Name of the model.
  std::string model_name_;

  // Explicitly initialised: without this, a default-initialised object (`ActivationSpec s;`)
  // left the flag indeterminate and is_active() read an uninitialised bool. `false` matches
  // what value-initialisation (`ActivationSpec{}`) already produced.
  bool active_ = false;
};

/**
 * @brief Plain aggregate of the settings and data maps used to configure inference.
 *
 * All members are public. The constructor copies its arguments into the members; the
 * getters return copies of the corresponding maps.
 */
struct InferenceSpecs {
  InferenceSpecs() = default;

  /**
   * @brief Copies every argument into the member of the matching name.
   *
   * @param backend           Stored in `backend_type_`.
   * @param backend_map       Stored in `backend_map_`.
   * @param model_path_map    Stored in `model_path_map_`.
   * @param pre_processor_map Stored in `pre_processor_map_`.
   * @param inference_map     Stored in `inference_map_`.
   * @param device_map        Stored in `device_map_`.
   * @param dla_core_map      Stored in `dla_core_map_`.
   * @param temporal_map      Stored in `temporal_map_`.
   * @param activation_map    Stored in `activation_map_`.
   * @param trt_opt_profile   Stored in `trt_opt_profile_`.
   * @param is_engine_path    Stored in `is_engine_path_`.
   * @param oncpu             Stored negated in `oncuda_`.
   * @param parallel_proc     Stored in `parallel_processing_`.
   * @param use_fp16          Stored in `use_fp16_`.
   * @param cuda_buffer_in    Stored in `cuda_buffer_in_`.
   * @param cuda_buffer_out   Stored in `cuda_buffer_out_`.
   * @param use_cuda_graphs   Stored in `use_cuda_graphs_`.
   * @param dla_core          Stored in `dla_core_`.
   * @param dla_gpu_fallback  Stored in `dla_gpu_fallback_`.
   */
  InferenceSpecs(const std::string& backend, const Mappings& backend_map,
                 const Mappings& model_path_map, const MultiMappings& pre_processor_map,
                 const MultiMappings& inference_map, const Mappings& device_map,
                 const Mappings& dla_core_map,
                 const Mappings& temporal_map, const Mappings& activation_map,
                 const std::vector<int32_t>& trt_opt_profile, bool is_engine_path, bool oncpu,
                 bool parallel_proc, bool use_fp16, bool cuda_buffer_in, bool cuda_buffer_out,
                 bool use_cuda_graphs, int32_t dla_core, bool dla_gpu_fallback)
      : backend_type_(backend),
        backend_map_(backend_map),
        model_path_map_(model_path_map),
        pre_processor_map_(pre_processor_map),
        inference_map_(inference_map),
        device_map_(device_map),
        dla_core_map_(dla_core_map),
        temporal_map_(temporal_map),
        activation_map_(activation_map),
        trt_opt_profile_(trt_opt_profile),
        is_engine_path_(is_engine_path),
        oncuda_(!oncpu),  // the member is the inverse of the constructor argument
        parallel_processing_(parallel_proc),
        use_fp16_(use_fp16),
        cuda_buffer_in_(cuda_buffer_in),
        cuda_buffer_out_(cuda_buffer_out),
        use_cuda_graphs_(use_cuda_graphs),
        dla_core_(dla_core),
        dla_gpu_fallback_(dla_gpu_fallback) {}

  /// @return Copy of `model_path_map_`.
  Mappings get_path_map() const { return model_path_map_; }

  /// @return Copy of `backend_map_`.
  Mappings get_backend_map() const { return backend_map_; }

  /// @return Copy of `device_map_`.
  Mappings get_device_map() const { return device_map_; }

  /// @return Copy of `dla_core_map_`.
  Mappings get_dla_core_map() const { return dla_core_map_; }

  /// @return Copy of `temporal_map_`.
  Mappings get_temporal_map() const { return temporal_map_; }

  /// @return Copy of `activation_map_`.
  Mappings get_activation_map() const { return activation_map_; }

  /**
   * @brief Replaces the stored activation map with a copy of `activation_map`.
   *
   * @param activation_map New contents for `activation_map_`.
   */
  void set_activation_map(const Mappings& activation_map) {
    // Copy assignment instead of clear-then-insert: same result for distinct maps, and it
    // stays correct when the argument is `activation_map_` itself (the public member), which
    // the clear-first approach would have emptied.
    activation_map_ = activation_map;
  }

  /// Backend type; empty by default.
  std::string backend_type_{""};

  /// Backend map (presumably keyed by model name - TODO confirm).
  Mappings backend_map_;

  /// Model path map (presumably model name to model file path - TODO confirm).
  Mappings model_path_map_;

  /// Pre-processor map; each key maps to a list of strings.
  MultiMappings pre_processor_map_;

  /// Inference map; each key maps to a list of strings.
  MultiMappings inference_map_;

  /// Device map (presumably model name to device id - TODO confirm).
  Mappings device_map_;

  /// DLA core map (presumably model name to DLA core - TODO confirm).
  Mappings dla_core_map_;

  /// Temporal map (semantics defined by the consumers of this struct).
  Mappings temporal_map_;

  /// Activation map; replaced wholesale by set_activation_map().
  Mappings activation_map_;

  /// TensorRT optimization profile values (by its name; layout defined by the consumers).
  std::vector<int32_t> trt_opt_profile_;

  /// Engine-path flag; false by default.
  bool is_engine_path_ = false;

  /// CUDA flag; true by default, and set to `!oncpu` by the full constructor.
  bool oncuda_ = true;

  /// Parallel-processing flag; false by default.
  bool parallel_processing_ = false;

  /// FP16 flag; false by default.
  bool use_fp16_ = false;

  /// Input-buffer-on-CUDA flag; true by default.
  bool cuda_buffer_in_ = true;

  /// Output-buffer-on-CUDA flag; true by default.
  bool cuda_buffer_out_ = true;

  /// CUDA-graphs flag; true by default.
  bool use_cuda_graphs_ = true;

  /// DLA core index; -1 by default (presumably meaning "no DLA core" - TODO confirm).
  int32_t dla_core_ = -1;

  /// DLA GPU-fallback flag; true by default.
  bool dla_gpu_fallback_ = true;

  /// Data buffers keyed by string (by its name, one entry per tensor).
  DataMap data_per_tensor_;

  /// Data buffers keyed by string (by its name, outputs per model).
  DataMap output_per_model_;
};

/**
 * @brief Declaration only; the definition is not part of this header.
 *
 * Presumably creates a DataBuffer for `keyname` inside `buffers`, sized from `dims` -
 * confirm against the definition.
 *
 * @param buffers       Map the function operates on (taken by non-const reference).
 * @param dims          Dimensions, taken by non-const reference (whether they are modified
 *                      is not visible here).
 * @param datatype      Element datatype for the allocation.
 * @param keyname       Key associated with the allocation.
 * @param allocate_cuda Presumably selects whether device memory is allocated as well -
 *                      TODO confirm.
 * @param device_id     Device id to use.
 * @return InferStatus describing the outcome.
 */
InferStatus allocate_buffers(DataMap& buffers, std::vector<int64_t>& dims,
                             holoinfer_datatype datatype, const std::string& keyname,
                             bool allocate_cuda, int device_id);
}  // namespace inference
}  // namespace holoscan

#endif/* MODULES_HOLOINFER_SRC_INCLUDE_HOLOINFER_BUFFER_HPP */