Program Listing for File holoinfer_buffer.hpp
/*
* SPDX-FileCopyrightText: Copyright (c) 2022 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
* SPDX-License-Identifier: Apache-2.0
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef _HOLOSCAN_INFER_BUFFER_H
#define _HOLOSCAN_INFER_BUFFER_H
#include <cuda_runtime_api.h>
#include <sys/stat.h>
#include <algorithm>
#include <fstream>
#include <iostream>
#include <iterator>
#include <map>
#include <memory>
#include <mutex>
#include <numeric>
#include <string>
#include <utility>
#include <vector>
#include "holoinfer_constants.hpp"
#define _HOLOSCAN_EXTERNAL_API_ __attribute__((visibility("default")))
namespace holoscan {
namespace inference {
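/**
 * @brief Get the size, in bytes, of a single element of the given datatype.
 */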
uint32_t get_element_size(holoinfer_datatype t) noexcept;
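/**
 * @brief Functor that allocates CUDA device memory. Returns true on success.
 */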
class DeviceAllocator {
 public:
  bool operator()(void** ptr, size_t size) const;
};
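/**
 * @brief Functor that frees CUDA device memory allocated by DeviceAllocator.
 */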
class DeviceFree {
 public:
  void operator()(void* ptr) const;
};
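/**
 * @brief CUDA device buffer class. Owns a typed allocation in device memory; size() reports
 * the element count and get_bytes() the allocation size in bytes.
 */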
class DeviceBuffer {
 public:
  explicit DeviceBuffer(holoinfer_datatype type = holoinfer_datatype::hFloat);
  DeviceBuffer(size_t size, holoinfer_datatype type);

  void* data();
  size_t size() const;
  size_t get_bytes() const;
  void resize(size_t element_size);

  ~DeviceBuffer();

 private:
  size_t size_{0}, capacity_{0};
  holoinfer_datatype type_ = holoinfer_datatype::hFloat;
  void* buffer_ = nullptr;
  DeviceAllocator allocator_;
  DeviceFree free_;
};
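/**
 * @brief Data buffer class. Pairs a device buffer with a host-side float buffer for a tensor.
 */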
class DataBuffer {
 public:
  DataBuffer();
  std::shared_ptr<DeviceBuffer> device_buffer;
  std::vector<float> host_buffer;
};
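// Map aliases keyed by tensor or model name.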
using DataMap = std::map<std::string, std::shared_ptr<DataBuffer>>;
using Mappings = std::map<std::string, std::string>;
using DimType = std::map<std::string, std::vector<int64_t>>;
using MultiMappings = std::map<std::string, std::vector<std::string>>;
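/**
 * @brief Struct holding specifications for multi-AI inference, along with input and output
 * data buffers per model and per tensor.
 */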
struct MultiAISpecs {
  MultiAISpecs() = default;

  MultiAISpecs(const std::string& backend, const Mappings& model_path_map,
               const Mappings& inference_map, bool is_engine_path, bool oncpu,
               bool parallel_proc, bool use_fp16, bool cuda_buffer_in, bool cuda_buffer_out)
      : backend_type_(backend),
        model_path_map_(model_path_map),
        inference_map_(inference_map),
        is_engine_path_(is_engine_path),
        oncuda_(!oncpu),
        parallel_processing_(parallel_proc),
        use_fp16_(use_fp16),
        cuda_buffer_in_(cuda_buffer_in),
        cuda_buffer_out_(cuda_buffer_out) {}

  Mappings get_path_map() const { return model_path_map_; }

  bool oncuda_ = true;                // inference runs on GPU (derived from the oncpu flag)
  bool is_engine_path_ = false;       // model paths point to pre-built engine files
  bool parallel_processing_ = false;  // infer multiple models in parallel
  bool use_fp16_ = false;             // enable FP16 inference
  bool cuda_buffer_in_ = true;        // input buffers are in CUDA device memory
  bool cuda_buffer_out_ = true;       // output buffers are in CUDA device memory

  Mappings model_path_map_;  // model name -> path to model file
  Mappings inference_map_;   // model name -> name of the inferred tensor
  DataMap data_per_model_;
  DataMap data_per_tensor_;
  DataMap output_per_model_;

  std::string backend_type_{"trt"};  // inference backend; defaults to TensorRT
};
/**
 * @brief Allocate a host buffer and a device buffer for the given dimensions and store them
 * in the buffer map under the given key.
 */
InferStatus allocate_host_device_buffers(DataMap& buffers, std::vector<int64_t>& dims_map,
                                         const std::string& mappings);

/**
 * @brief Allocate a host buffer for the given dimensions and store it in the buffer map
 * under the given key.
 */
InferStatus allocate_host_buffers(DataMap& buffers, std::vector<int64_t>& dims,
                                  const std::string& keyname);
} // namespace inference
} // namespace holoscan
#endif  // _HOLOSCAN_INFER_BUFFER_H
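A minimal usage sketch of the buffer types above, assuming holoinfer_constants.hpp provides holoinfer_datatype and InferStatus and that the header is on the include path; the tensor name and dimensions are illustrative, not part of the API:

#include <memory>

#include "holoinfer_buffer.hpp"

int main() {
  using namespace holoscan::inference;

  // Device-side allocation for a 1x3x224x224 float tensor (element count, not bytes).
  auto device = std::make_shared<DeviceBuffer>(1 * 3 * 224 * 224, holoinfer_datatype::hFloat);

  // Pair it with a host staging buffer of the same element count.
  auto data = std::make_shared<DataBuffer>();
  data->device_buffer = device;
  data->host_buffer.resize(device->size());

  // Register the buffer under its (illustrative) tensor name, as the allocate_* helpers do.
  DataMap buffers;
  buffers["input_tensor"] = data;
  return 0;
}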