NVIDIA Docs Hub NVIDIA Holoscan Clara Holoscan v0.3.0 Program Listing for File tensor_rt_inference.hpp

Program Listing for File tensor_rt_inference.hpp

↰ Return to documentation for file (gxf_extensions/custom_lstm_inference/tensor_rt_inference.hpp)

Copy
Copied!

            
            /*
* Copyright (c) 2022, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
*     http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef NVIDIA_CLARA_HOLOSCAN_GXF_EXTENSIONS_CUSTOM_LSTM_INFERENCE_TENSOR_RT_INFERENCE_HPP_
#define NVIDIA_CLARA_HOLOSCAN_GXF_EXTENSIONS_CUSTOM_LSTM_INFERENCE_TENSOR_RT_INFERENCE_HPP_

#include <NvInfer.h>
#include <cuda_runtime.h>

#include <array>
#include <cstdint>
#include <memory>
#include <mutex>
#include <string>
#include <unordered_map>
#include <vector>

#include "gxf/core/entity.hpp"
#include "gxf/core/gxf.h"
#include "gxf/core/parameter.hpp"
#include "gxf/cuda/cuda_stream.hpp"
#include "gxf/cuda/cuda_stream_pool.hpp"
#include "gxf/std/allocator.hpp"
#include "gxf/std/clock.hpp"
#include "gxf/std/codelet.hpp"
#include "gxf/std/receiver.hpp"
#include "gxf/std/tensor.hpp"
#include "gxf/std/transmitter.hpp"

namespace nvidia::holoscan::custom_lstm_inference {

// Logger for TensorRT to redirect logging into gxf console spew.
//
// Implements the nvinfer1::ILogger callback interface. The method bodies live
// in the corresponding source file; this header only declares them.
class TensorRTInferenceLogger : public nvinfer1::ILogger {
 public:
  // Callback receiving one TensorRT log message together with its severity.
  // Declared non-throwing; `noexcept` is the C++17 spelling of the former
  // (deprecated) `throw()` specification and has identical meaning.
  void log(ILogger::Severity severity, const char* msg) noexcept override;
  // Sets verbose flag for logging
  void setVerbose(bool verbose);

 private:
  // Verbose flag stored by setVerbose(). Defaults to false so that a log()
  // call arriving before setVerbose() never reads an indeterminate value.
  bool verbose_ = false;
};

// GXF codelet declaring a TensorRT-based inference component for the custom
// LSTM extension. Only declarations are visible here; the lifecycle methods
// and helpers are defined in the corresponding source file.
class TensorRtInference : public gxf::Codelet {
 public:
  // gxf::Codelet lifecycle overrides.
  gxf_result_t start() override;
  gxf_result_t tick() override;
  gxf_result_t stop() override;
  // Overrides the codelet hook that receives the registrar; presumably the
  // gxf::Parameter members below are registered here - confirm in the source.
  gxf_result_t registerInterface(gxf::Registrar* registrar) override;

 private:
  // Helper to return a string for the TRT engine capability.
  gxf::Expected<std::string> queryHostEngineCapability() const;
  // Helper to search for the engine file path.
  gxf::Expected<std::string> findEngineFilePath(const std::string& host_engine_capability) const;

  // Helper deleter to call destroy while destroying the cuda objects.
  // Takes a typed pointer (std::unique_ptr always passes its own T*), so no
  // cast from void* is needed.
  template <typename T>
  struct DeleteFunctor {
    void operator()(T* ptr) const { ptr->destroy(); }
  };
  // unique_ptr using custom Delete Functor above
  template <typename T>
  using NvInferHandle = std::unique_ptr<T, DeleteFunctor<T>>;

  // To cache binding info for tensors
  struct BindingInfo {
    int32_t index;
    uint32_t rank;
    std::string binding_name;
    gxf::PrimitiveType element_type;
    std::array<int32_t, gxf::Shape::kMaxRank> dimensions;
  };
  // Cached BindingInfo entries keyed by string (presumably the tensor name -
  // confirm against the source file).
  std::unordered_map<std::string, BindingInfo> binding_infos_;

  // Converts loaded model to engine plan
  gxf::Expected<std::vector<char>> convertModelToEngine();

  // Configuration parameters. Their exact semantics are defined where they are
  // registered and used (not visible in this header).
  gxf::Parameter<std::string> model_file_path_;
  gxf::Parameter<std::string> engine_cache_dir_;
  gxf::Parameter<std::string> plugins_lib_namespace_;
  gxf::Parameter<bool> force_engine_update_;
  gxf::Parameter<std::vector<std::string>> input_tensor_names_;
  gxf::Parameter<std::vector<std::string>> input_state_tensor_names_;
  gxf::Parameter<std::vector<std::string>> input_binding_names_;
  gxf::Parameter<std::vector<std::string>> output_tensor_names_;
  gxf::Parameter<std::vector<std::string>> output_state_tensor_names_;
  gxf::Parameter<std::vector<std::string>> output_binding_names_;
  gxf::Parameter<gxf::Handle<gxf::Allocator>> pool_;
  gxf::Parameter<gxf::Handle<gxf::CudaStreamPool>> cuda_stream_pool_;
  gxf::Parameter<int64_t> max_workspace_size_;
  gxf::Parameter<int64_t> dla_core_;
  gxf::Parameter<int32_t> max_batch_size_;
  gxf::Parameter<bool> enable_fp16_;
  gxf::Parameter<bool> relaxed_dimension_check_;
  gxf::Parameter<bool> verbose_;
  gxf::Parameter<gxf::Handle<gxf::Clock>> clock_;

  // Message endpoints: a list of receivers and a single transmitter.
  gxf::Parameter<std::vector<gxf::Handle<gxf::Receiver>>> rx_;
  gxf::Parameter<gxf::Handle<gxf::Transmitter>> tx_;

  // Logger instance for TensorRT
  TensorRTInferenceLogger cuda_logger_;

  // TensorRT handles. The engine is declared BEFORE the execution context on
  // purpose: members are destroyed in reverse declaration order, so implicit
  // destruction releases the context first and the engine that created it
  // afterwards, as TensorRT's object-lifetime rules require.
  NvInferHandle<nvinfer1::ICudaEngine> cuda_engine_;
  NvInferHandle<nvinfer1::IExecutionContext> cuda_execution_ctx_;

  gxf::Handle<gxf::CudaStream> cuda_stream_;
  std::vector<void*> cuda_buffers_;
  // Raw CUDA handles start out null so they never hold an indeterminate value
  // before the source file assigns them.
  cudaStream_t cached_cuda_stream_ = nullptr;
  cudaEvent_t cuda_event_consumed_ = nullptr;
  cudaEvent_t cuda_event_done_ = nullptr;

  uint32_t state_tensor_count_ = 0;
  // Starts in the "unexpected" state (GXF_UNINITIALIZED_VALUE) until an entity
  // is assigned.
  gxf::Expected<gxf::Entity> internal_states_ = gxf::Unexpected{GXF_UNINITIALIZED_VALUE};
  std::string engine_file_path_;
};

}  // namespace nvidia::holoscan::custom_lstm_inference

#endif  // NVIDIA_CLARA_HOLOSCAN_GXF_EXTENSIONS_CUSTOM_LSTM_INFERENCE_TENSOR_RT_INFERENCE_HPP_