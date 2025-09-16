Program Listing for File data_processor.hpp
/*
* SPDX-FileCopyrightText: Copyright (c) 2022-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
* SPDX-License-Identifier: Apache-2.0
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef MODULES_HOLOINFER_SRC_PROCESS_DATA_PROCESSOR_HPP
#define MODULES_HOLOINFER_SRC_PROCESS_DATA_PROCESSOR_HPP
#include <cuda.h>
#include <cuda_runtime.h>
#include <cuda_runtime_api.h>
#include <nvrtc.h>
#include <bits/stdc++.h>
#include <cstring>
#include <functional>
#include <iostream>
#include <map>
#include <memory>
#include <sstream>
#include <string>
#include <vector>
#include <holoinfer.hpp>
#include <holoinfer_constants.hpp>
#include <holoinfer_utils.hpp>
#include <holoscan/core/analytics/data_exporter.hpp>
#include <process/transforms/generate_boxes.hpp>
namespace holoscan {
namespace inference {
// Callback signature for a data processing operation. Parameter names are
// documentation only; they do not affect the aliased type.
//   in_dims / in_data   : dimensions of the input and a pointer to its data
//   out_dims / out_data : dimensions and data map the operation fills in
//   output_tensors      : names of the output tensors
//   custom_strings      : extra strings forwarded to operations that accept them
//   process_with_cuda   : flag forwarded to the operation
//   cuda_stream         : CUDA stream forwarded to the operation
using processor_FP = std::function<InferStatus(const std::vector<int>& in_dims,
                                               const void* in_data,
                                               std::vector<int64_t>& out_dims,
                                               DataMap& out_data,
                                               const std::vector<std::string>& output_tensors,
                                               const std::vector<std::string>& custom_strings,
                                               bool process_with_cuda,
                                               cudaStream_t cuda_stream)>;

// Callback signature for a custom CUDA kernel operation. Compared with
// processor_FP it takes a leading list of identifiers and has no custom strings.
using cuda_FP = std::function<InferStatus(const std::vector<std::string>& ids,
                                          const std::vector<int>& in_dims,
                                          const void* in_data,
                                          std::vector<int64_t>& out_dims,
                                          DataMap& out_data,
                                          const std::vector<std::string>& output_tensors,
                                          bool process_with_cuda,
                                          cudaStream_t cuda_stream)>;

// Callback signature for transforms that need configuration (via a yaml file).
// Unlike the operations above, a transform receives several named inputs
// (name -> data pointer, name -> dimensions) and can produce several outputs.
using transforms_FP =
    std::function<InferStatus(const std::string& key,
                              const std::map<std::string, void*>& indata,
                              const std::map<std::string, std::vector<int>>& indim,
                              DataMap& processed_data,
                              DimType& processed_dims)>;
/**
 * @brief Declares the data processing operations (compute, print, export, transforms and custom
 * CUDA kernels) together with the tables that map an operation name to its callback.
 *
 * Member functions are only declared here; their definitions live outside this header.
 *
 * The object is neither copyable nor movable: every stored callback is a lambda that captures
 * `this`, so a copied or moved callback would keep calling into the object it was created in.
 */
class DataProcessor {
 public:
  /// Default constructor; all data members use their in-class initializers.
  DataProcessor() = default;

  /// Destructor, defined outside this header.
  ~DataProcessor();

  // Copying/moving is disabled explicitly (it was already impossible implicitly because of the
  // std::unique_ptr and const members). The stored lambdas capture `this`, so duplicating them
  // into another object would leave that object dispatching to the original instance.
  DataProcessor(const DataProcessor&) = delete;
  DataProcessor& operator=(const DataProcessor&) = delete;
  DataProcessor(DataProcessor&&) = delete;
  DataProcessor& operator=(DataProcessor&&) = delete;

  /**
   * @brief Initializes the processor from the configured operations.
   *
   * @param process_operations Configured processing operations.
   * @param custom_kernels Settings for custom kernels.
   * @param use_cuda_graphs CUDA graph flag (presumably stored in use_cuda_graph_ - confirm in the
   * definition).
   * @param config_path Configuration path (presumably stored in config_path_ - confirm in the
   * definition).
   * @return InferStatus reported by the definition.
   */
  InferStatus initialize(const MultiMappings& process_operations, const Mappings& custom_kernels,
                         bool use_cuda_graphs, const std::string config_path);

  /**
   * @brief Runs a single named operation on one input.
   *
   * NOTE(review): presumably looks `operation` up in oper_to_fp_ / cuda_to_fp_; the dispatch itself
   * is not visible in this header.
   *
   * @param operation Name of the operation.
   * @param in_dims Dimensions of the input.
   * @param in_data Pointer to the input data.
   * @param processed_dims Receives the output dimensions.
   * @param processed_data_map Receives the output data.
   * @param output_tensors Names of the output tensors.
   * @param custom_strings Extra strings for operations that accept them.
   * @param process_with_cuda Flag forwarded to the operation.
   * @param cuda_stream CUDA stream forwarded to the operation.
   * @return InferStatus reported by the definition.
   */
  InferStatus process_operation(const std::string& operation, const std::vector<int>& in_dims,
                                const void* in_data, std::vector<int64_t>& processed_dims,
                                DataMap& processed_data_map,
                                const std::vector<std::string>& output_tensors,
                                const std::vector<std::string>& custom_strings,
                                bool process_with_cuda, cudaStream_t cuda_stream);

  /**
   * @brief Runs a configured transform on a set of named inputs.
   *
   * @param transform Name of the transform.
   * @param key Key identifying the transform instance (generate_boxes_fp_ uses the key it receives
   * to index transforms_).
   * @param indata Input name -> data pointer.
   * @param indim Input name -> dimensions.
   * @param processed_data Receives the output data.
   * @param processed_dims Receives the output dimensions.
   * @return InferStatus reported by the definition.
   */
  InferStatus process_transform(const std::string& transform, const std::string& key,
                                const std::map<std::string, void*>& indata,
                                const std::map<std::string, std::vector<int>>& indim,
                                DataMap& processed_data, DimType& processed_dims);

  /**
   * @brief Implementation of the "max_per_channel_scaled" operation.
   *
   * @param in_dims Dimensions of the input.
   * @param in_data Pointer to the input data.
   * @param out_dims Receives the output dimensions.
   * @param out_data_map Receives the output data.
   * @param output_tensors Names of the output tensors.
   * @param process_with_cuda Flag passed by the caller (presumably selects the CUDA path - confirm
   * in the definition).
   * @param cuda_stream CUDA stream passed by the caller.
   * @return InferStatus reported by the definition.
   */
  InferStatus compute_max_per_channel_scaled(const std::vector<int>& in_dims, const void* in_data,
                                             std::vector<int64_t>& out_dims, DataMap& out_data_map,
                                             const std::vector<std::string>& output_tensors,
                                             bool process_with_cuda, cudaStream_t cuda_stream);

  /**
   * @brief Implementation of the "scale_intensity_cpu" operation.
   *
   * @param in_dims Dimensions of the input.
   * @param in_data Pointer to the input data.
   * @param out_dims Receives the output dimensions.
   * @param out_data_map Receives the output data.
   * @param output_tensors Names of the output tensors.
   * @return InferStatus reported by the definition.
   */
  InferStatus scale_intensity_cpu(const std::vector<int>& in_dims, const void* in_data,
                                  std::vector<int64_t>& out_dims, DataMap& out_data_map,
                                  const std::vector<std::string>& output_tensors);

  /// Implementation of the "print" operation; takes only the input dimensions and data pointer.
  InferStatus print_results(const std::vector<int>& in_dims, const void* in_data);

  /// Implementation of the "print_int32" operation; same parameters as print_results.
  InferStatus print_results_int32(const std::vector<int>& in_dims, const void* in_data);

  /// Implementation of the "print_custom_binary_classification" operation; additionally receives
  /// the custom strings configured for the operation.
  InferStatus print_custom_binary_classification(const std::vector<int>& in_dims,
                                                 const void* in_data,
                                                 const std::vector<std::string>& custom_strings);

  /// Implementation of the "export_binary_classification_to_csv" operation; additionally receives
  /// the custom strings configured for the operation.
  InferStatus export_binary_classification_to_csv(const std::vector<int>& in_dims,
                                                  const void* in_data,
                                                  const std::vector<std::string>& custom_strings);

  /**
   * @brief Implementation of the "custom_cuda_kernel" operation.
   *
   * @param ids Identifiers passed by the caller (presumably custom kernel identifiers - confirm in
   * the definition).
   * @param dimensions Dimensions of the input.
   * @param input Pointer to the input data.
   * @param processed_dims Receives the output dimensions.
   * @param processed_data_map Receives the output data.
   * @param output_tensors Names of the output tensors.
   * @param process_with_cuda Flag passed by the caller.
   * @param cuda_stream CUDA stream passed by the caller.
   * @return InferStatus reported by the definition.
   */
  InferStatus launchCustomKernel(const std::vector<std::string>& ids,
                                 const std::vector<int>& dimensions, const void* input,
                                 std::vector<int64_t>& processed_dims, DataMap& processed_data_map,
                                 const std::vector<std::string>& output_tensors,
                                 bool process_with_cuda, cudaStream_t cuda_stream);

  /// Prepares the custom CUDA kernel(s); takes no arguments, so it works from member state.
  /// NOTE(review): presumably compiles custom_cuda_src_ and fills kernel_ - confirm in the
  /// definition.
  InferStatus prepareCustomKernel();

 private:
  // Names of the supported compute operations. The mapped value is a holoinfer_data_processor
  // enumerator (h_HOST or h_CUDA_AND_HOST); by its naming it records where the operation can run -
  // confirm against the enum's definition.
  inline static const std::map<std::string, holoinfer_data_processor> supported_compute_operations_{
      {"max_per_channel_scaled", holoinfer_data_processor::h_CUDA_AND_HOST},
      {"custom_cuda_kernel", holoinfer_data_processor::h_CUDA_AND_HOST},
      {"scale_intensity_cpu", holoinfer_data_processor::h_HOST}};

  // Names of the supported transforms (same value convention as above).
  inline static const std::map<std::string, holoinfer_data_processor> supported_transforms_{
      {"generate_boxes", holoinfer_data_processor::h_HOST}};

  // Transform objects owned by this processor, keyed by string. generate_boxes_fp_ indexes this
  // map with the key it is called with.
  std::map<std::string, std::unique_ptr<TransformBase>> transforms_;

  // Names of the supported print operations (same value convention as above).
  inline static const std::map<std::string, holoinfer_data_processor> supported_print_operations_{
      {"print", holoinfer_data_processor::h_HOST},
      {"print_int32", holoinfer_data_processor::h_HOST},
      {"print_custom_binary_classification", holoinfer_data_processor::h_HOST}};

  // Names of the supported export operations (same value convention as above).
  inline static const std::map<std::string, holoinfer_data_processor> supported_export_operations_{
      {"export_binary_classification_to_csv", holoinfer_data_processor::h_HOST}};

  // The adapters below give every operation the common processor_FP / cuda_FP / transforms_FP
  // signature and forward only the arguments the wrapped member function takes; the remaining
  // arguments are intentionally ignored. Each lambda captures `this`.

  // Adapter for compute_max_per_channel_scaled(); custom_strings is ignored.
  processor_FP max_per_channel_scaled_fp_ =
      [this](auto& in_dims, const void* in_data, std::vector<int64_t>& out_dims, DataMap& out_data,
             auto& output_tensors, auto& custom_strings, bool process_with_cuda,
             cudaStream_t cuda_stream) {
        return compute_max_per_channel_scaled(
            in_dims, in_data, out_dims, out_data, output_tensors, process_with_cuda, cuda_stream);
      };

  // Adapter for scale_intensity_cpu(); custom_strings, process_with_cuda and cuda_stream are
  // ignored.
  processor_FP scale_intensity_cpu_fp_ = [this](auto& in_dims, const void* in_data,
                                                std::vector<int64_t>& out_dims, DataMap& out_data,
                                                auto& output_tensors, auto& custom_strings,
                                                bool process_with_cuda, cudaStream_t cuda_stream) {
    return scale_intensity_cpu(in_dims, in_data, out_dims, out_data, output_tensors);
  };

  // Adapter for print_results(); only in_dims and in_data are used.
  processor_FP print_results_fp_ =
      [this](auto& in_dims, const void* in_data, std::vector<int64_t>& out_dims, DataMap& out_data,
             auto& output_tensors, auto& custom_strings, bool process_with_cuda,
             cudaStream_t cuda_stream) { return print_results(in_dims, in_data); };

  // Adapter for print_custom_binary_classification(); uses in_dims, in_data and custom_strings.
  processor_FP print_custom_binary_classification_fp_ =
      [this](auto& in_dims, const void* in_data, std::vector<int64_t>& out_dims, DataMap& out_data,
             auto& output_tensors, auto& custom_strings, bool process_with_cuda,
             cudaStream_t cuda_stream) {
        return print_custom_binary_classification(in_dims, in_data, custom_strings);
      };

  // Adapter for export_binary_classification_to_csv(); uses in_dims, in_data and custom_strings.
  processor_FP export_binary_classification_to_csv_fp_ =
      [this](auto& in_dims, const void* in_data, std::vector<int64_t>& out_dims, DataMap& out_data,
             auto& output_tensors, auto& custom_strings, bool process_with_cuda,
             cudaStream_t cuda_stream) {
        return export_binary_classification_to_csv(in_dims, in_data, custom_strings);
      };

  // Adapter for print_results_int32(); only in_dims and in_data are used.
  processor_FP print_results_i32_fp_ =
      [this](auto& in_dims, const void* in_data, std::vector<int64_t>& out_dims, DataMap& out_data,
             auto& output_tensors, auto& custom_strings, bool process_with_cuda,
             cudaStream_t cuda_stream) { return print_results_int32(in_dims, in_data); };

  // Adapter for launchCustomKernel(); forwards every argument unchanged.
  cuda_FP custom_cuda_kernel_fp_ = [this](const std::vector<std::string>& ids, auto& in_dims,
                                          const void* in_data, std::vector<int64_t>& out_dims,
                                          DataMap& out_data, auto& output_tensors,
                                          bool process_with_cuda, cudaStream_t cuda_stream) {
    return launchCustomKernel(
        ids, in_dims, in_data, out_dims, out_data, output_tensors, process_with_cuda, cuda_stream);
  };

  // Operation name -> callback. The map stores copies of the adapters above, so it must stay
  // declared after them (non-static members are initialized in declaration order).
  const std::map<std::string, processor_FP> oper_to_fp_{
      {"max_per_channel_scaled", max_per_channel_scaled_fp_},
      {"scale_intensity_cpu", scale_intensity_cpu_fp_},
      {"print", print_results_fp_},
      {"print_int32", print_results_i32_fp_},
      {"print_custom_binary_classification", print_custom_binary_classification_fp_},
      {"export_binary_classification_to_csv", export_binary_classification_to_csv_fp_}};

  // Custom CUDA kernel operation name -> callback (must stay declared after
  // custom_cuda_kernel_fp_).
  const std::map<std::string, cuda_FP> cuda_to_fp_{{"custom_cuda_kernel", custom_cuda_kernel_fp_}};

  // Adapter for the "generate_boxes" transform: runs execute() on the transform object stored
  // under `key`. std::map::at throws std::out_of_range when `key` is not present in transforms_.
  transforms_FP generate_boxes_fp_ = [this](const std::string& key,
                                            const std::map<std::string, void*>& indata,
                                            const std::map<std::string, std::vector<int>>& indim,
                                            DataMap& processed_data, DimType& processed_dims) {
    return transforms_.at(key)->execute(indata, indim, processed_data, processed_dims);
  };

  // Transform name -> callback (must stay declared after generate_boxes_fp_).
  const std::map<std::string, transforms_FP> transform_to_fp_{
      {"generate_boxes", generate_boxes_fp_}};

  // CUDA helper taking the input extents (rows, cols, channels), input/output float buffers and a
  // CUDA stream. NOTE(review): presumably the CUDA path of compute_max_per_channel_scaled() -
  // confirm in the definition.
  void max_per_channel_scaled_cuda(size_t rows, size_t cols, size_t channels, const float* indata,
                                   float* outdata, cudaStream_t cuda_stream);

  // Configuration path, empty by default (presumably set by initialize() - confirm).
  std::string config_path_ = {};

  // Data exporter owned by this processor; null until one is created.
  std::unique_ptr<DataExporter> data_exporter_ = nullptr;

  // Custom CUDA kernel feature related parameters.
  // NOTE(review): the string keys of the maps below are presumably custom kernel identifiers -
  // confirm against the definitions.
  CUdevice device_ = 0;
  CUmodule module_ = nullptr;
  std::string custom_cuda_src_;
  std::map<std::string, CUfunction> kernel_;
  std::map<std::string, holoinfer_datatype> output_dtype_;
  std::map<std::string, std::string> custom_kernel_thread_per_block_;
  std::map<std::string, std::vector<int>> custom_kernel_output_dimensions_;
  std::map<std::string, bool> custom_cuda_kernel_processed_;
  bool dynamic_output_dim_ = true;
  std::map<std::string, bool> first_time_kernel_launch_map_;
  std::map<std::string, std::vector<std::shared_ptr<DataBuffer>>> intermediate_buffers_;
  std::map<std::string, std::vector<void*>> intermediate_inputs_;

  // CUDA graph state for the custom kernels; the feature flag is off by default.
  bool use_cuda_graph_ = false;
  std::map<std::string, bool> cuda_graph_instantiated_;
  std::map<std::string, bool> cuda_graph_created_;
  std::map<std::string, CUgraph> graph_;
  std::map<std::string, CUgraphExec> cuda_graph_instance_;
  std::map<std::string, std::vector<CUgraphNode>> kernel_nodes_;
  std::map<std::string, std::vector<CUDA_KERNEL_NODE_PARAMS>> kernel_node_params_;
};
} // namespace inference
} // namespace holoscan
#endif/* MODULES_HOLOINFER_SRC_PROCESS_DATA_PROCESSOR_HPP */