Clara Holoscan v0.2.0
1.0

Program Listing for File segmentation_postprocessor.cu.cpp

Return to documentation for file (gxf_extensions/segmentation_postprocessor/segmentation_postprocessor.cu.cpp)

Copy
Copied!
            

/* * Copyright (c) 2022, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ #include "segmentation_postprocessor.cu.hpp" namespace nvidia { namespace holoscan { namespace segmentation_postprocessor { __forceinline__ __device__ uint32_t hwc_to_index(Shape shape, uint32_t y, uint32_t x, uint32_t c) { return (y * shape.width + x) * shape.channels + c; } __forceinline__ __device__ uint32_t nchw_to_index(Shape shape, uint32_t y, uint32_t x, uint32_t c) { return (c * shape.height + y) * shape.width + x; } template <enum DataFormat> __forceinline__ __device__ uint32_t data_format_to_index(Shape shape, uint32_t y, uint32_t x, uint32_t c) {} template <> __forceinline__ __device__ uint32_t data_format_to_index<DataFormat::kHWC>(Shape shape, uint32_t y, uint32_t x, uint32_t c) { return hwc_to_index(shape, y, x, c); } template<> __forceinline__ __device__ uint32_t data_format_to_index<DataFormat::kNHWC>(Shape shape, uint32_t y, uint32_t x, uint32_t c) { return hwc_to_index(shape, y, x, c); } template <> __forceinline__ __device__ uint32_t data_format_to_index<DataFormat::kNCHW>(Shape shape, uint32_t y, uint32_t x, uint32_t c) { return nchw_to_index(shape, y, x, c); } __forceinline__ __device__ uint32_t hw1_to_index(Shape shape, uint32_t y, uint32_t x) { return y * shape.width + x; } template <enum NetworkOutputType network_output_type, enum DataFormat data_format> __global__ void postprocessing_kernel(Shape shape, const float* input, output_type_t* output) { const uint32_t x = blockIdx.x * blockDim.x + threadIdx.x; const uint32_t y = blockIdx.y * blockDim.y + threadIdx.y; if ((x >= shape.width) || (y >= shape.height)) { return; } float max_value = 0.0f; uint8_t max_index = 0; switch (network_output_type) { case NetworkOutputType::kSigmoid: { const float value = input[data_format_to_index<data_format>(shape, y, x, 0)]; max_index = value >= 0.5f ? 1 : 0; } break; case NetworkOutputType::kSoftmax: { for (uint32_t c = 0; c < shape.channels; c++) { const float value = input[data_format_to_index<data_format>(shape, y, x, c)]; if (value > max_value) { max_value = value; max_index = c; } } } break; } output[hw1_to_index(shape, y, x)] = max_index; } uint16_t ceil_div(uint16_t numerator, uint16_t denominator) { uint32_t accumulator = numerator + denominator - 1; return accumulator / denominator; } void cuda_postprocess(enum NetworkOutputType network_output_type, enum DataFormat data_format, Shape shape, const float* input, output_type_t* output) { dim3 block(32, 32, 1); dim3 grid(ceil_div(shape.width, block.x), ceil_div(shape.height, block.y), 1); switch (network_output_type) { case NetworkOutputType::kSigmoid: switch (data_format) { case DataFormat::kNCHW: postprocessing_kernel<NetworkOutputType::kSigmoid, DataFormat::kNCHW> <<<grid, block>>>(shape, input, output); break; case DataFormat::kHWC: postprocessing_kernel<NetworkOutputType::kSigmoid, DataFormat::kHWC> <<<grid, block>>>(shape, input, output); break; case DataFormat::kNHWC: postprocessing_kernel<NetworkOutputType::kSigmoid, DataFormat::kNHWC> <<<grid, block>>>(shape, input, output); break; } break; case NetworkOutputType::kSoftmax: switch (data_format) { case DataFormat::kNCHW: postprocessing_kernel<NetworkOutputType::kSoftmax, DataFormat::kNCHW> <<<grid, block>>>(shape, input, output); break; case DataFormat::kHWC: postprocessing_kernel<NetworkOutputType::kSoftmax, DataFormat::kHWC> <<<grid, block>>>(shape, input, output); break; case DataFormat::kNHWC: postprocessing_kernel<NetworkOutputType::kSoftmax, DataFormat::kNHWC> <<<grid, block>>>(shape, input, output); break; } break; } } } // namespace segmentation_postprocessor } // namespace holoscan } // namespace nvidia

© Copyright 2022, NVIDIA. Last updated on Jun 28, 2023.