pva_apl.h

Fully qualified name: src/device/fixed_function_library/include/pva_apl.h

File members: src/device/fixed_function_library/include/pva_apl.h

/*
 * SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 * SPDX-License-Identifier: LicenseRef-NvidiaProprietary
 *
 * NVIDIA CORPORATION, its affiliates and licensors retain all intellectual
 * property and proprietary rights in and to this material, related
 * documentation and any modifications thereto. Any use, reproduction,
 * disclosure or distribution of this material and related documentation
 * without an express license agreement from NVIDIA CORPORATION or
 * its affiliates is strictly prohibited.
 */

#ifndef PVA_APL_H
#define PVA_APL_H

#include "pva_apl/pva_apl_impl.h"
#include "pva_apl/pva_apl_types.h"

#include <cupva_device.h>
#include <stdint.h>

/**
 * Prototype of the "Vpu" variant of the SepConv5x5S16 Init step.
 * Only the declaration lives in this header; the definition is elsewhere.
 */
void pvaAplInitSepConv5x5S16Vpu(PvaAplSepConv5x5S16 *handle, int16_t *input, int16_t *coeff_h, int16_t *coeff_v,
                                int32_t width, int32_t height, int32_t inputLinePitch, int32_t outputLinePitch,
                                int32_t roundNBits, int16_t *output, int16_t *cbStart, int32_t cbSize,
                                int16_t *scratch);

/**
 * Init entry point for the SepConv5x5S16 primitive.
 *
 * The body is a single PVAAPL_IMPL_FUNC(SepConv5x5S16, Init, ...) invocation that receives every
 * argument unchanged and in declaration order. PVAAPL_IMPL_FUNC is not defined in this header.
 * NOTE(review): presumably it comes from pva_apl/pva_apl_impl.h and selects an implementation such as
 * pvaAplInitSepConv5x5S16Vpu() -- confirm.
 *
 * Parameter roles below are inferred from their names only -- TODO confirm against the implementation.
 *
 * @param handle           Primitive state object passed through to the implementation.
 * @param input            Pointer to int16_t input data.
 * @param coeff_h          Pointer to int16_t coefficients (presumably the horizontal pass).
 * @param coeff_v          Pointer to int16_t coefficients (presumably the vertical pass).
 * @param width            Width value (units not visible here).
 * @param height           Height value (units not visible here).
 * @param inputLinePitch   Line pitch of the input (units not visible here).
 * @param outputLinePitch  Line pitch of the output (units not visible here).
 * @param roundNBits       Rounding bit count passed through to the implementation.
 * @param output           Pointer to int16_t output data.
 * @param cbStart          Start pointer paired with cbSize (presumably a circular buffer -- confirm).
 * @param cbSize           Size paired with cbStart.
 * @param scratch          Pointer to int16_t scratch memory.
 */
inline void pvaAplInitSepConv5x5S16(PvaAplSepConv5x5S16 *handle, int16_t *input, int16_t *coeff_h, int16_t *coeff_v,
                                    int32_t width, int32_t height, int32_t inputLinePitch, int32_t outputLinePitch,
                                    int32_t roundNBits, int16_t *output, int16_t *cbStart, int32_t cbSize,
                                    int16_t *scratch)
{
    PVAAPL_IMPL_FUNC(SepConv5x5S16, Init, handle, input, coeff_h, coeff_v, width, height, inputLinePitch,
                     outputLinePitch, roundNBits, output, cbStart, cbSize, scratch);
}

/**
 * Prototype of the "Vpu" variant of the SepConv5x5S16 Update step (defined outside this header).
 */
void pvaAplUpdateSepConv5x5S16Vpu(PvaAplSepConv5x5S16 *handle, int16_t *input, int16_t *output);

/**
 * Update entry point for the SepConv5x5S16 primitive.
 *
 * Passes handle, input and output unchanged to PVAAPL_IMPL_FUNC(SepConv5x5S16, Update, ...).
 * NOTE(review): presumably this rebinds the input/output pointers of a handle that was already
 * initialized -- confirm against the implementation.
 *
 * @param handle  Primitive state object.
 * @param input   Pointer to int16_t input data.
 * @param output  Pointer to int16_t output data.
 */
inline void pvaAplUpdateSepConv5x5S16(PvaAplSepConv5x5S16 *handle, int16_t *input, int16_t *output)
{
    PVAAPL_IMPL_FUNC(SepConv5x5S16, Update, handle, input, output);
}

/**
 * Prototype of the "Vpu" variant of the SepConv5x5S16 Exec step (defined outside this header).
 */
void pvaAplExecSepConv5x5S16Vpu(PvaAplSepConv5x5S16 *handle);

/**
 * Exec entry point for the SepConv5x5S16 primitive.
 *
 * Passes handle unchanged to PVAAPL_IMPL_FUNC(SepConv5x5S16, Exec, ...).
 * NOTE(review): presumably runs the primitive as configured by the Init/Update calls -- confirm.
 *
 * @param handle  Primitive state object.
 */
inline void pvaAplExecSepConv5x5S16(PvaAplSepConv5x5S16 *handle)
{
    PVAAPL_IMPL_FUNC(SepConv5x5S16, Exec, handle);
}

/*
 * Round _x_ up to the next multiple of _y_ using integer arithmetic.
 * Both arguments are evaluated more than once, so they must be free of side effects.
 * The result is only a true round-up for non-negative _x_ and positive _y_.
 */
#define _PVAAPLROUNDUP(_x_, _y_) (((_x_) + ((_y_) - 1)) / (_y_) * (_y_))

/*
 * Size in bytes computed as
 *   (roundup(width - 2, 32) + 1) * roundup(height, 32) * sizeof(int16_t) * 2.
 * NOTE(review): presumably the size required for the scratch buffers handed to
 * pvaAplInitHarrisCornerS16() -- which of scratch0/1/2 it applies to is not visible here.
 *
 * Every macro argument is parenthesized so that expressions containing low-precedence
 * operators (ternary, shifts, bitwise ops) are evaluated as a unit before the "- 2".
 */
#define PVAAPL_HARRIS_SCRATCH_SIZE(_width_, _height_) \
    ((_PVAAPLROUNDUP(((_width_) - 2), 32) + 1) * (_PVAAPLROUNDUP((_height_), 32)) * sizeof(int16_t) * 2)

/**
 * Prototype of the "Vpu" variant of the HarrisCornerS16 Init step.
 * Only the declaration lives in this header; the definition is elsewhere.
 */
void pvaAplInitHarrisCornerS16Vpu(PvaAplHarrisCornerS16 *handle, int16_t *input, int16_t *coeff, int32_t roundNBits,
                                  int32_t lambda, int32_t width, int32_t height, int32_t inputLinePitch,
                                  int32_t outputLinePitch, int32_t *output, int16_t *cbStart, int32_t cbSize,
                                  void *scratch0, void *scratch1, void *scratch2);

/**
 * Init entry point for the HarrisCornerS16 primitive.
 *
 * The body is a single PVAAPL_IMPL_FUNC(HarrisCornerS16, Init, ...) invocation that receives every
 * argument unchanged and in declaration order. PVAAPL_IMPL_FUNC is not defined in this header.
 * NOTE(review): presumably it comes from pva_apl/pva_apl_impl.h and selects an implementation such as
 * pvaAplInitHarrisCornerS16Vpu() -- confirm.
 *
 * Parameter roles below are inferred from their names only -- TODO confirm against the implementation.
 *
 * @param handle           Primitive state object passed through to the implementation.
 * @param input            Pointer to int16_t input data.
 * @param coeff            Pointer to int16_t coefficients.
 * @param roundNBits       Rounding bit count passed through to the implementation.
 * @param lambda           Integer tuning value (meaning and scaling not visible here).
 * @param width            Width value (units not visible here).
 * @param height           Height value (units not visible here).
 * @param inputLinePitch   Line pitch of the input (units not visible here).
 * @param outputLinePitch  Line pitch of the output (units not visible here).
 * @param output           Pointer to int32_t output data.
 * @param cbStart          Start pointer paired with cbSize (presumably a circular buffer -- confirm).
 * @param cbSize           Size paired with cbStart.
 * @param scratch0         Untyped scratch memory; presumably sized with PVAAPL_HARRIS_SCRATCH_SIZE -- confirm.
 * @param scratch1         Untyped scratch memory; see scratch0.
 * @param scratch2         Untyped scratch memory; see scratch0.
 */
inline void pvaAplInitHarrisCornerS16(PvaAplHarrisCornerS16 *handle, int16_t *input, int16_t *coeff, int32_t roundNBits,
                                      int32_t lambda, int32_t width, int32_t height, int32_t inputLinePitch,
                                      int32_t outputLinePitch, int32_t *output, int16_t *cbStart, int32_t cbSize,
                                      void *scratch0, void *scratch1, void *scratch2)
{
    PVAAPL_IMPL_FUNC(HarrisCornerS16, Init, handle, input, coeff, roundNBits, lambda, width, height, inputLinePitch,
                     outputLinePitch, output, cbStart, cbSize, scratch0, scratch1, scratch2);
}

/**
 * Prototype of the "Vpu" variant of the HarrisCornerS16 Update step (defined outside this header).
 */
void pvaAplUpdateHarrisCornerS16Vpu(PvaAplHarrisCornerS16 *handle, int16_t *input, int32_t *output);

/**
 * Update entry point for the HarrisCornerS16 primitive.
 *
 * Passes handle, input and output unchanged to PVAAPL_IMPL_FUNC(HarrisCornerS16, Update, ...).
 * NOTE(review): presumably this rebinds the input/output pointers of a handle that was already
 * initialized -- confirm against the implementation.
 *
 * @param handle  Primitive state object.
 * @param input   Pointer to int16_t input data.
 * @param output  Pointer to int32_t output data.
 */
inline void pvaAplUpdateHarrisCornerS16(PvaAplHarrisCornerS16 *handle, int16_t *input, int32_t *output)
{
    PVAAPL_IMPL_FUNC(HarrisCornerS16, Update, handle, input, output);
}

/**
 * Prototype of the "Vpu" variant of the HarrisCornerS16 Exec step (defined outside this header).
 */
void pvaAplExecHarrisCornerS16Vpu(PvaAplHarrisCornerS16 *handle);

/**
 * Exec entry point for the HarrisCornerS16 primitive.
 *
 * Passes handle unchanged to PVAAPL_IMPL_FUNC(HarrisCornerS16, Exec, ...).
 * NOTE(review): presumably runs the primitive as configured by the Init/Update calls -- confirm.
 *
 * @param handle  Primitive state object.
 */
inline void pvaAplExecHarrisCornerS16(PvaAplHarrisCornerS16 *handle)
{
    PVAAPL_IMPL_FUNC(HarrisCornerS16, Exec, handle);
}

/*
 * Size in bytes of (width * height) int32_t elements, with the element count rounded up
 * to a multiple of 16 first.
 * NOTE(review): presumably the size required for the scratch buffer handed to the
 * pvaAplInitNms*S32() functions -- confirm.
 */
#define PVAAPL_NMS_SCRATCH_SIZE(_width_, _height_) \
    (sizeof(int32_t) * _PVAAPLROUNDUP((_width_) * (_height_), 16))

/**
 * Prototype of the "Vpu" variant of the Nms3x3S32 Init step.
 * Only the declaration lives in this header; the definition is elsewhere.
 */
void pvaAplInitNms3x3S32Vpu(PvaAplNms3x3S32 *handle, int32_t *input, int32_t width, int32_t height,
                            int32_t inputLinePitch, int32_t outputLinePitch, int32_t *output, int32_t *cbStart,
                            int32_t cbSize, void *scratch);

/**
 * Init entry point for the Nms3x3S32 primitive.
 *
 * The body is a single PVAAPL_IMPL_FUNC(Nms3x3S32, Init, ...) invocation that receives every
 * argument unchanged and in declaration order. PVAAPL_IMPL_FUNC is not defined in this header.
 * NOTE(review): presumably it comes from pva_apl/pva_apl_impl.h and selects an implementation such as
 * pvaAplInitNms3x3S32Vpu() -- confirm.
 *
 * Parameter roles below are inferred from their names only -- TODO confirm against the implementation.
 *
 * @param handle           Primitive state object passed through to the implementation.
 * @param input            Pointer to int32_t input data.
 * @param width            Width value (units not visible here).
 * @param height           Height value (units not visible here).
 * @param inputLinePitch   Line pitch of the input (units not visible here).
 * @param outputLinePitch  Line pitch of the output (units not visible here).
 * @param output           Pointer to int32_t output data.
 * @param cbStart          Start pointer paired with cbSize (presumably a circular buffer -- confirm).
 * @param cbSize           Size paired with cbStart.
 * @param scratch          Untyped scratch memory; presumably sized with PVAAPL_NMS_SCRATCH_SIZE -- confirm.
 */
inline void pvaAplInitNms3x3S32(PvaAplNms3x3S32 *handle, int32_t *input, int32_t width, int32_t height,
                                int32_t inputLinePitch, int32_t outputLinePitch, int32_t *output, int32_t *cbStart,
                                int32_t cbSize, void *scratch)
{
    PVAAPL_IMPL_FUNC(Nms3x3S32, Init, handle, input, width, height, inputLinePitch, outputLinePitch, output, cbStart,
                     cbSize, scratch);
}

/**
 * Prototype of the "Vpu" variant of the Nms3x3S32 Update step (defined outside this header).
 */
void pvaAplUpdateNms3x3S32Vpu(PvaAplNms3x3S32 *handle, int32_t *input, int32_t *output);

/**
 * Update entry point for the Nms3x3S32 primitive.
 *
 * Passes handle, input and output unchanged to PVAAPL_IMPL_FUNC(Nms3x3S32, Update, ...).
 * NOTE(review): presumably this rebinds the input/output pointers of a handle that was already
 * initialized -- confirm against the implementation.
 *
 * @param handle  Primitive state object.
 * @param input   Pointer to int32_t input data.
 * @param output  Pointer to int32_t output data.
 */
inline void pvaAplUpdateNms3x3S32(PvaAplNms3x3S32 *handle, int32_t *input, int32_t *output)
{
    PVAAPL_IMPL_FUNC(Nms3x3S32, Update, handle, input, output);
}

/**
 * Prototype of the "Vpu" variant of the Nms3x3S32 Exec step (defined outside this header).
 */
void pvaAplExecNms3x3S32Vpu(PvaAplNms3x3S32 *handle);

/**
 * Exec entry point for the Nms3x3S32 primitive.
 *
 * Passes handle unchanged to PVAAPL_IMPL_FUNC(Nms3x3S32, Exec, ...).
 * NOTE(review): presumably runs the primitive as configured by the Init/Update calls -- confirm.
 *
 * @param handle  Primitive state object.
 */
inline void pvaAplExecNms3x3S32(PvaAplNms3x3S32 *handle)
{
    PVAAPL_IMPL_FUNC(Nms3x3S32, Exec, handle);
}

/**
 * Prototype of the "Vpu" variant of the Nms5x5S32 Init step.
 * Only the declaration lives in this header; the definition is elsewhere.
 */
void pvaAplInitNms5x5S32Vpu(PvaAplNms5x5S32 *handle, int32_t *input, int32_t width, int32_t height,
                            int32_t inputLinePitch, int32_t outputLinePitch, int32_t *output, int32_t *cbStart,
                            int32_t cbSize, void *scratch);

/**
 * Init entry point for the Nms5x5S32 primitive.
 *
 * The body is a single PVAAPL_IMPL_FUNC(Nms5x5S32, Init, ...) invocation that receives every
 * argument unchanged and in declaration order. PVAAPL_IMPL_FUNC is not defined in this header.
 * NOTE(review): presumably it comes from pva_apl/pva_apl_impl.h and selects an implementation such as
 * pvaAplInitNms5x5S32Vpu() -- confirm.
 *
 * Parameter roles below are inferred from their names only -- TODO confirm against the implementation.
 *
 * @param handle           Primitive state object passed through to the implementation.
 * @param input            Pointer to int32_t input data.
 * @param width            Width value (units not visible here).
 * @param height           Height value (units not visible here).
 * @param inputLinePitch   Line pitch of the input (units not visible here).
 * @param outputLinePitch  Line pitch of the output (units not visible here).
 * @param output           Pointer to int32_t output data.
 * @param cbStart          Start pointer paired with cbSize (presumably a circular buffer -- confirm).
 * @param cbSize           Size paired with cbStart.
 * @param scratch          Untyped scratch memory; presumably sized with PVAAPL_NMS_SCRATCH_SIZE -- confirm.
 */
inline void pvaAplInitNms5x5S32(PvaAplNms5x5S32 *handle, int32_t *input, int32_t width, int32_t height,
                                int32_t inputLinePitch, int32_t outputLinePitch, int32_t *output, int32_t *cbStart,
                                int32_t cbSize, void *scratch)
{
    PVAAPL_IMPL_FUNC(Nms5x5S32, Init, handle, input, width, height, inputLinePitch, outputLinePitch, output, cbStart,
                     cbSize, scratch);
}

/**
 * Prototype of the "Vpu" variant of the Nms5x5S32 Update step (defined outside this header).
 */
void pvaAplUpdateNms5x5S32Vpu(PvaAplNms5x5S32 *handle, int32_t *input, int32_t *output);

/**
 * Update entry point for the Nms5x5S32 primitive.
 *
 * Passes handle, input and output unchanged to PVAAPL_IMPL_FUNC(Nms5x5S32, Update, ...).
 * NOTE(review): presumably this rebinds the input/output pointers of a handle that was already
 * initialized -- confirm against the implementation.
 *
 * @param handle  Primitive state object.
 * @param input   Pointer to int32_t input data.
 * @param output  Pointer to int32_t output data.
 */
inline void pvaAplUpdateNms5x5S32(PvaAplNms5x5S32 *handle, int32_t *input, int32_t *output)
{
    PVAAPL_IMPL_FUNC(Nms5x5S32, Update, handle, input, output);
}

/**
 * Prototype of the "Vpu" variant of the Nms5x5S32 Exec step (defined outside this header).
 */
void pvaAplExecNms5x5S32Vpu(PvaAplNms5x5S32 *handle);

/**
 * Exec entry point for the Nms5x5S32 primitive.
 *
 * Passes handle unchanged to PVAAPL_IMPL_FUNC(Nms5x5S32, Exec, ...).
 * NOTE(review): presumably runs the primitive as configured by the Init/Update calls -- confirm.
 *
 * @param handle  Primitive state object.
 */
inline void pvaAplExecNms5x5S32(PvaAplNms5x5S32 *handle)
{
    PVAAPL_IMPL_FUNC(Nms5x5S32, Exec, handle);
}

/**
 * Wait hook for the primitives declared in this header.
 *
 * When CUPVA_PVA_GEN_NUMBER is greater than 2 this calls cupvaPPEWait(); for any other value
 * (including an undefined macro, which the preprocessor evaluates as 0) the body is empty.
 *
 * The parameter list is spelled (void) so the declaration is a real prototype in C and calls
 * that pass arguments are rejected at compile time; the function takes no arguments.
 */
inline void pvaAplWait(void)
{
/* AlgoWait is noop on Orin */
#if CUPVA_PVA_GEN_NUMBER > 2
    cupvaPPEWait();
#endif
}

#endif //PVA_APL_H