sampler.h
Fully qualified name: src/device/vpu_runtime/include/cupva_device/sampler.h
File members: src/device/vpu_runtime/include/cupva_device/sampler.h
/*
* Copyright (c) 2023 NVIDIA Corporation. All rights reserved.
*
* NVIDIA Corporation and its licensors retain all intellectual property
* and proprietary rights in and to this software, related documentation
* and any modifications thereto. Any use, reproduction, disclosure or
* distribution of this software and related documentation without an express
* license agreement from NVIDIA Corporation is strictly prohibited.
*/
#ifndef CUPVA_SAMPLER_H
#define CUPVA_SAMPLER_H
#include "device_core.h"
#include <cupva_types.h>
#include <stddef.h>
#include <stdint.h>
#if CUPVA_BUILD_MODE == CUPVA_NATIVE
# include <dlut_defs.h>
#endif
// GPIO bit mask shared by the sampler handshake: cupvaSamplerStart()/cupvaSamplerWait()
// poll it on the input side (DONE) and set/clear it on the output side (START).
#define CUPVA_SAMPLER_GPIO 1024
// Destination passed to cp_store() when cupvaSamplerStart() publishes the head task pointer.
#define CUPVA_SAMPLER_CFG_ADDR 0x800
// First-level address modifier (in bytes) programmed by the setup helpers:
// one VMEM superbank width.
#define CUPVA_SAMPLER_AMOD1 VMEM_SUPERBANK_WIDTH_IN_BYTES
// Bit 0 of CupvaSampler::task_info (dlut_duplicate_hndl).
#define CUPVA_SAMPLER_DUPLICATE_HANDLING_EN 1U
// Bit ranges, written as "high : low", of the fields packed into CupvaSampler::task_info.
// They are only meant to be consumed through CUPVA_FIELD_SHIFT (defined outside this file;
// presumably it yields the low bit index of the range - TODO confirm).
#define CUPVA_SAMPLER_TASKINFO_IDX_RND_MODE_RANGE 1U : 1U
#define CUPVA_SAMPLER_TASKINFO_OUT_OF_RANGE_HANDLING_RANGE 3U : 2U
#define CUPVA_SAMPLER_TASKINFO_ENTRY_TYPE_RANGE 6U : 4U
#define CUPVA_SAMPLER_TASKINFO_INDEX_TYPE_RANGE 9U : 8U
#define CUPVA_SAMPLER_TASKINFO_TASK_MODE_RANGE 14U : 12U
// Bit ranges of the fields packed into CupvaSampler::auto_idx_fmt.
// NOTE(review): "RANGER" looks like a typo for "RANGE"; the spelling is kept because
// CUPVA_SAMPLER_AUTOIDX_MODE_SHIFT below (and possibly external code) refers to it.
#define CUPVA_SAMPLER_AUTOIDX_MODE_RANGER 4U : 4U
// No matching *_SHIFT macro is defined in this file for the traversal direction (bit 0).
#define CUPVA_SAMPLER_AUTOIDX_TRAVERSAL_DIR_RANGE 0U : 0U
// Bit range of the transposition mode inside CupvaSampler::output_agen_fmt.
#define CUPVA_SAMPLER_OUTPUT_TRANS_MODE_RANGE 6U : 4U
// Shift amounts derived from the ranges above; used when assembling the packed fields.
#define CUPVA_SAMPLER_TASKINFO_IDX_RND_MODE_SHIFT CUPVA_FIELD_SHIFT(CUPVA_SAMPLER_TASKINFO_IDX_RND_MODE_RANGE)
#define CUPVA_SAMPLER_TASKINFO_OUT_OF_RANGE_HANDLING_SHIFT \
CUPVA_FIELD_SHIFT(CUPVA_SAMPLER_TASKINFO_OUT_OF_RANGE_HANDLING_RANGE)
#define CUPVA_SAMPLER_TASKINFO_TASK_MODE_SHIFT CUPVA_FIELD_SHIFT(CUPVA_SAMPLER_TASKINFO_TASK_MODE_RANGE)
#define CUPVA_SAMPLER_TASKINFO_ENTRY_TYPE_SHIFT CUPVA_FIELD_SHIFT(CUPVA_SAMPLER_TASKINFO_ENTRY_TYPE_RANGE)
#define CUPVA_SAMPLER_TASKINFO_INDEX_TYPE_SHIFT CUPVA_FIELD_SHIFT(CUPVA_SAMPLER_TASKINFO_INDEX_TYPE_RANGE)
#define CUPVA_SAMPLER_AUTOIDX_MODE_SHIFT CUPVA_FIELD_SHIFT(CUPVA_SAMPLER_AUTOIDX_MODE_RANGER)
#define CUPVA_SAMPLER_OUTPUT_TRANS_MODE_SHIFT CUPVA_FIELD_SHIFT(CUPVA_SAMPLER_OUTPUT_TRANS_MODE_RANGE)
// Ceiling division of _a by (_b >> _c).
// _b and _c are each expanded twice, so arguments must be free of side effects.
// The divisor is zero (division by zero) if _b >> _c evaluates to 0.
#define CUPVA_SAMPLER_ALIGNED(_a, _b, _c) (((_a) + ((_b) >> (_c)) - 1U) / ((_b) >> (_c)))
// Sampler task descriptor.
// The cupvaSamplerSetup() overloads fill it in, cupvaSamplerStart() hands the address of the
// head descriptor to the sampler, and next_task chains descriptors (see cupvaSamplerLink()).
// In native builds the descriptor is reinterpreted as a dlut_param_cfg by cupvaSamplerStart(),
// so member order, types and the reserved_* padding must not be changed.
struct CupvaSampler
{
uint8_t fraction_bits; // number of fractional bits in the indices, 0 ~ 16.
uint8_t reserved_0;
// Packed task configuration, containing
// bits 14:12 dlut_task_mode
// bits 9: 8 dlut_index_type
// bits 6: 4 dlut_entry_type
// bits 3: 2 dlut_out_of_range_hndl
// bit 1 dlut_idx_round_mode
// bit 0 dlut_duplicate_hndl
uint16_t task_info;
uint32_t x_int_limit; // U18, closed interval [0, x_int_limit << fraction_bits]
uint32_t y_int_limit; // U18, closed interval [0, y_int_limit << fraction_bits]
int32_t x_offset; // set from the index/tile X offset by the setup helpers
int32_t y_offset; // set from the index/tile Y offset by the 2D setup helpers
uint16_t task_len_N1; // number of pixels per round
uint16_t task_len_N2; // number of rounds
uint32_t sentinel_value; // set from the input's outOfRangeVal by the setup helpers
const void *table_addr; // address of table, 64 bytes aligned
ptrdiff_t table_addr_offset; // address update between rounds, 64 bytes aligned
uint32_t table_line_pitch; // in pixels, for conflict-free-2D mode it's 4K+2, otherwise any number
uint8_t auto_idx_patch_height; // tile height (auto-index setup only)
uint8_t auto_idx_patch_width; // tile width (auto-index setup only)
// Auto-index format, containing
// bit 0: TRAVERSAL_DIR
// bit 4: MODE (step scale needed or not)
uint8_t auto_idx_fmt;
uint8_t reserved_1;
uint16_t index_agen_lane_ofst; // U12, for transposed access
uint8_t reserved_2;
// Index address generator format, containing
// bits 6: 4 dlut_agen_transp_mode
// bit 0 dlut_xy_idx_intrlv_fmt
uint8_t index_agen_fmt;
uint16_t output_agen_lane_ofst; // U12, for transposed access
uint8_t reserved_3;
// Output address generator format, containing
// bits 6: 4 dlut_agen_transp_mode
uint8_t output_agen_fmt;
uint32_t reserved_4[3];
// Index address generator: base address plus first- and second-level address modifiers.
const void *index_agen_addr;
ptrdiff_t index_agen_AMOD1;
ptrdiff_t index_agen_AMOD2;
// Output address generator: base address plus first- and second-level address modifiers.
void *output_agen_addr;
ptrdiff_t output_agen_AMOD1;
ptrdiff_t output_agen_AMOD2;
uint32_t reserved_5;
const CupvaSampler *next_task; // next descriptor in the chain, NULL for the last one
#if CUPVA_BUILD_MODE == CUPVA_NATIVE
// Address range check in native mode
// [begin, end)
// These members exist only in native builds, so sizeof(CupvaSampler) differs between build modes.
const void *inputAddrStart;
const void *inputAddrEnd;
const void *indexAddrStart;
const void *indexAddrEnd;
const void *outputAddrStart;
const void *outputAddrEnd;
const void *transformationMatrixAddrStart;
const void *transformationMatrixAddrEnd;
#endif
};
// Sampler task modes. The numeric value is written into bits 14:12 of
// CupvaSampler::task_info. The setup helpers in this header only ever select the
// 1D/2D lookup, 1D/2D interpolation and auto-index modes; the conflict-free and
// table-reformat modes are not selected by any helper here.
enum CupvaSamplerMode
{
    SAMPLER_LOOKUP_1D               = 0,
    SAMPLER_LOOKUP_2D               = 1,
    SAMPLER_INTERP_1D               = 2,
    SAMPLER_INTERP_2D               = 3,
    SAMPLER_CONFLICT_FREE_2D_INTERP = 4,
    SAMPLER_TABLE_REFORMAT          = 5,
    SAMPLER_INTERP_2D_AUTO_IDX      = 6
};
// Element type of the lookup table. The numeric value is written into bits 6:4 of
// CupvaSampler::task_info. Each unsigned enumerator equals its signed counterpart
// plus 4; the values 3 and 7 are not assigned.
enum CupvaSamplerInputType
{
    SAMPLER_INPUT_TYPE_S8  = 0,
    SAMPLER_INPUT_TYPE_S16 = 1,
    SAMPLER_INPUT_TYPE_S32 = 2,
    SAMPLER_INPUT_TYPE_U8  = 4,
    SAMPLER_INPUT_TYPE_U16 = 5,
    SAMPLER_INPUT_TYPE_U32 = 6
};
// Element type of the index buffer. The numeric value is written into bits 9:8 of
// CupvaSampler::task_info. The value 0 is not assigned.
enum CupvaSamplerIndexType
{
    SAMPLER_INDEX_TYPE_U16 = 1,
    SAMPLER_INDEX_TYPE_U32 = 2,
};
// How the fractional part of an index is handled.
// TRUNCATE and ROUND are written as-is into the index rounding bit of task_info.
// INTERPOLATE is never written there: the setup helpers switch the task to an
// interpolating CupvaSamplerMode instead and program the rounding bit with 0.
enum CupvaSamplerIndexRoundingMode
{
    SAMPLER_FRAC_HANDLING_TRUNCATE    = 0,
    SAMPLER_FRAC_HANDLING_ROUND       = 1,
    SAMPLER_FRAC_HANDLING_INTERPOLATE = 2,
};
// Layout of X/Y indices for 2D tasks. The value is stored unchanged in
// CupvaSampler::index_agen_fmt (bit 0, dlut_xy_idx_intrlv_fmt).
// NOTE(review): presumably ELEMENTS interleaves X and Y per element and 32B
// interleaves them in 32-byte groups - confirm against the hardware documentation.
enum CupvaSamplerIndexInterleavingMode
{
    SAMPLER_INTERLEAVING_ELEMENTS = 0,
    SAMPLER_INTERLEAVING_32B      = 1,
};
// Tile type for auto-index tasks. The value is shifted by
// CUPVA_SAMPLER_AUTOIDX_MODE_SHIFT into CupvaSampler::auto_idx_fmt
// (the MODE bit, "step scale needed or not").
enum CupvaSamplerTileType
{
    SAMPLER_TILE_TRANS_ONLY  = 0,
    SAMPLER_TILE_TRANS_SCALE = 1,
};
// Traversal direction within a tile for auto-index tasks. The value is OR-ed
// unshifted into CupvaSampler::auto_idx_fmt (bit 0, TRAVERSAL_DIR).
enum CupvaSamplerTileTraversalMode
{
    SAMPLER_TRAVERSAL_STANDARD   = 0,
    SAMPLER_TRAVERSAL_TRANSPOSED = 1,
};
// Handling of indices that fall outside the table. The value is written into
// bits 3:2 of CupvaSampler::task_info. The value 0 is not assigned.
// NOTE(review): presumably CONSTANT substitutes the configured sentinel value and
// PREDICATE_OFF suppresses the output write - confirm against the hardware documentation.
enum CupvaSamplerTaskOutOfRangeHandlingMode
{
    SAMPLER_OUT_OF_RANGE_CONSTANT      = 1,
    SAMPLER_OUT_OF_RANGE_PREDICATE_OFF = 2,
};
// Bit flags for CupvaSamplerInput2D::flags.
enum CupvaSamplerInput2DFlags
{
    // When set, the 2D-indices setup overload uses a linePitch of 0 literally
    // instead of replacing it with the input width.
    NO_DEFAULT_LINE_PITCH = 1 << 0,
};
// Description of a 1D lookup table, consumed by the 1D cupvaSamplerSetup() overload.
struct CupvaSamplerInput1D
{
    void const *data;                                      // table base address (-> table_addr)
    CupvaSamplerInputType type;                            // table element type; also sizes the output elements
    uint32_t length;                                       // table length (-> x_int_limit)
    ptrdiff_t inputAdv;                                    // table address update between rounds (-> table_addr_offset)
    CupvaSamplerTaskOutOfRangeHandlingMode outOfRangeMode; // handling of out-of-range indices
    uint32_t outOfRangeVal;                                // sentinel value (-> sentinel_value)
};
// Description of a 2D lookup table, consumed by the 2D-indices and tiles
// cupvaSamplerSetup() overloads.
struct CupvaSamplerInput2D
{
    void const *data;                                      // table base address (-> table_addr)
    CupvaSamplerInputType type;                            // table element type; also sizes the output elements
    uint32_t width;                                        // table width (-> x_int_limit)
    uint32_t height;                                       // table height (-> y_int_limit)
    uint32_t linePitch;                                    // table line pitch (-> table_line_pitch); 0 selects width,
                                                           // unless NO_DEFAULT_LINE_PITCH is honoured by the overload
    ptrdiff_t inputAdv;                                    // table address update between rounds (-> table_addr_offset)
    CupvaSamplerTaskOutOfRangeHandlingMode outOfRangeMode; // handling of out-of-range indices
    uint32_t outOfRangeVal;                                // sentinel value (-> sentinel_value)
    uint32_t flags;                                        // bitwise OR of CupvaSamplerInput2DFlags
};
// Description of the index buffer for a 1D task.
struct CupvaSamplerIndices1D
{
    void const *data;                                 // index buffer address (-> index_agen_addr)
    CupvaSamplerIndexType type;                       // index element type
    uint16_t width;                                   // indices per round (-> task_len_N1)
    uint16_t height;                                  // number of rounds (-> task_len_N2)
    uint16_t linePitch;                               // index line pitch in index elements; 0 selects width
    uint8_t fractionalBits;                           // fractional bits per index (-> fraction_bits)
    CupvaSamplerIndexRoundingMode fractionalHandling; // truncate, round or interpolate
    int32_t offset;                                   // index offset (-> x_offset)
};
// Description of the index buffer for a 2D task.
struct CupvaSamplerIndices2D
{
    void const *data;                                 // index buffer address (-> index_agen_addr)
    CupvaSamplerIndexType type;                       // index element type
    uint16_t width;                                   // index entries per round (-> task_len_N1)
    uint16_t height;                                  // number of rounds (-> task_len_N2)
    uint16_t linePitch;                               // index line pitch in index entries; 0 selects width
    uint8_t fractionalBits;                           // fractional bits per index (-> fraction_bits)
    CupvaSamplerIndexRoundingMode fractionalHandling; // truncate, round or interpolate
    int32_t offsetX;                                  // X index offset (-> x_offset)
    int32_t offsetY;                                  // Y index offset (-> y_offset)
    CupvaSamplerIndexInterleavingMode interleaving;   // X/Y layout (-> index_agen_fmt)
};
// Description of the tiles for an auto-index (SAMPLER_INTERP_2D_AUTO_IDX) task.
struct CupvaSamplerTiles
{
    void const *data;                                // per-tile data address (-> index_agen_addr)
    CupvaSamplerTileType type;                       // tile type (-> auto_idx_fmt MODE bit)
    uint8_t width;                                   // tile width (-> auto_idx_patch_width)
    uint8_t height;                                  // tile height (-> auto_idx_patch_height)
    uint16_t count;                                  // number of tiles (-> task_len_N2)
    uint8_t fractionalBits;                          // fractional bits (-> fraction_bits)
    int32_t offsetX;                                 // X offset (-> x_offset)
    int32_t offsetY;                                 // Y offset (-> y_offset)
    CupvaSamplerTileTraversalMode tileTraversalMode; // traversal direction (-> auto_idx_fmt bit 0)
};
// Description of the output buffer of a sampler task.
struct CupvaSamplerOutput
{
    void *data;                  // output buffer address (-> output_agen_addr)
    uint16_t pitch;              // output line pitch in elements; the setup helpers scale it to bytes
    TranspositionMode transMode; // TRANS_MODE_NONE, or any other value for transposed output
};
// Returns log2 of the element size in bytes for a table element type:
// 0 for the 8-bit types, 1 for the 16-bit types, 2 for the 32-bit types.
// Any value that is not a CupvaSamplerInputType enumerator also yields 0.
// Callers use the result as a shift amount to convert element counts to bytes.
inline uint16_t _cupvaSamplerGetInputBppLog2(CupvaSamplerInputType const type)
{
    uint16_t log2Bytes;
    if ((type == SAMPLER_INPUT_TYPE_S16) || (type == SAMPLER_INPUT_TYPE_U16))
    {
        log2Bytes = 1U;
    }
    else if ((type == SAMPLER_INPUT_TYPE_S32) || (type == SAMPLER_INPUT_TYPE_U32))
    {
        log2Bytes = 2U;
    }
    else
    {
        // 8-bit element types and unrecognised values
        log2Bytes = 0U;
    }
    return log2Bytes;
}
// Returns log2 of the element size in bytes for an index type:
// 1 for SAMPLER_INDEX_TYPE_U16, 2 for SAMPLER_INDEX_TYPE_U32, 0 for any other value.
inline uint16_t _cupvaSamplerGetIndexBppLog2(CupvaSamplerIndexType const type)
{
    uint16_t log2Bytes;
    if (type == SAMPLER_INDEX_TYPE_U16)
    {
        log2Bytes = 1U;
    }
    else if (type == SAMPLER_INDEX_TYPE_U32)
    {
        log2Bytes = 2U;
    }
    else
    {
        // unrecognised index type
        log2Bytes = 0U;
    }
    return log2Bytes;
}
// Returns the extra shift applied to the index element size for a task mode:
// 1 for SAMPLER_LOOKUP_2D, SAMPLER_INTERP_2D and SAMPLER_CONFLICT_FREE_2D_INTERP,
// 0 for every other value (including SAMPLER_INTERP_2D_AUTO_IDX).
// The 2D setup helper adds the result to the index element size log2 before
// converting the index line pitch to bytes.
inline uint16_t _cupvaSamplerGetIndexPpeLog2(CupvaSamplerMode const mode)
{
    uint16_t log2Ppe = 0U;
    if ((mode == SAMPLER_LOOKUP_2D) || (mode == SAMPLER_INTERP_2D) || (mode == SAMPLER_CONFLICT_FREE_2D_INTERP))
    {
        log2Ppe = 1U;
    }
    return log2Ppe;
}
// Computes the second-level address modifier used by the setup helpers:
//
//     byteOffset - (ceil(n1Iters / (CUPVA_SAMPLER_AMOD1 >> bpe)) - 1) * amod1
//
// byteOffset  byte distance the address should have moved after one round
// n1Iters     number of elements processed per round
// amod1       first-level address modifier, in bytes
// bpe         log2 of the element size in bytes
//
// With n1Iters == 0 the step count wraps to UINT32_MAX before the signed
// conversion, so the result is byteOffset + amod1 on two's-complement targets.
inline int32_t _cupvaSamplerAmod2(uint32_t byteOffset, uint32_t n1Iters, int32_t amod1, uint32_t bpe)
{
    // Number of first-level steps needed to cover n1Iters elements, rounded up.
    uint32_t const stepsPerRound = CUPVA_SAMPLER_ALIGNED(n1Iters, (uint32_t)CUPVA_SAMPLER_AMOD1, bpe);
    // One step fewer than stepsPerRound, scaled by the first-level modifier.
    int32_t const firstLevelAdvance = (int32_t)(stepsPerRound - 1U) * amod1;
    return (int32_t)byteOffset - firstLevelAdvance;
}
// cupvaSamplerSetupAddrRangeChecks(task) is called by the setup and update helpers after
// they modify a descriptor.
// In native builds it is a real function, declared here and defined elsewhere; the
// descriptor carries extra *AddrStart/*AddrEnd members in that build mode.
// In all other builds it is a macro that expands to nothing, so the call sites compile
// to an empty statement and the argument is not evaluated.
#if CUPVA_BUILD_MODE == CUPVA_NATIVE
void cupvaSamplerSetupAddrRangeChecks(CupvaSampler *task);
#else
# define cupvaSamplerSetupAddrRangeChecks(x)
#endif
// Fills in a sampler task descriptor for a 1D lookup or 1D interpolation.
//
// task     descriptor to fill; members not relevant to 1D tasks (table_line_pitch,
//          y_offset, auto_idx_*, reserved_*) are left untouched
// input    1D table description
// indices  index buffer description; SAMPLER_FRAC_HANDLING_INTERPOLATE selects
//          SAMPLER_INTERP_1D, anything else selects SAMPLER_LOOKUP_1D
// output   output buffer description
//
// The descriptor is left unlinked (next_task == NULL).
//
// NOTE(review): unlike the two 2D overloads, this overload does not set
// CUPVA_SAMPLER_DUPLICATE_HANDLING_EN in task_info, although the comment that used to
// sit here said duplicate handling is enabled by default. Confirm which is intended.
inline void cupvaSamplerSetup(CupvaSampler *task, CupvaSamplerInput1D const *input,
                              CupvaSamplerIndices1D const *indices, CupvaSamplerOutput const *output)
{
    // Interpolation is expressed through the task mode; truncate/round go into the rounding field.
    CupvaSamplerMode mode  = SAMPLER_LOOKUP_1D;
    uint16_t roundingField = (uint16_t)indices->fractionalHandling;
    if (indices->fractionalHandling == SAMPLER_FRAC_HANDLING_INTERPOLATE)
    {
        mode          = SAMPLER_INTERP_1D;
        roundingField = (uint16_t)0U;
    }

    uint16_t const n1 = indices->width;
    uint16_t const n2 = indices->height;
    // An index line pitch of 0 selects the index width.
    uint16_t const indexPitch = (indices->linePitch != 0U) ? indices->linePitch : indices->width;

    // Assemble the packed task_info word locally, then store it once.
    uint16_t info = ((uint16_t)mode << CUPVA_SAMPLER_TASKINFO_TASK_MODE_SHIFT);
    info |= ((uint16_t)input->type << CUPVA_SAMPLER_TASKINFO_ENTRY_TYPE_SHIFT);
    info |= ((uint16_t)indices->type << CUPVA_SAMPLER_TASKINFO_INDEX_TYPE_SHIFT);
    info |= ((uint16_t)input->outOfRangeMode << CUPVA_SAMPLER_TASKINFO_OUT_OF_RANGE_HANDLING_SHIFT);
    info |= (roundingField << CUPVA_SAMPLER_TASKINFO_IDX_RND_MODE_SHIFT);
    task->task_info      = info;
    task->task_len_N1    = n1;
    task->task_len_N2    = n2;
    task->sentinel_value = input->outOfRangeVal;

    // Table.
    task->table_addr        = input->data;
    task->table_addr_offset = input->inputAdv;
    task->x_int_limit       = input->length;
    task->y_int_limit       = 0U;

    // Indices.
    uint16_t const idxShift      = _cupvaSamplerGetIndexBppLog2(indices->type);
    uint32_t const idxPitchBytes = (uint32_t)indexPitch << idxShift;
    task->x_offset             = indices->offset;
    task->fraction_bits        = indices->fractionalBits;
    task->index_agen_fmt       = 0U;
    task->index_agen_addr      = indices->data;
    task->index_agen_lane_ofst = 0U;
    task->index_agen_AMOD1     = (int32_t)CUPVA_SAMPLER_AMOD1;
    task->index_agen_AMOD2     = _cupvaSamplerAmod2(idxPitchBytes, n1, (int32_t)CUPVA_SAMPLER_AMOD1, idxShift);

    // Output. Elements have the same size as the table entries.
    uint16_t const outputBpp     = _cupvaSamplerGetInputBppLog2(input->type);
    uint32_t const outputLPBytes = (uint32_t)output->pitch << outputBpp;
    // Only TRANS_MODE_1 is supported: every mode other than NONE is programmed as 1.
    uint8_t transMode    = 0U;
    uint8_t transHOffset = 0U;
    if (output->transMode != TRANS_MODE_NONE)
    {
        transMode    = 1U;
        transHOffset = (uint8_t)(1U << outputBpp);
    }
    CUPVA_VPU_ASSERT(outputLPBytes >= transHOffset);
    uint32_t const laneOfst32 = (outputLPBytes - (uint32_t)transHOffset) / VMEM_SUPERBANK_WIDTH_IN_BYTES;
    CUPVA_VPU_ASSERT(laneOfst32 <= UINT16_MAX);
    uint16_t const laneOfst       = (uint16_t)laneOfst32;
    uint32_t const transposeAmod1 = ((uint32_t)CUPVA_SAMPLER_AMOD1 >> outputBpp) * outputLPBytes;

    task->output_agen_fmt  = (transMode << CUPVA_SAMPLER_OUTPUT_TRANS_MODE_SHIFT);
    task->output_agen_addr = output->data;

    uint32_t roundAdvanceBytes;
    if (output->transMode == TRANS_MODE_NONE)
    {
        task->output_agen_lane_ofst = 0U;
        task->output_agen_AMOD1     = (int32_t)CUPVA_SAMPLER_AMOD1;
        roundAdvanceBytes           = outputLPBytes;
    }
    else
    {
        task->output_agen_lane_ofst = laneOfst;
        task->output_agen_AMOD1     = (int32_t)transposeAmod1;
        roundAdvanceBytes           = transHOffset;
    }

    // AMOD2 depends on the AMOD1 value just stored; a zero output pitch forces AMOD2 to 0.
    int32_t const outAmod2 = _cupvaSamplerAmod2(roundAdvanceBytes, n1, task->output_agen_AMOD1, outputBpp);
    if (outputLPBytes == 0U)
    {
        task->output_agen_AMOD2 = (int32_t)0;
    }
    else
    {
        task->output_agen_AMOD2 = outAmod2;
    }

    task->next_task = NULL;
    cupvaSamplerSetupAddrRangeChecks(task);
}
// Fills in a sampler task descriptor for a 2D lookup or 2D interpolation driven by
// an explicit index buffer.
//
// task     descriptor to fill; auto_idx_* and reserved_* members are left untouched
// input    2D table description; a linePitch of 0 is replaced by the width unless
//          NO_DEFAULT_LINE_PITCH is set in input->flags
// indices  index buffer description; SAMPLER_FRAC_HANDLING_INTERPOLATE selects
//          SAMPLER_INTERP_2D, anything else selects SAMPLER_LOOKUP_2D
// output   output buffer description
//
// Duplicate handling is always enabled. The descriptor is left unlinked (next_task == NULL).
inline void cupvaSamplerSetup(CupvaSampler *task, CupvaSamplerInput2D const *input,
                              CupvaSamplerIndices2D const *indices, CupvaSamplerOutput const *output)
{
    // Interpolation is expressed through the task mode; truncate/round go into the rounding field.
    CupvaSamplerMode mode  = SAMPLER_LOOKUP_2D;
    uint16_t roundingField = (uint16_t)indices->fractionalHandling;
    if (indices->fractionalHandling == SAMPLER_FRAC_HANDLING_INTERPOLATE)
    {
        mode          = SAMPLER_INTERP_2D;
        roundingField = (uint16_t)0U;
    }

    uint16_t const n1 = indices->width;
    uint16_t const n2 = indices->height;
    // An index line pitch of 0 selects the index width.
    uint16_t const indexPitch = (indices->linePitch != 0U) ? indices->linePitch : indices->width;

    // A table line pitch of 0 selects the table width, unless the caller opted out.
    uint32_t const noDefaultPitchMask = (uint32_t)NO_DEFAULT_LINE_PITCH;
    uint32_t tablePitch               = input->linePitch;
    if ((input->linePitch == 0U) && ((input->flags & noDefaultPitchMask) == 0U))
    {
        tablePitch = input->width;
    }

    // Assemble the packed task_info word locally, then store it once.
    // Duplicate detection and consolidation is enabled by default.
    uint16_t info = CUPVA_SAMPLER_DUPLICATE_HANDLING_EN;
    info |= (uint16_t)mode << CUPVA_SAMPLER_TASKINFO_TASK_MODE_SHIFT;
    info |= ((uint16_t)input->type << CUPVA_SAMPLER_TASKINFO_ENTRY_TYPE_SHIFT);
    info |= ((uint16_t)indices->type << CUPVA_SAMPLER_TASKINFO_INDEX_TYPE_SHIFT);
    info |= ((uint16_t)input->outOfRangeMode << CUPVA_SAMPLER_TASKINFO_OUT_OF_RANGE_HANDLING_SHIFT);
    info |= ((uint16_t)roundingField << CUPVA_SAMPLER_TASKINFO_IDX_RND_MODE_SHIFT);
    task->task_info      = info;
    task->task_len_N1    = n1;
    task->task_len_N2    = n2;
    task->sentinel_value = input->outOfRangeVal;

    // Table.
    task->table_addr        = input->data;
    task->table_addr_offset = input->inputAdv;
    task->table_line_pitch  = tablePitch;
    task->x_int_limit       = input->width;
    task->y_int_limit       = input->height;

    // Indices. The mode-dependent extra shift is added to the index element size.
    uint16_t const idxBpp        = _cupvaSamplerGetIndexBppLog2(indices->type);
    uint16_t const idxShift      = idxBpp + _cupvaSamplerGetIndexPpeLog2(mode);
    uint32_t const idxPitchBytes = (uint32_t)indexPitch << idxShift;
    task->x_offset             = indices->offsetX;
    task->y_offset             = indices->offsetY;
    task->fraction_bits        = indices->fractionalBits;
    task->index_agen_fmt       = (uint8_t)indices->interleaving;
    task->index_agen_addr      = indices->data;
    task->index_agen_lane_ofst = 0U;
    task->index_agen_AMOD1     = (int32_t)CUPVA_SAMPLER_AMOD1;
    task->index_agen_AMOD2     = _cupvaSamplerAmod2(idxPitchBytes, n1, (int32_t)CUPVA_SAMPLER_AMOD1, idxShift);

    // Output. Elements have the same size as the table entries.
    uint16_t const outputBpp     = _cupvaSamplerGetInputBppLog2(input->type);
    uint32_t const outputLPBytes = (uint32_t)output->pitch << outputBpp;
    // Only TRANS_MODE_1 is supported: every mode other than NONE is programmed as 1.
    uint8_t transMode    = 0U;
    uint8_t transHOffset = 0U;
    if (output->transMode != TRANS_MODE_NONE)
    {
        transMode    = 1U;
        transHOffset = (uint8_t)(1U << outputBpp);
    }
    CUPVA_VPU_ASSERT(outputLPBytes >= transHOffset);
    uint32_t const laneOfst32 = (outputLPBytes - (uint32_t)transHOffset) / VMEM_SUPERBANK_WIDTH_IN_BYTES;
    CUPVA_VPU_ASSERT(laneOfst32 <= UINT16_MAX);
    uint16_t const laneOfst       = (uint16_t)laneOfst32;
    uint32_t const transposeAmod1 = ((uint32_t)CUPVA_SAMPLER_AMOD1 >> outputBpp) * outputLPBytes;

    task->output_agen_fmt  = (transMode << CUPVA_SAMPLER_OUTPUT_TRANS_MODE_SHIFT);
    task->output_agen_addr = output->data;

    uint32_t roundAdvanceBytes;
    if (output->transMode == TRANS_MODE_NONE)
    {
        task->output_agen_lane_ofst = 0U;
        task->output_agen_AMOD1     = (int32_t)CUPVA_SAMPLER_AMOD1;
        roundAdvanceBytes           = outputLPBytes;
    }
    else
    {
        task->output_agen_lane_ofst = laneOfst;
        task->output_agen_AMOD1     = (int32_t)transposeAmod1;
        roundAdvanceBytes           = transHOffset;
    }

    // AMOD2 depends on the AMOD1 value just stored; a zero output pitch forces AMOD2 to 0.
    int32_t const outAmod2 = _cupvaSamplerAmod2(roundAdvanceBytes, n1, task->output_agen_AMOD1, outputBpp);
    if (outputLPBytes == 0U)
    {
        task->output_agen_AMOD2 = (int32_t)0;
    }
    else
    {
        task->output_agen_AMOD2 = outAmod2;
    }

    task->next_task = NULL;
    cupvaSamplerSetupAddrRangeChecks(task);
}
// Fills in a sampler task descriptor for 2D interpolation with automatically
// generated indices (SAMPLER_INTERP_2D_AUTO_IDX).
//
// task    descriptor to fill; index_agen_fmt, index_agen_lane_ofst, index_agen_AMOD2
//         and the reserved_* members are left untouched
// input   2D table description; a linePitch of 0 is replaced by the width
// tiles   tile description; each round covers width * height elements and there is
//         one round per tile
// output  output buffer description
//
// Duplicate handling is always enabled, the index type is fixed to
// SAMPLER_INDEX_TYPE_U32 and the rounding field is left at 0.
// The descriptor is left unlinked (next_task == NULL).
//
// NOTE(review): input->flags is not consulted here, so NO_DEFAULT_LINE_PITCH has no
// effect for this overload, unlike the 2D-indices overload. Confirm whether intended.
inline void cupvaSamplerSetup(CupvaSampler *task, CupvaSamplerInput2D const *input, CupvaSamplerTiles const *tiles,
                              CupvaSamplerOutput const *output)
{
    CupvaSamplerMode const mode = SAMPLER_INTERP_2D_AUTO_IDX;
    uint16_t const n1           = (uint16_t)(tiles->width) * tiles->height;
    uint16_t const n2           = tiles->count;

    // A table line pitch of 0 selects the table width.
    uint32_t tablePitch = input->linePitch;
    if (input->linePitch == 0U)
    {
        tablePitch = input->width;
    }

    // Assemble the packed task_info word locally, then store it once.
    // Duplicate detection and consolidation is enabled by default.
    uint16_t info = CUPVA_SAMPLER_DUPLICATE_HANDLING_EN;
    info |= (uint16_t)mode << CUPVA_SAMPLER_TASKINFO_TASK_MODE_SHIFT;
    info |= (uint16_t)input->type << CUPVA_SAMPLER_TASKINFO_ENTRY_TYPE_SHIFT;
    info |= ((uint16_t)SAMPLER_INDEX_TYPE_U32 << CUPVA_SAMPLER_TASKINFO_INDEX_TYPE_SHIFT);
    info |= (uint16_t)input->outOfRangeMode << CUPVA_SAMPLER_TASKINFO_OUT_OF_RANGE_HANDLING_SHIFT;
    task->task_info      = info;
    task->task_len_N1    = n1;
    task->task_len_N2    = n2;
    task->sentinel_value = input->outOfRangeVal;

    // Table.
    task->table_addr        = input->data;
    task->table_addr_offset = input->inputAdv;
    task->table_line_pitch  = tablePitch;
    task->x_int_limit       = input->width;
    task->y_int_limit       = input->height;

    // Tiles.
    task->x_offset              = tiles->offsetX;
    task->y_offset              = tiles->offsetY;
    task->fraction_bits         = tiles->fractionalBits;
    task->auto_idx_patch_width  = tiles->width;
    task->auto_idx_patch_height = tiles->height;
    task->index_agen_addr       = tiles->data;
    task->index_agen_AMOD1      = (int32_t)CUPVA_SAMPLER_AMOD1;
    // Traversal direction occupies bit 0, the tile type goes into the MODE bit.
    task->auto_idx_fmt = (uint8_t)tiles->tileTraversalMode | ((uint8_t)tiles->type << CUPVA_SAMPLER_AUTOIDX_MODE_SHIFT);

    // Output. Elements have the same size as the table entries.
    uint16_t const outputBpp     = _cupvaSamplerGetInputBppLog2(input->type);
    uint32_t const outputLPBytes = (uint32_t)output->pitch << outputBpp;
    // Only TRANS_MODE_1 is supported: every mode other than NONE is programmed as 1.
    uint8_t transMode    = 0U;
    uint8_t transHOffset = 0U;
    if (output->transMode != TRANS_MODE_NONE)
    {
        transMode    = 1U;
        transHOffset = (uint8_t)(1U << outputBpp);
    }
    CUPVA_VPU_ASSERT(outputLPBytes >= transHOffset);
    uint32_t const laneOfst32 = (outputLPBytes - (uint32_t)transHOffset) / VMEM_SUPERBANK_WIDTH_IN_BYTES;
    CUPVA_VPU_ASSERT(laneOfst32 <= UINT16_MAX);
    uint16_t const laneOfst       = (uint16_t)laneOfst32;
    uint32_t const transposeAmod1 = ((uint32_t)CUPVA_SAMPLER_AMOD1 >> outputBpp) * outputLPBytes;

    task->output_agen_fmt  = (transMode << CUPVA_SAMPLER_OUTPUT_TRANS_MODE_SHIFT);
    task->output_agen_addr = output->data;

    uint32_t roundAdvanceBytes;
    if (output->transMode == TRANS_MODE_NONE)
    {
        task->output_agen_lane_ofst = 0U;
        task->output_agen_AMOD1     = (int32_t)CUPVA_SAMPLER_AMOD1;
        roundAdvanceBytes           = outputLPBytes;
    }
    else
    {
        task->output_agen_lane_ofst = laneOfst;
        task->output_agen_AMOD1     = (int32_t)transposeAmod1;
        roundAdvanceBytes           = transHOffset;
    }

    // AMOD2 depends on the AMOD1 value just stored; a zero output pitch forces AMOD2 to 0.
    int32_t const outAmod2 = _cupvaSamplerAmod2(roundAdvanceBytes, n1, task->output_agen_AMOD1, outputBpp);
    if (outputLPBytes == 0U)
    {
        task->output_agen_AMOD2 = (int32_t)0;
    }
    else
    {
        task->output_agen_AMOD2 = outAmod2;
    }

    task->next_task = NULL;
    cupvaSamplerSetupAddrRangeChecks(task);
}
// Starts execution of a chain of sampler tasks.
//
// head  first descriptor of the chain (descriptors are linked through next_task)
//
// Native builds forward the descriptor to start_dlut(). All other builds issue a
// memory fence, publish the descriptor address at CUPVA_SAMPLER_CFG_ADDR, wait until
// the sampler's DONE input is deasserted and then assert START. The function returns
// without waiting for completion; use cupvaSamplerWait() for that.
inline void cupvaSamplerStart(CupvaSampler const *head)
{
#if CUPVA_BUILD_MODE == CUPVA_NATIVE
    start_dlut((const dlut_param_cfg *)head);
#else
    // Fence before handing the descriptor address to the sampler
    // (presumably so that the descriptor writes are complete - TODO confirm).
    chess_memory_fence();
    cp_store((intptr_t)head, CUPVA_SAMPLER_CFG_ADDR);
#    if defined(CUPVA_PROFILING_FLAG) && CUPVA_PROFILING_FLAG == 1
    // Profiling builds poll the input pin instead of using wfe_gpi().
    while ((gpi_rd() & CUPVA_SAMPLER_GPIO) != 0)
    {
        // busy-wait until DONE is deasserted
    }
#    else
    wfe_gpi(CUPVA_SAMPLER_GPIO, 0); // wait for DONE to be deasserted
#    endif
    gpo_setl(CUPVA_SAMPLER_GPIO); // assert START
#endif
}
// Blocks until the sampler signals completion, then releases the START request.
//
// Native builds call check_dlut_done_and_clear_start(). All other builds wait until
// the sampler's DONE input is asserted and then deassert START.
inline void cupvaSamplerWait(void)
{
#if CUPVA_BUILD_MODE == CUPVA_NATIVE
    check_dlut_done_and_clear_start();
#else
#    if defined(CUPVA_PROFILING_FLAG) && CUPVA_PROFILING_FLAG == 1
    // Profiling builds poll the input pin instead of using wfe_gpi().
    while ((gpi_rd() & CUPVA_SAMPLER_GPIO) == 0)
    {
        // busy-wait until DONE is asserted
    }
#    else
    wfe_gpi(CUPVA_SAMPLER_GPIO, CUPVA_SAMPLER_GPIO); // wait for DONE to be asserted
#    endif
    gpo_clrl(CUPVA_SAMPLER_GPIO); // deassert START
#endif
}
// Replaces the table, index and output base addresses of an already configured task.
// All other descriptor members are left unchanged. In native builds the address
// range checks are set up again afterwards.
//
// task     descriptor to update
// input    new table address
// indices  new index (or tile data) address
// output   new output address
inline void cupvaSamplerUpdateAddrs(CupvaSampler *task, void const *input, void const *indices, void *output)
{
    task->output_agen_addr = output;
    task->index_agen_addr  = indices;
    task->table_addr       = input;
    cupvaSamplerSetupAddrRangeChecks(task);
}
// Replaces the table dimensions of an already configured task.
// The values are stored as given: unlike the setup helpers, a linePitch of 0 is
// not replaced by the width. In native builds the address range checks are set up
// again afterwards.
//
// task       descriptor to update
// width      new x_int_limit
// height     new y_int_limit
// linePitch  new table_line_pitch
inline void cupvaSamplerUpdateInputAttributes(CupvaSampler *task, uint32_t const width, uint32_t const height,
                                              uint32_t const linePitch)
{
    task->table_line_pitch = linePitch;
    task->y_int_limit      = height;
    task->x_int_limit      = width;
    cupvaSamplerSetupAddrRangeChecks(task);
}
// Chains two task descriptors by storing next in task->next_task.
// Passing NULL for next ends the chain at task. Neither descriptor is otherwise modified.
inline void cupvaSamplerLink(CupvaSampler *task, CupvaSampler const *next)
{
    task->next_task = next;
}
#endif