sequence_dataflow.h
Fully qualified name: src/device/vpu_runtime/include/cupva_device/sequence_dataflow.h
File members: src/device/vpu_runtime/include/cupva_device/sequence_dataflow.h
/*
* Copyright (c) 2023 NVIDIA Corporation. All rights reserved.
*
* NVIDIA Corporation and its licensors retain all intellectual property
* and proprietary rights in and to this software, related documentation
* and any modifications thereto. Any use, reproduction, disclosure or
* distribution of this software and related documentation without an express
* license agreement from NVIDIA Corporation is strictly prohibited.
*/
#ifndef CUPVA_DEVICE_SEQUENCE_DATAFLOW_H
#define CUPVA_DEVICE_SEQUENCE_DATAFLOW_H
#include "impl/dma_common.h"
#include <cupva_types.h>
// Implementing an address update requires up to 4 writes: DESCR_CNTL, SRC_ADR and DST_ADR,
// plus LP_CNTL when the line pitches are updated as well
#define SEQDF_MAX_WRITES_PER_ADDR_UPDATE 4U
// Implementing a tile size update requires 1 write (TILE_CNTL)
#define SEQDF_MAX_WRITES_PER_TILE_UPDATE 1U
// Compute size of the VPUC table for commands: one worst-case address update plus one
// tile update for every transfer.
// NOTE(review): cupvaSQDFOpen reserves entries[0] for a no-op, so staging a full worst-case
// batch for every transfer would need one slot more than this - confirm that
// SEQUENCE_DATAFLOW_MAX_TRANSFERS leaves headroom, since the update functions do no bounds check.
#define SEQDF_VPUC_ENTRIES \
((SEQDF_MAX_WRITES_PER_ADDR_UPDATE + SEQDF_MAX_WRITES_PER_TILE_UPDATE) * SEQUENCE_DATAFLOW_MAX_TRANSFERS)
// VPUC table needs special magic number (stored in the table header by cupvaSQDFOpen)
#define SEQDF_VPUC_MAGIC 0xDEADC0DEU
// One command of the VPUC table: a 32-bit value paired with the address it targets.
// The cupvaSQDF* functions in this file always fill `address` from CUPVA_GET_DESC_ATTR_ADDR
// (a DMA descriptor attribute) and `value` with the word to store there.
struct SequenceDataFlowVPUCEntry
{
// Word to be written
uint32_t value;
// Where the word is written to
uint32_t address;
};
// Command table of a sequence dataflow: a two-word header followed by a fixed-capacity
// array of address/value writes.
struct SequenceDataFlowVPUCTable
{
// Number of leading entries[] published for the next trigger; written by cupvaSQDFOpen,
// cupvaSQDFFlushAndTrig, cupvaSQDFTrig and cupvaSQDFClose
uint32_t entryCount;
// Set to SEQDF_VPUC_MAGIC by cupvaSQDFOpen
uint32_t magic;
// Command slots; entries[0] is set up as a no-op by cupvaSQDFOpen
SequenceDataFlowVPUCEntry entries[SEQDF_VPUC_ENTRIES];
};
// Memory type of a transfer's source or destination, as passed to cupvaSQDFUpdateAddr.
// The numeric values matter: cupvaSQDFUpdateAddr casts them to uint32_t and places them
// directly in the DSTM / DDTM fields of DESCR_CNTL, so they are spelled out explicitly.
enum MemType
{
    MEMTYPE_INVALID = 0,
    MEMTYPE_DRAM    = 1,
    MEMTYPE_VMEM    = 2,
    MEMTYPE_L2SRAM  = 3
};
// Device-side state of one sequence dataflow. Instances are declared with
// VMEM_SEQDF_HANDLER, which also exports vpuCTable and metadata.
struct SequenceDataFlowHandler
{
// Command table filled by the cupvaSQDFUpdate* functions
SequenceDataFlowVPUCTable vpuCTable;
// Dataflow parameters; this file reads transfers[].desc_id, transfers[].link_did, config_id,
// cfg_trig, data_trig, trigger_count and xfer0_trigger from it and never writes it
SequenceDataFlowParameters metadata;
// Index of the next free table slot (staged entries, including the no-op at entries[0]);
// copied into vpuCTable.entryCount by cupvaSQDFFlushAndTrig
uint32_t entryCount;
// Selects the trigger mask: 0 uses metadata.cfg_trig, anything else metadata.data_trig;
// advanced by cupvaSQDFSync. NOTE(review): never initialised in this file - presumably
// relies on zero-initialised storage; confirm.
uint32_t currentTrigIdx;
};
// Declares a SequenceDataFlowHandler called `name` through VMEM_VAR(bank, ...) and exports
// two of its members through CUPVA_EXPORT: the command table as _V<name> (tagged
// VMEM_TYPE_VPUC_TABLE) and the metadata as _M<name>.
// Note that the expansion already ends with a semicolon.
#define VMEM_SEQDF_HANDLER(bank, name) \
VMEM_VAR(bank, SequenceDataFlowHandler, name); \
CUPVA_EXPORT(name.vpuCTable, _V##name, VMEM_TYPE_VPUC_TABLE); \
CUPVA_EXPORT(name.metadata, _M##name);
// Stages new source and destination addresses for one transfer of the sequence.
//
// Three commands are appended to the handler's table, targeting the DESCR_CNTL, SRC_ADR and
// DST_ADR attributes of the transfer's descriptor, and handler.entryCount grows by three.
// Nothing is triggered here and vpuCTable.entryCount is not touched.
//
//   handler      handler whose table receives the commands
//   transferIdx  index into handler.metadata.transfers
//   srcAddr      source address; the low 32 bits go to SRC_ADR, the value shifted right by 32
//                feeds the SRC_ADDR1 field and shifted right by 40 feeds the SRC_TF field
//   srcMemType   source memory type, stored in the DSTM field
//   dstAddr      destination address, split like srcAddr (DST_ADR / DST_ADDR1 / DST_TF)
//   dstMemType   destination memory type, stored in the DDTM field
//
// The descriptor's existing link (metadata link_did) is written back into LINK_DID.
// No bounds check is made against the table capacity.
inline void cupvaSQDFUpdateAddr(SequenceDataFlowHandler &handler, uint8_t const transferIdx, uint64_t const srcAddr,
MemType const srcMemType, uint64_t const dstAddr, MemType const dstMemType)
{
    uint32_t const base    = handler.entryCount;
    uint32_t const desc    = handler.metadata.transfers[transferIdx].desc_id;
    uint32_t const linkId  = handler.metadata.transfers[transferIdx].link_did;
    uint32_t const srcMode = (uint32_t)srcMemType;
    uint32_t const dstMode = (uint32_t)dstMemType;

    // Assemble the DESCR_CNTL word one field at a time.
    uint32_t ctlWord = CUPVA_DMA_FIELD(DESCR_CNTL, SRC_ADDR1, (uint32_t)(srcAddr >> 32U));
    ctlWord |= CUPVA_DMA_FIELD(DESCR_CNTL, DST_ADDR1, (uint32_t)(dstAddr >> 32U));
    ctlWord |= CUPVA_DMA_FIELD(DESCR_CNTL, SRC_TF, (uint32_t)(srcAddr >> 40U));
    ctlWord |= CUPVA_DMA_FIELD(DESCR_CNTL, DST_TF, (uint32_t)(dstAddr >> 40U));
    ctlWord |= CUPVA_DMA_FIELD(DESCR_CNTL, DSTM, srcMode);
    ctlWord |= CUPVA_DMA_FIELD(DESCR_CNTL, DDTM, dstMode);
    ctlWord |= CUPVA_DMA_FIELD(DESCR_CNTL, LINK_DID, linkId);

    // Slot 0: control word
    handler.vpuCTable.entries[base].address = CUPVA_GET_DESC_ATTR_ADDR(desc, DESCR_CNTL);
    handler.vpuCTable.entries[base].value   = ctlWord;

    // Slot 1: low 32 bits of the source address
    handler.vpuCTable.entries[base + 1U].address = CUPVA_GET_DESC_ATTR_ADDR(desc, SRC_ADR);
    handler.vpuCTable.entries[base + 1U].value   = (uint32_t)srcAddr;

    // Slot 2: low 32 bits of the destination address
    handler.vpuCTable.entries[base + 2U].address = CUPVA_GET_DESC_ATTR_ADDR(desc, DST_ADR);
    handler.vpuCTable.entries[base + 2U].value   = (uint32_t)dstAddr;

    handler.entryCount = base + 3U;
}
// Stages new source and destination addresses together with new line pitches for one
// transfer of the sequence.
//
// Four commands are appended to the handler's table, targeting the DESCR_CNTL, SRC_ADR,
// DST_ADR and LP_CNTL attributes of the transfer's descriptor, and handler.entryCount grows
// by four. Nothing is triggered here and vpuCTable.entryCount is not touched.
//
//   handler      handler whose table receives the commands
//   transferIdx  index into handler.metadata.transfers
//   srcAddr      source address; the low 32 bits go to SRC_ADR, the value shifted right by 32
//                feeds the SRC_ADDR1 field and shifted right by 40 feeds the SRC_TF field
//   srcMemType   source memory type, stored in the DSTM field
//   srcLp        stored in the SLP_ADV field of LP_CNTL
//   dstAddr      destination address, split like srcAddr (DST_ADR / DST_ADDR1 / DST_TF)
//   dstMemType   destination memory type, stored in the DDTM field
//   dstLp        stored in the DLP_ADV field of LP_CNTL
//
// The descriptor's existing link (metadata link_did) is written back into LINK_DID.
// No bounds check is made against the table capacity.
inline void cupvaSQDFUpdateAddr(SequenceDataFlowHandler &handler, uint8_t const transferIdx, uint64_t const srcAddr,
MemType const srcMemType, uint16_t const srcLp, uint64_t const dstAddr,
MemType const dstMemType, uint16_t const dstLp)
{
    uint32_t const base    = handler.entryCount;
    uint32_t const desc    = handler.metadata.transfers[transferIdx].desc_id;
    uint32_t const linkId  = handler.metadata.transfers[transferIdx].link_did;
    uint32_t const srcMode = (uint32_t)srcMemType;
    uint32_t const dstMode = (uint32_t)dstMemType;

    // Assemble the DESCR_CNTL word one field at a time.
    uint32_t ctlWord = CUPVA_DMA_FIELD(DESCR_CNTL, SRC_ADDR1, (uint32_t)(srcAddr >> 32U));
    ctlWord |= CUPVA_DMA_FIELD(DESCR_CNTL, DST_ADDR1, (uint32_t)(dstAddr >> 32U));
    ctlWord |= CUPVA_DMA_FIELD(DESCR_CNTL, SRC_TF, (uint32_t)(srcAddr >> 40U));
    ctlWord |= CUPVA_DMA_FIELD(DESCR_CNTL, DST_TF, (uint32_t)(dstAddr >> 40U));
    ctlWord |= CUPVA_DMA_FIELD(DESCR_CNTL, DSTM, srcMode);
    ctlWord |= CUPVA_DMA_FIELD(DESCR_CNTL, DDTM, dstMode);
    ctlWord |= CUPVA_DMA_FIELD(DESCR_CNTL, LINK_DID, linkId);

    // Line pitch word: source and destination advance share LP_CNTL.
    uint32_t pitchWord = CUPVA_DMA_FIELD(LP_CNTL, SLP_ADV, (uint32_t)srcLp);
    pitchWord |= CUPVA_DMA_FIELD(LP_CNTL, DLP_ADV, (uint32_t)dstLp);

    // Slot 0: control word
    handler.vpuCTable.entries[base].address = CUPVA_GET_DESC_ATTR_ADDR(desc, DESCR_CNTL);
    handler.vpuCTable.entries[base].value   = ctlWord;

    // Slot 1: low 32 bits of the source address
    handler.vpuCTable.entries[base + 1U].address = CUPVA_GET_DESC_ATTR_ADDR(desc, SRC_ADR);
    handler.vpuCTable.entries[base + 1U].value   = (uint32_t)srcAddr;

    // Slot 2: low 32 bits of the destination address
    handler.vpuCTable.entries[base + 2U].address = CUPVA_GET_DESC_ATTR_ADDR(desc, DST_ADR);
    handler.vpuCTable.entries[base + 2U].value   = (uint32_t)dstAddr;

    // Slot 3: line pitches
    handler.vpuCTable.entries[base + 3U].address = CUPVA_GET_DESC_ATTR_ADDR(desc, LP_CNTL);
    handler.vpuCTable.entries[base + 3U].value   = pitchWord;

    handler.entryCount = base + 4U;
}
// Stages a new tile size for one transfer of the sequence.
//
// One command targeting the TILE_CNTL attribute of the transfer's descriptor is appended to
// the handler's table and handler.entryCount grows by one. Nothing is triggered here.
//
//   handler      handler whose table receives the command
//   transferIdx  index into handler.metadata.transfers
//   tx           stored in the TX field of TILE_CNTL
//   ty           stored in the TY field of TILE_CNTL
//
// No bounds check is made against the table capacity.
inline void cupvaSQDFUpdateTileSize(SequenceDataFlowHandler &handler, uint8_t const transferIdx, uint16_t const tx,
uint16_t const ty)
{
    uint32_t const slot = handler.entryCount;
    uint32_t const desc = handler.metadata.transfers[transferIdx].desc_id;

    uint32_t tileWord = CUPVA_DMA_FIELD(TILE_CNTL, TX, (uint32_t)tx);
    tileWord |= CUPVA_DMA_FIELD(TILE_CNTL, TY, (uint32_t)ty);

    handler.vpuCTable.entries[slot].address = CUPVA_GET_DESC_ATTR_ADDR(desc, TILE_CNTL);
    handler.vpuCTable.entries[slot].value   = tileWord;

    handler.entryCount = slot + 1U;
}
// Prepares a handler's command table for use.
//
// Stamps the table header with SEQDF_VPUC_MAGIC, resets both the staged count
// (handler.entryCount) and the published count (vpuCTable.entryCount) to 1, and fills
// entries[0] with a no-op: it rewrites the SRC_ADR attribute of the configuration
// descriptor (metadata.config_id) with the VMEM address of the table itself.
// handler.currentTrigIdx is left untouched.
inline void cupvaSQDFOpen(SequenceDataFlowHandler &handler)
{
    uint32_t const cfgDesc = handler.metadata.config_id;

    // Header: magic plus a single valid entry.
    handler.vpuCTable.magic      = SEQDF_VPUC_MAGIC;
    handler.vpuCTable.entryCount = 1U;
    handler.entryCount           = 1U;

    // Entry 0 never changes anything: the config descriptor's source is pointed at this table.
    handler.vpuCTable.entries[0].address = CUPVA_GET_DESC_ATTR_ADDR(cfgDesc, SRC_ADR);
    handler.vpuCTable.entries[0].value   = cupvaGetVmemAddress(&handler.vpuCTable);
}
// Shared trigger path of cupvaSQDFFlushAndTrig and cupvaSQDFTrig.
//
// Picks metadata.cfg_trig while handler.currentTrigIdx is 0 and metadata.data_trig
// otherwise, issues chess_memory_fence() so the table writes made by the caller are ordered
// before the trigger, then calls cupvaDataFlowTrig with the chosen mask.
// The handler is not modified.
inline void _cupvaSQDFTrigCommon(SequenceDataFlowHandler const &handler)
{
    uint32_t mask;
    if (handler.currentTrigIdx == 0U)
    {
        mask = handler.metadata.cfg_trig;
    }
    else
    {
        mask = handler.metadata.data_trig;
    }

    chess_memory_fence();
    cupvaDataFlowTrig(mask);
}
// Publishes the staged commands and triggers the dataflow.
//
// When anything beyond the no-op entry has been staged (handler.entryCount > 1), the staged
// count is copied into vpuCTable.entryCount and the staging index is rewound to 1. The
// trigger is then issued through _cupvaSQDFTrigCommon.
// NOTE(review): when nothing is staged, vpuCTable.entryCount keeps whatever value the
// previous call left there - confirm this is the intended behaviour.
inline void cupvaSQDFFlushAndTrig(SequenceDataFlowHandler &handler)
{
    uint32_t const staged = handler.entryCount;
    if (staged > 1U)
    {
        handler.vpuCTable.entryCount = staged;
        handler.entryCount           = 1U;
    }

    _cupvaSQDFTrigCommon(handler);
}
// Triggers the dataflow without publishing staged commands.
//
// The published count is forced to 1, so only the no-op at entries[0] is valid for this
// trigger. handler.entryCount is left alone, which keeps any staged commands pending for a
// later cupvaSQDFFlushAndTrig.
inline void cupvaSQDFTrig(SequenceDataFlowHandler &handler)
{
    handler.vpuCTable.entryCount = 1U;

    _cupvaSQDFTrigCommon(handler);
}
// Synchronises on the trigger that was last issued for this handler and steps to the next
// trigger index.
//
// The mask is metadata.cfg_trig while handler.currentTrigIdx is 0 and metadata.data_trig
// otherwise. currentTrigIdx is advanced with mod_inc(idx, trigger_count - 1) before
// cupvaDataFlowSync is called with the mask selected for the old index.
// NOTE(review): mod_inc is presumably a wrapping increment bounded by trigger_count - 1 -
// confirm against the intrinsic's definition.
inline void cupvaSQDFSync(SequenceDataFlowHandler &handler)
{
    uint32_t const idx = handler.currentTrigIdx;

    uint32_t mask;
    if (idx == 0U)
    {
        mask = handler.metadata.cfg_trig;
    }
    else
    {
        mask = handler.metadata.data_trig;
    }

    handler.currentTrigIdx = (uint32_t)mod_inc((int32_t)idx, (int32_t)handler.metadata.trigger_count - 1);

    cupvaDataFlowSync(mask);
}
// Synchronises on several handlers with a single cupvaDataFlowSync call.
//
// For each of the num_handlers handlers the trigger mask is selected exactly as in the
// single-handler overload (cfg_trig while currentTrigIdx is 0, data_trig otherwise), the
// handler's currentTrigIdx is advanced with mod_inc(idx, trigger_count - 1), and the mask is
// OR-ed into a combined mask. cupvaDataFlowSync is then called once with the combined mask;
// with num_handlers == 0 it is called with a mask of 0.
//
//   handlers      array of pointers to the handlers to synchronise; entries are dereferenced
//                 without a null check
//   num_handlers  number of entries in handlers
inline void cupvaSQDFSync(SequenceDataFlowHandler **handlers, uint32_t num_handlers)
{
    uint32_t combined = 0U;

    for (uint32_t k = 0U; k < num_handlers; k++)
    {
        SequenceDataFlowHandler *const h = handlers[k];
        uint32_t const idx               = h->currentTrigIdx;

        uint32_t mask;
        if (idx == 0U)
        {
            mask = h->metadata.cfg_trig;
        }
        else
        {
            mask = h->metadata.data_trig;
        }

        h->currentTrigIdx = (uint32_t)mod_inc((int32_t)idx, (int32_t)h->metadata.trigger_count - 1);
        combined |= mask;
    }

    cupvaDataFlowSync(combined);
}
// Closes the sequence dataflow by reprogramming the descriptor of transfer 0 and running
// one final configuration trigger.
//
// The command table is overwritten from entries[0] (replacing the no-op installed by
// cupvaSQDFOpen) with six writes to the descriptor metadata.transfers[0].desc_id:
//   SRC_ADR / DST_ADR  both set to the VMEM address of the command table
//   DESCR_CNTL         DSTM and DDTM set to DMA_TRANS_MODE_VMEM, every other field
//                      (including LINK_DID) zero
//   TRANS_CNTL         ITC = 1
//   TILE_CNTL          0 (tx = ty = 0)
//   EVENT_CNTL         TRIG_VPU_HW_EVENTS = metadata.xfer0_trigger, every other field zero
// The published count is set to six, then metadata.cfg_trig is triggered and synchronised
// on. handler.entryCount and handler.currentTrigIdx are not modified, so cupvaSQDFOpen must
// run again before the handler is reused.
// NOTE(review): the original comments call this descriptor "the second descriptor",
// presumably counting the configuration descriptor as the first - confirm.
inline void cupvaSQDFClose(SequenceDataFlowHandler &handler)
{
    uint32_t entryCount        = 0U;
    uint32_t const descId      = handler.metadata.transfers[0].desc_id;
    uint32_t const tableVmemAd = cupvaGetVmemAddress(&handler.vpuCTable);

    // Point source and destination of the descriptor at the command table itself
    handler.vpuCTable.entries[entryCount].address = CUPVA_GET_DESC_ATTR_ADDR(descId, SRC_ADR);
    handler.vpuCTable.entries[entryCount].value   = tableVmemAd;
    entryCount++;
    handler.vpuCTable.entries[entryCount].address = CUPVA_GET_DESC_ATTR_ADDR(descId, DST_ADR);
    handler.vpuCTable.entries[entryCount].value   = tableVmemAd;
    entryCount++;

    // Null link id, source and destination both in VMEM
    handler.vpuCTable.entries[entryCount].address = CUPVA_GET_DESC_ATTR_ADDR(descId, DESCR_CNTL);
    handler.vpuCTable.entries[entryCount].value   = CUPVA_DMA_FIELD(DESCR_CNTL, DSTM, (uint32_t)DMA_TRANS_MODE_VMEM) |
                                                    CUPVA_DMA_FIELD(DESCR_CNTL, DDTM, (uint32_t)DMA_TRANS_MODE_VMEM);
    entryCount++;

    // ITC = 1
    handler.vpuCTable.entries[entryCount].address = CUPVA_GET_DESC_ATTR_ADDR(descId, TRANS_CNTL);
    handler.vpuCTable.entries[entryCount].value   = CUPVA_DMA_FIELD(TRANS_CNTL, ITC, 1U);
    entryCount++;

    // tx = ty = 0
    handler.vpuCTable.entries[entryCount].address = CUPVA_GET_DESC_ATTR_ADDR(descId, TILE_CNTL);
    handler.vpuCTable.entries[entryCount].value   = 0U;
    entryCount++;

    // Only the VPU hardware event trigger of transfer 0 stays set; all other EVENT_CNTL
    // fields are zero
    handler.vpuCTable.entries[entryCount].address = CUPVA_GET_DESC_ATTR_ADDR(descId, EVENT_CNTL);
    handler.vpuCTable.entries[entryCount].value =
        CUPVA_DMA_FIELD(EVENT_CNTL, TRIG_VPU_HW_EVENTS, (uint32_t)handler.metadata.xfer0_trigger);
    entryCount++;

    handler.vpuCTable.entryCount = entryCount;

    // Order the table writes above before the trigger, as _cupvaSQDFTrigCommon does for
    // every other trigger issued from this file.
    chess_memory_fence();
    cupvaDataFlowTrig(handler.metadata.cfg_trig);
    cupvaDataFlowSync(handler.metadata.cfg_trig);
}
#endif