sequence_dataflow.h#

Fully qualified name: src/device/vpu_runtime/include/cupva_device/sequence_dataflow.h

File members: src/device/vpu_runtime/include/cupva_device/sequence_dataflow.h

/*
 * Copyright (c) 2023 NVIDIA Corporation.  All rights reserved.
 *
 * NVIDIA Corporation and its licensors retain all intellectual property
 * and proprietary rights in and to this software, related documentation
 * and any modifications thereto.  Any use, reproduction, disclosure or
 * distribution of this software and related documentation without an express
 * license agreement from NVIDIA Corporation is strictly prohibited.
 */

#ifndef CUPVA_DEVICE_SEQUENCE_DATAFLOW_H
#define CUPVA_DEVICE_SEQUENCE_DATAFLOW_H

#include "impl/dma_common.h"

#include <cupva_types.h>

// Implementing an address update requires up to 4 writes: DESCR_CNTL, SRC_ADR and
// DST_ADR, plus LP_CNTL when the line-pitch overload of cupvaSQDFUpdateAddr is used
#define SEQDF_MAX_WRITES_PER_ADDR_UPDATE 4U
// Implementing a tile update requires 1 write (TILE_CNTL)
#define SEQDF_MAX_WRITES_PER_TILE_UPDATE 1U

// Number of entries in the VPUC command table: the worst-case number of writes for
// one transfer multiplied by the maximum number of transfers.
// NOTE(review): cupvaSQDFOpen reserves entry 0 for a no-op write and that slot is not
// counted here - confirm the table cannot overflow by one entry in the worst case.
#define SEQDF_VPUC_ENTRIES \
    ((SEQDF_MAX_WRITES_PER_ADDR_UPDATE + SEQDF_MAX_WRITES_PER_TILE_UPDATE) * SEQUENCE_DATAFLOW_MAX_TRANSFERS)

// Magic number stored in the VPUC table header by cupvaSQDFOpen
#define SEQDF_VPUC_MAGIC 0xDEADC0DEU

// One command of the VPUC table: a 32-bit value paired with the address it is written to.
// NOTE(review): the field order is presumably a layout the DMA hardware expects - do not
// reorder without confirming.
struct SequenceDataFlowVPUCEntry
{
    uint32_t value;   // Value to be written
    uint32_t address; // Target address; the functions in this file fill it from CUPVA_GET_DESC_ATTR_ADDR
};
// VPUC command table: a small header followed by a fixed-size array of commands.
struct SequenceDataFlowVPUCTable
{
    uint32_t entryCount; // Number of valid entries in the table, counted from entries[0]
    uint32_t magic;      // Set to SEQDF_VPUC_MAGIC by cupvaSQDFOpen
    SequenceDataFlowVPUCEntry entries[SEQDF_VPUC_ENTRIES]; // Command storage
};

// Memory type of a transfer endpoint. The numeric value is cast to uint32_t and placed
// directly into the DSTM / DDTM fields of DESCR_CNTL by cupvaSQDFUpdateAddr, so the
// values are spelled out explicitly.
enum MemType
{
    MEMTYPE_INVALID = 0,
    MEMTYPE_DRAM    = 1,
    MEMTYPE_VMEM    = 2,
    MEMTYPE_L2SRAM  = 3
};

// Device-side state of one sequence dataflow. Instances are declared with
// VMEM_SEQDF_HANDLER, which also exports the vpuCTable and metadata members.
struct SequenceDataFlowHandler
{
    SequenceDataFlowVPUCTable vpuCTable; // Command table filled by the cupvaSQDFUpdate* functions
    SequenceDataFlowParameters metadata; // Descriptor ids and trigger masks read by the functions below
    uint32_t entryCount;                 // Index of the next free table entry; copied into vpuCTable.entryCount on flush
    uint32_t currentTrigIdx;             // 0 selects metadata.cfg_trig, otherwise metadata.data_trig; advanced by cupvaSQDFSync
};

// Declares a SequenceDataFlowHandler called `name` through VMEM_VAR using `bank`, then
// exports its vpuCTable member as _V<name> (tagged VMEM_TYPE_VPUC_TABLE) and its
// metadata member as _M<name>.
// The expansion already ends with a semicolon.
#define VMEM_SEQDF_HANDLER(bank, name)                            \
    VMEM_VAR(bank, SequenceDataFlowHandler, name);                \
    CUPVA_EXPORT(name.vpuCTable, _V##name, VMEM_TYPE_VPUC_TABLE); \
    CUPVA_EXPORT(name.metadata, _M##name);

// Queue the table entries that change the source and destination of one transfer.
//
// Three entries are appended at handler.entryCount, in this order:
//   1. DESCR_CNTL - upper address bits of both endpoints, both memory types and the
//                   link id taken from the transfer's metadata
//   2. SRC_ADR    - low 32 bits of srcAddr
//   3. DST_ADR    - low 32 bits of dstAddr
// handler.entryCount is then advanced by 3. vpuCTable.entryCount is not touched here;
// cupvaSQDFFlushAndTrig publishes the new count.
//
// handler     - handler whose table receives the entries
// transferIdx - index into handler.metadata.transfers (not range-checked)
// srcAddr     - 64-bit source address
// srcMemType  - memory type of the source, written to the DSTM field
// dstAddr     - 64-bit destination address
// dstMemType  - memory type of the destination, written to the DDTM field
//
// No check is made that the table has room for the new entries.
inline void cupvaSQDFUpdateAddr(SequenceDataFlowHandler &handler, uint8_t const transferIdx, uint64_t const srcAddr,
                                MemType const srcMemType, uint64_t const dstAddr, MemType const dstMemType)
{
    uint32_t const firstSlot = handler.entryCount;
    uint32_t const descId    = handler.metadata.transfers[transferIdx].desc_id;
    uint32_t const linkId    = handler.metadata.transfers[transferIdx].link_did;

    // Everything that is not a low address word lives in DESCR_CNTL
    uint32_t ctlWord = CUPVA_DMA_FIELD(DESCR_CNTL, SRC_ADDR1, (uint32_t)(srcAddr >> 32U));
    ctlWord |= CUPVA_DMA_FIELD(DESCR_CNTL, DST_ADDR1, (uint32_t)(dstAddr >> 32U));
    ctlWord |= CUPVA_DMA_FIELD(DESCR_CNTL, SRC_TF, (uint32_t)(srcAddr >> 40U));
    ctlWord |= CUPVA_DMA_FIELD(DESCR_CNTL, DST_TF, (uint32_t)(dstAddr >> 40U));
    ctlWord |= CUPVA_DMA_FIELD(DESCR_CNTL, DSTM, (uint32_t)srcMemType);
    ctlWord |= CUPVA_DMA_FIELD(DESCR_CNTL, DDTM, (uint32_t)dstMemType);
    ctlWord |= CUPVA_DMA_FIELD(DESCR_CNTL, LINK_DID, linkId);

    handler.vpuCTable.entries[firstSlot].address = CUPVA_GET_DESC_ATTR_ADDR(descId, DESCR_CNTL);
    handler.vpuCTable.entries[firstSlot].value   = ctlWord;

    handler.vpuCTable.entries[firstSlot + 1U].address = CUPVA_GET_DESC_ATTR_ADDR(descId, SRC_ADR);
    handler.vpuCTable.entries[firstSlot + 1U].value   = (uint32_t)srcAddr;

    handler.vpuCTable.entries[firstSlot + 2U].address = CUPVA_GET_DESC_ATTR_ADDR(descId, DST_ADR);
    handler.vpuCTable.entries[firstSlot + 2U].value   = (uint32_t)dstAddr;

    handler.entryCount = firstSlot + 3U;
}

// Queue the table entries that change the source, destination and line pitches of one
// transfer.
//
// Four entries are appended at handler.entryCount, in this order:
//   1. DESCR_CNTL - upper address bits of both endpoints, both memory types and the
//                   link id taken from the transfer's metadata
//   2. SRC_ADR    - low 32 bits of srcAddr
//   3. DST_ADR    - low 32 bits of dstAddr
//   4. LP_CNTL    - srcLp in the SLP_ADV field, dstLp in the DLP_ADV field
// handler.entryCount is then advanced by 4. vpuCTable.entryCount is not touched here;
// cupvaSQDFFlushAndTrig publishes the new count.
//
// handler     - handler whose table receives the entries
// transferIdx - index into handler.metadata.transfers (not range-checked)
// srcAddr     - 64-bit source address
// srcMemType  - memory type of the source, written to the DSTM field
// srcLp       - source line pitch value
// dstAddr     - 64-bit destination address
// dstMemType  - memory type of the destination, written to the DDTM field
// dstLp       - destination line pitch value
//
// No check is made that the table has room for the new entries.
inline void cupvaSQDFUpdateAddr(SequenceDataFlowHandler &handler, uint8_t const transferIdx, uint64_t const srcAddr,
                                MemType const srcMemType, uint16_t const srcLp, uint64_t const dstAddr,
                                MemType const dstMemType, uint16_t const dstLp)
{
    uint32_t const firstSlot = handler.entryCount;
    uint32_t const descId    = handler.metadata.transfers[transferIdx].desc_id;
    uint32_t const linkId    = handler.metadata.transfers[transferIdx].link_did;

    // Everything that is not a low address word or a line pitch lives in DESCR_CNTL
    uint32_t ctlWord = CUPVA_DMA_FIELD(DESCR_CNTL, SRC_ADDR1, (uint32_t)(srcAddr >> 32U));
    ctlWord |= CUPVA_DMA_FIELD(DESCR_CNTL, DST_ADDR1, (uint32_t)(dstAddr >> 32U));
    ctlWord |= CUPVA_DMA_FIELD(DESCR_CNTL, SRC_TF, (uint32_t)(srcAddr >> 40U));
    ctlWord |= CUPVA_DMA_FIELD(DESCR_CNTL, DST_TF, (uint32_t)(dstAddr >> 40U));
    ctlWord |= CUPVA_DMA_FIELD(DESCR_CNTL, DSTM, (uint32_t)srcMemType);
    ctlWord |= CUPVA_DMA_FIELD(DESCR_CNTL, DDTM, (uint32_t)dstMemType);
    ctlWord |= CUPVA_DMA_FIELD(DESCR_CNTL, LINK_DID, linkId);

    // Both line pitches share a single LP_CNTL word
    uint32_t pitchWord = CUPVA_DMA_FIELD(LP_CNTL, SLP_ADV, (uint32_t)srcLp);
    pitchWord |= CUPVA_DMA_FIELD(LP_CNTL, DLP_ADV, (uint32_t)dstLp);

    handler.vpuCTable.entries[firstSlot].address = CUPVA_GET_DESC_ATTR_ADDR(descId, DESCR_CNTL);
    handler.vpuCTable.entries[firstSlot].value   = ctlWord;

    handler.vpuCTable.entries[firstSlot + 1U].address = CUPVA_GET_DESC_ATTR_ADDR(descId, SRC_ADR);
    handler.vpuCTable.entries[firstSlot + 1U].value   = (uint32_t)srcAddr;

    handler.vpuCTable.entries[firstSlot + 2U].address = CUPVA_GET_DESC_ATTR_ADDR(descId, DST_ADR);
    handler.vpuCTable.entries[firstSlot + 2U].value   = (uint32_t)dstAddr;

    handler.vpuCTable.entries[firstSlot + 3U].address = CUPVA_GET_DESC_ATTR_ADDR(descId, LP_CNTL);
    handler.vpuCTable.entries[firstSlot + 3U].value   = pitchWord;

    handler.entryCount = firstSlot + 4U;
}

// Queue the table entry that changes the tile size of one transfer.
//
// A single TILE_CNTL entry carrying tx in the TX field and ty in the TY field is
// appended at handler.entryCount, which is then advanced by 1. vpuCTable.entryCount is
// not touched here; cupvaSQDFFlushAndTrig publishes the new count.
//
// handler     - handler whose table receives the entry
// transferIdx - index into handler.metadata.transfers (not range-checked)
// tx, ty      - new tile dimensions
//
// No check is made that the table has room for the new entry.
inline void cupvaSQDFUpdateTileSize(SequenceDataFlowHandler &handler, uint8_t const transferIdx, uint16_t const tx,
                                    uint16_t const ty)
{
    uint32_t const slot   = handler.entryCount;
    uint32_t const descId = handler.metadata.transfers[transferIdx].desc_id;

    uint32_t tileWord = CUPVA_DMA_FIELD(TILE_CNTL, TX, (uint32_t)tx);
    tileWord |= CUPVA_DMA_FIELD(TILE_CNTL, TY, (uint32_t)ty);

    handler.vpuCTable.entries[slot].address = CUPVA_GET_DESC_ATTR_ADDR(descId, TILE_CNTL);
    handler.vpuCTable.entries[slot].value   = tileWord;

    handler.entryCount = slot + 1U;
}

// Prepare a handler for use: stamp the table header and install the no-op entry.
//
// After this call the table holds exactly one entry, and handler.entryCount is 1 so
// that subsequent cupvaSQDFUpdate* calls append after it.
//
// NOTE(review): handler.currentTrigIdx is not written here; it presumably relies on the
// handler's initial contents - confirm this is intended when a handler is reopened.
inline void cupvaSQDFOpen(SequenceDataFlowHandler &handler)
{
    // Table header: magic plus a count covering only the no-op entry
    handler.vpuCTable.magic      = SEQDF_VPUC_MAGIC;
    handler.entryCount           = 1;
    handler.vpuCTable.entryCount = 1;

    // Entry 0 is a no-op: it writes the table's own VMEM address into the SRC_ADR
    // attribute of the config descriptor
    uint32_t const configDescId          = handler.metadata.config_id;
    handler.vpuCTable.entries[0].address = CUPVA_GET_DESC_ATTR_ADDR(configDescId, SRC_ADR);
    handler.vpuCTable.entries[0].value   = cupvaGetVmemAddress(&handler.vpuCTable);
}

// Shared trigger path used by cupvaSQDFFlushAndTrig and cupvaSQDFTrig.
//
// Picks metadata.cfg_trig when handler.currentTrigIdx is 0 and metadata.data_trig
// otherwise, issues chess_memory_fence(), then triggers with the selected mask. The
// fence sits between the caller's table writes and the trigger.
inline void _cupvaSQDFTrigCommon(SequenceDataFlowHandler const &handler)
{
    uint32_t mask;
    if (handler.currentTrigIdx == 0U)
    {
        mask = handler.metadata.cfg_trig;
    }
    else
    {
        mask = handler.metadata.data_trig;
    }

    chess_memory_fence();
    cupvaDataFlowTrig(mask);
}

// Publish the queued table entries, if any, and trigger the dataflow.
//
// When entries beyond the no-op at index 0 have been queued, their count is copied into
// vpuCTable.entryCount and handler.entryCount is rewound to 1 for the next batch.
//
// NOTE(review): when nothing is queued, vpuCTable.entryCount keeps whatever value it
// had before (cupvaSQDFTrig, by contrast, forces it to 1) - confirm this is intended.
inline void cupvaSQDFFlushAndTrig(SequenceDataFlowHandler &handler)
{
    uint32_t const queued = handler.entryCount;
    if (queued > 1U)
    {
        handler.vpuCTable.entryCount = queued;
        handler.entryCount           = 1;
    }
    _cupvaSQDFTrigCommon(handler);
}

// Trigger the dataflow without publishing any queued entries.
//
// vpuCTable.entryCount is forced to 1, which leaves only the no-op entry installed by
// cupvaSQDFOpen in effect. handler.entryCount is left untouched, so entries queued but
// not yet flushed remain queued.
inline void cupvaSQDFTrig(SequenceDataFlowHandler &handler)
{
    handler.vpuCTable.entryCount = 1U;
    _cupvaSQDFTrigCommon(handler);
}

// Wait for the trigger that belongs to the handler's current trigger index, and advance
// that index.
//
// The mask is metadata.cfg_trig when currentTrigIdx is 0 and metadata.data_trig
// otherwise. The index is advanced with mod_inc using (trigger_count - 1) as its second
// argument before cupvaDataFlowSync is called with the mask.
// NOTE(review): mod_inc is defined elsewhere; presumably it increments and wraps to 0
// after trigger_count - 1 - confirm.
inline void cupvaSQDFSync(SequenceDataFlowHandler &handler)
{
    uint32_t const trigIdx = handler.currentTrigIdx;

    uint32_t mask;
    if (trigIdx == 0U)
    {
        mask = handler.metadata.cfg_trig;
    }
    else
    {
        mask = handler.metadata.data_trig;
    }

    handler.currentTrigIdx = (uint32_t)mod_inc((int32_t)trigIdx, (int32_t)handler.metadata.trigger_count - 1);
    cupvaDataFlowSync(mask);
}

// Wait for several handlers with a single cupvaDataFlowSync call.
//
// For each handler the mask for its current trigger index is selected (cfg_trig when
// the index is 0, data_trig otherwise), its index is advanced with mod_inc, and the
// mask is OR-ed into a combined mask. cupvaDataFlowSync is then called once with the
// combined mask; with num_handlers == 0 it is called with 0.
//
// handlers     - array of num_handlers handler pointers; none is checked for null
// num_handlers - number of elements in handlers
inline void cupvaSQDFSync(SequenceDataFlowHandler **handlers, uint32_t num_handlers)
{
    uint32_t combinedMask = 0;

    for (uint32_t idx = 0; idx < num_handlers; ++idx)
    {
        SequenceDataFlowHandler &current = *handlers[idx];
        uint32_t const trigIdx           = current.currentTrigIdx;

        uint32_t mask;
        if (trigIdx == 0U)
        {
            mask = current.metadata.cfg_trig;
        }
        else
        {
            mask = current.metadata.data_trig;
        }

        current.currentTrigIdx = (uint32_t)mod_inc((int32_t)trigIdx, (int32_t)current.metadata.trigger_count - 1);
        combinedMask |= mask;
    }

    cupvaDataFlowSync(combinedMask);
}

// Close the dataflow by reprogramming the descriptor of transfers[0] through the table.
//
// Six entries are written from index 0 (overwriting the no-op entry installed by
// cupvaSQDFOpen), vpuCTable.entryCount is set to 6, and the config trigger
// (metadata.cfg_trig) is fired and then waited on.
//
// handler.entryCount and handler.currentTrigIdx are not modified.
// NOTE(review): presumably the handler must be reopened with cupvaSQDFOpen before it
// is used again - confirm.
inline void cupvaSQDFClose(SequenceDataFlowHandler &handler)
{
    uint32_t entryCount   = 0U;
    uint32_t const descId = handler.metadata.transfers[0].desc_id;
    // Point both source and destination of the descriptor at the VPUC table itself
    handler.vpuCTable.entries[entryCount].address = CUPVA_GET_DESC_ATTR_ADDR(descId, SRC_ADR);
    handler.vpuCTable.entries[entryCount].value   = cupvaGetVmemAddress(&handler.vpuCTable);
    entryCount++;
    handler.vpuCTable.entries[entryCount].address = CUPVA_GET_DESC_ATTR_ADDR(descId, DST_ADR);
    handler.vpuCTable.entries[entryCount].value   = cupvaGetVmemAddress(&handler.vpuCTable);
    entryCount++;
    // DESCR_CNTL: source and destination both VMEM; all other fields, including the
    // link id, are left at zero
    handler.vpuCTable.entries[entryCount].address = CUPVA_GET_DESC_ATTR_ADDR(descId, DESCR_CNTL);
    handler.vpuCTable.entries[entryCount].value   = CUPVA_DMA_FIELD(DESCR_CNTL, DSTM, (uint32_t)DMA_TRANS_MODE_VMEM) |
                                                  CUPVA_DMA_FIELD(DESCR_CNTL, DDTM, (uint32_t)DMA_TRANS_MODE_VMEM);
    entryCount++;
    // TRANS_CNTL: ITC = 1
    handler.vpuCTable.entries[entryCount].address = CUPVA_GET_DESC_ATTR_ADDR(descId, TRANS_CNTL);
    handler.vpuCTable.entries[entryCount].value   = CUPVA_DMA_FIELD(TRANS_CNTL, ITC, 1U);
    entryCount++;
    // TILE_CNTL: tx = ty = 0
    handler.vpuCTable.entries[entryCount].address = CUPVA_GET_DESC_ATTR_ADDR(descId, TILE_CNTL);
    handler.vpuCTable.entries[entryCount].value   = 0;
    entryCount++;
    // EVENT_CNTL: TRIG_VPU_HW_EVENTS taken from metadata.xfer0_trigger, all other
    // fields zero
    handler.vpuCTable.entries[entryCount].address = CUPVA_GET_DESC_ATTR_ADDR(descId, EVENT_CNTL);
    handler.vpuCTable.entries[entryCount].value =
        CUPVA_DMA_FIELD(EVENT_CNTL, TRIG_VPU_HW_EVENTS, (uint32_t)handler.metadata.xfer0_trigger);
    entryCount++;
    handler.vpuCTable.entryCount = entryCount;
    // Fence between the table writes above and the trigger, matching
    // _cupvaSQDFTrigCommon, so the stores are not moved past the trigger
    chess_memory_fence();
    cupvaDataFlowTrig(handler.metadata.cfg_trig);
    cupvaDataFlowSync(handler.metadata.cfg_trig);
}

#endif