cupva_host.hpp#
Fully qualified name: src/host/cpp_api/include/cupva_host.hpp
File members: src/host/cpp_api/include/cupva_host.hpp
/*
* SPDX-FileCopyrightText: Copyright (c) 2020-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
* SPDX-License-Identifier: LicenseRef-NvidiaProprietary
*
* NVIDIA CORPORATION, its affiliates and licensors retain all intellectual
* property and proprietary rights in and to this material, related
* documentation and any modifications thereto. Any use, reproduction,
* disclosure or distribution of this material and related documentation
* without an express license agreement from NVIDIA CORPORATION or
* its affiliates is strictly prohibited.
*/
#ifndef CUPVA_HOST_HPP
#define CUPVA_HOST_HPP
#include <cupva_host_common.hpp>
#include <cupva_types.h>
#include <array>
#include <cstddef>
#include <cstdint>
#include <cstring>
#include <initializer_list>
#include <memory>
#include <stdexcept>
#include <string>
#include <type_traits>
namespace cupva {
class Parameter;
// Forward declarations of the implementation-side types. This header only names them (for example as
// template arguments of the storage base classes of the public wrappers); their definitions are not
// visible here.
namespace impl {
//BaseCmd forward declaration
class BaseCmd;
//Static/ConfigDataFlow's base class
class BaseDataFlow;
// forward declaration of class CmdBuffer
class CmdBuffer;
// forward declaration of class CmdBarrier
class CmdBarrier;
// forward declaration of class CmdProgram
class CmdProgram;
// forward declaration of class CmdRequestFences
class CmdRequestFences;
// forward declaration of class CmdWaitOnFences
class CmdWaitOnFences;
// forward declaration of class CmdL2Ops
class CmdL2Ops;
// forward declaration of class ConfigDataFlow
class ConfigDataFlow;
// forward declaration of class Context
class Context;
// forward declaration of class DynamicDataFlow
class DynamicDataFlow;
// forward declaration of class Executable
class Executable;
// forward declaration of class Fence
class Fence;
// Node is for internal DynamicDataFlow configuration
class Node;
// forward declaration of class Parameter
class Parameter;
// forward declaration of class template OffsetPointer (takes one type parameter)
template<typename>
class OffsetPointer;
// forward declaration of class RasterDataFlow
class RasterDataFlow;
// forward declaration of class StaticDataFlow
class StaticDataFlow;
// forward declaration of class GatherScatterDataFlow
class GatherScatterDataFlow;
// forward declaration of class SequenceDataFlow
class SequenceDataFlow;
// forward declaration of class SequenceDataFlowTransfer
class SequenceDataFlowTransfer;
// forward declaration of class TensorDataFlow
class TensorDataFlow;
// forward declaration of class Stream
class Stream;
// forward declaration of class SyncObj
class SyncObj;
} // namespace impl
// Free functions for memory management. Every function is declared DLL_EXPORT and defined outside
// this header, so the notes below describe the signatures only; anything marked "presumably" or
// NOTE(review) is an assumption to confirm against the implementation.
namespace mem {
// Allocates memory and returns a pointer to it.
//   size       - requested size (presumably in bytes - TODO confirm).
//   accessType - defaults to AccessType::READ_WRITE.
//   allocType  - defaults to AllocType::ALLOC_DRAM.
// NOTE(review): the result is presumably the kind of "devicePtr" accepted by Free()/GetHostPointer().
DLL_EXPORT void *Alloc(int64_t const size, AccessType const accessType = AccessType::READ_WRITE,
AllocType const allocType = AllocType::ALLOC_DRAM);
// Releases the allocation identified by devicePtr (presumably one returned by Alloc - confirm).
DLL_EXPORT void Free(void *const devicePtr);
// Returns a pointer associated with devicePtr; by its name, the host-side mapping (confirm).
DLL_EXPORT void *GetHostPointer(void *const devicePtr);
// Returns a pointer; by its name, the base address of the L2 memory region (confirm).
DLL_EXPORT void *GetL2BaseAddress();
// Fills the caller-provided `attr` with attributes of devicePtr (out-parameter passed by reference).
DLL_EXPORT void GetPointerAttributes(const void *const devicePtr, PointerAttributes &attr);
// Fills the caller-provided `attr` with surface attributes of devicePtr (out-parameter by reference).
DLL_EXPORT void GetSurfaceAttributes(const void *const devicePtr, SurfaceAttributes &attr);
// Writes a PlanarGeometry into `geom`, derived from surface attributes `attr` and `bpp`.
// NOTE(review): unit of bpp (bits vs. bytes per pixel) is not visible here - confirm.
DLL_EXPORT void ConvertToGeometry(cupva::mem::SurfaceAttributes const &attr, int32_t const bpp, PlanarGeometry &geom);
// Registers an externally allocated region [ptr, ptr + size) described by externalAllocType.
// NOTE(review): size is presumably in bytes - confirm.
DLL_EXPORT void Register(const void *const ptr, int64_t const size,
cupva::mem::ExternalAllocType const externalAllocType);
// Counterpart of Register() for the same pointer (presumably undoes the registration - confirm).
DLL_EXPORT void Unregister(const void *const ptr);
// Maps `size` units of devptr according to `policy` and returns a pointer.
// policy defaults to L2SRAMPolicyType::FILL_AND_FLUSH.
// NOTE(review): unlike Alloc/Register, size is a uint32_t here; the meaning of the returned pointer
// is not visible in this header - confirm.
DLL_EXPORT void *MapL2(void *devptr, uint32_t size,
mem::L2SRAMPolicyType policy = mem::L2SRAMPolicyType::FILL_AND_FLUSH);
} // namespace mem
// Compile-time size/alignment constants. Within this header they are used as the size and alignment
// template arguments of StaticStorage for the opaque implementation objects.
// NOTE(review): values are presumably in bytes and must match the real impl types - confirm.
namespace priv {
// Storage size used by priv::ByteOffsetPointer (StaticStorage<impl::OffsetPointer<T>, ...>).
static constexpr size_t CUPVA_OFFSET_PTR_SIZE{32U};
// Storage size used by Fence.
static constexpr size_t CUPVA_FENCE_SIZE{48U};
// Storage alignment used by Fence.
static constexpr size_t CUPVA_FENCE_ALIGN{8U};
// Storage size used by BaseCmd.
static constexpr size_t CUPVA_BASE_CMD_SIZE{24U};
// Storage alignment used by BaseCmd.
static constexpr size_t CUPVA_BASE_CMD_ALIGN{8U};
// Storage size for Parameter (its user is not visible in this part of the file).
static constexpr size_t CUPVA_PARAMETER_SIZE{16U};
} // namespace priv
// Move-only holder for a heap-allocated implementation object of type T.
//
// The object is kept in a std::unique_ptr<T>, so it is destroyed together with the holder unless
// ownership is handed out through release(). Copying is disabled; moving transfers the pointer.
template<typename T>
class DynamicStorage
{
    static_assert(!std::is_pointer<T>::value, "DynamicStorage should not be used with pointer types");

    // Owning smart pointer type used for the held implementation object.
    using PtrType = std::unique_ptr<T>;

    // Copy construction is disabled: the held object has exactly one owner.
    DynamicStorage(const DynamicStorage &) = delete;
    // Copy assignment is disabled for the same reason.
    auto operator=(const DynamicStorage &) & -> DynamicStorage & = delete;

public:
    // The implementation type this storage holds (the template argument T).
    using ImplType = T;

    // Creates an empty holder; getImpl() returns nullptr.
    DynamicStorage() noexcept
        : m_impl{}
    {
    }

    // Takes ownership of the object managed by `ptr` (which may be null).
    explicit DynamicStorage(PtrType ptr) noexcept
        : m_impl{std::move(ptr)}
    {
    }

    // Move construction: this holder takes over v's object and v is left empty.
    DynamicStorage(DynamicStorage &&v) noexcept
        : m_impl{std::move(v.m_impl)}
    {
    }

    // Move assignment exchanges the two held pointers: afterwards `v` holds whatever this
    // object held before, so that object is destroyed when `v` is.
    auto operator=(DynamicStorage &&v) & noexcept -> DynamicStorage &
    {
        m_impl.swap(v.m_impl);
        return *this;
    }

    ~DynamicStorage() = default;

    // Non-owning access to the held object; nullptr when empty.
    auto getImpl() noexcept -> T *
    {
        return m_impl.get();
    }

    // Const overload of getImpl().
    auto getImpl() const noexcept -> T const *
    {
        return m_impl.get();
    }

    // Replaces the held object with the one managed by `ptr`; the previous object is destroyed.
    void setImpl(PtrType ptr) noexcept
    {
        m_impl = std::move(ptr);
    }

    // Gives up ownership: returns the raw pointer and leaves this holder empty.
    // The caller becomes responsible for destroying the returned object.
    auto release() noexcept -> T *
    {
        return m_impl.release();
    }

private:
    // Sole owner of the implementation object (null when empty).
    PtrType m_impl;
};
// Common base of the command wrapper classes (CmdL2Ops, CmdWaitOnFences, CmdRequestFences, ...).
// The implementation object (impl::BaseCmd) lives in the StaticStorage base, sized and aligned by
// priv::CUPVA_BASE_CMD_SIZE / priv::CUPVA_BASE_CMD_ALIGN. StaticStorage itself is declared elsewhere
// (presumably cupva_host_common.hpp), so its exact semantics are not visible here.
// BaseCmd is movable but not copyable.
class DLL_EXPORT BaseCmd : public StaticStorage<impl::BaseCmd, priv::CUPVA_BASE_CMD_SIZE, priv::CUPVA_BASE_CMD_ALIGN>
{
// BaseCmd is not copyable
BaseCmd(const BaseCmd &) = delete;
// BaseCmd is not copyable
BaseCmd &operator=(const BaseCmd &) = delete;
public:
// Default constructor, defined out of line.
BaseCmd() noexcept;
// Move constructor: default-constructs this object, then reuses move assignment to take obj's state.
BaseCmd(BaseCmd &&obj) noexcept
: BaseCmd{}
{
*this = std::move(obj);
}
// Destructor, defined out of line.
~BaseCmd() noexcept;
// Defined out of line. NOTE(review): presumably releases the command's resources early - confirm.
void finalize();
// Move assignment: forwards to StaticStorage's move assignment. Only callable on lvalues (`&`).
BaseCmd &operator=(BaseCmd &&obj) & noexcept
{
StaticStorage::operator=(std::move(obj));
return *this;
}
};
// Cmd is a pointer to a const BaseCmd.
using Cmd = const BaseCmd *;
// Move-only list of commands, stored as a heap array of BaseCmd together with its element count.
//
// A list can be built directly from a pack of command objects (see the variadic constructor), or
// from an already allocated BaseCmd array. Copying is disabled.
class DLL_EXPORT BaseCmdList final
{
    // ImplPtrType is typedef for unique pointer to impl::BaseCmd array
    using ImplPtrType = std::unique_ptr<impl::BaseCmd[]>;
    // PtrType is typedef for unique pointer to BaseCmd array
    using PtrType = std::unique_ptr<BaseCmd[]>;
    // SizeType is typedef for int32_t
    using SizeType = int32_t;

    // SFINAE helper - struct with variadic bool template arguments
    template<bool...>
    struct BoolPack;
    // SFINAE helper - true only if every bool in the pack is true. Shifting the pack by one `true`
    // on either side yields the same type exactly when all elements are true.
    template<bool... bools>
    using AllTrue = std::is_same<BoolPack<bools..., true>, BoolPack<true, bools...>>;
    // SFINAE helper - true only if B is a base of every type in Ts
    template<class B, class... Ts>
    using AllHaveBase = AllTrue<std::is_base_of<B, Ts>::value...>;

public:
    // Creates an empty list (null data pointer, size 0).
    BaseCmdList() noexcept
        : BaseCmdList{nullptr, 0}
    {
    }

    // Wraps an existing array of `size` commands. Defined out of line.
    BaseCmdList(PtrType data, SizeType const size) noexcept;

    // BaseCmdList is not copyable. The deleted operations use the canonical const-reference
    // signatures, matching the other classes in this header.
    BaseCmdList(BaseCmdList const &) = delete;
    // BaseCmdList is not copyable
    BaseCmdList &operator=(BaseCmdList const &) = delete;

    // Move constructor: starts empty, then swaps contents with obj via move assignment.
    BaseCmdList(BaseCmdList &&obj) noexcept
        : BaseCmdList{}
    {
        *this = std::move(obj);
    }

    // Move assignment exchanges data pointer and size with obj; obj ends up holding this list's
    // previous contents.
    BaseCmdList &operator=(BaseCmdList &&obj) & noexcept
    {
        using std::swap;
        swap(m_data, obj.m_data);
        swap(m_size, obj.m_size);
        return *this;
    }

    // Builds a list from a pack of command objects, moving each one into consecutive slots.
    //
    // Args are deduced as forwarding references, and std::is_base_of is false for reference types,
    // so this overload only participates when every argument is an rvalue of a type derived from
    // BaseCmd. Each argument is move-assigned into a BaseCmd element (only the BaseCmd part is kept).
    template<typename... Args, std::enable_if_t<AllHaveBase<BaseCmd, Args...>::value, int> = 0>
    BaseCmdList(Args &&...argsParam)
        : BaseCmdList{static_cast<SizeType>(sizeof...(Args))}
    {
        // Pack expansion inside a braced initializer list guarantees left-to-right evaluation, so
        // idx advances in argument order. The leading 0U keeps the array non-empty.
        using ExpanderType = uint32_t[];
        SizeType idx = 0;
        (void)ExpanderType{0U, (m_data[idx++] = std::forward<Args>(argsParam), 0U)...};
    }

    // Destructor, defined out of line.
    ~BaseCmdList() noexcept;

    // Defined out of line. NOTE(review): presumably finalizes the contained commands - confirm.
    void finalize();

    // Number of commands in the list.
    SizeType getSize() const noexcept
    {
        return m_size;
    }

    // Defined out of line; returns an owning pointer to an impl::BaseCmd array.
    // NOTE(review): presumably transfers ownership of the commands out of this list - confirm.
    ImplPtrType release() noexcept;

private:
    // Defined out of line; used by the variadic constructor before it fills m_data, so it is
    // expected to allocate storage for `size` commands (confirm against the implementation).
    explicit BaseCmdList(SizeType const size);

    // Private pointer to the data owned by this object
    PtrType m_data;
    // Size of the cmd buffer
    SizeType m_size;
};
// Public handle for a command buffer. The implementation object (impl::CmdBuffer) is held by the
// DynamicStorage base. All members are defined out of line, so behavioural notes are hedged.
// CmdBuffer is movable but not copyable.
class DLL_EXPORT CmdBuffer final : public DynamicStorage<impl::CmdBuffer>
{
public:
// Factory. Takes the command list by value; `status` defaults to nullptr and `order` to IN_ORDER.
// NOTE(review): `status` is presumably an optional location for per-command status - confirm.
static CmdBuffer Create(BaseCmdList cmdList, CmdStatus *const status = nullptr, OrderType const order = IN_ORDER);
// Default constructor, defined out of line.
CmdBuffer() noexcept;
// CmdBuffer is not copyable
CmdBuffer(CmdBuffer const &) = delete;
// CmdBuffer is not copyable
CmdBuffer &operator=(CmdBuffer const &) & = delete;
// Move constructor, defined out of line.
CmdBuffer(CmdBuffer &&obj) noexcept;
// Move assignment (lvalue targets only), defined out of line.
CmdBuffer &operator=(CmdBuffer &&obj) & noexcept;
// Defined out of line. NOTE(review): presumably releases resources ahead of destruction - confirm.
void finalize();
// Destructor, defined out of line.
~CmdBuffer() noexcept;
};
namespace priv {
// Byte-granular base class of cupva::OffsetPointer. T may only be uint8_t or const uint8_t (enforced
// by the static_assert); the const variant models a pointer to const data. State is kept in the
// StaticStorage base (impl::OffsetPointer<T>, CUPVA_OFFSET_PTR_SIZE).
// Every member is protected, so the class is only usable through a derived class. Member functions
// are declared here and defined elsewhere.
template<typename T>
class DLL_EXPORT ByteOffsetPointer : public StaticStorage<impl::OffsetPointer<T>, cupva::priv::CUPVA_OFFSET_PTR_SIZE>
{
static_assert(std::is_same<std::remove_const_t<T>, uint8_t>::value,
"Template argument to ByteOffsetPointer must be uint8_t or const uint8_t");
// Helper base storage identifier
using BaseStorage = StaticStorage<impl::OffsetPointer<T>, cupva::priv::CUPVA_OFFSET_PTR_SIZE>;
// Type of void* (either void* or void const *)
using PtrType = std::conditional_t<std::is_const<T>::value, void const *, void *>;
protected:
// Default constructor.
ByteOffsetPointer() noexcept;
// Constructs for memory type `type`, with an optional base pointer (defaults to nullptr).
explicit ByteOffsetPointer(MemType const type, PtrType const basePtr = nullptr);
// Copy/move construction are deleted; copy/move assignment are provided instead. Derived classes
// therefore default-construct and then assign.
ByteOffsetPointer(const ByteOffsetPointer &) = delete;
auto operator=(const ByteOffsetPointer &op) & noexcept -> ByteOffsetPointer &;
ByteOffsetPointer(ByteOffsetPointer &&) = delete;
auto operator=(ByteOffsetPointer &&op) & noexcept -> ByteOffsetPointer &;
~ByteOffsetPointer() noexcept;
// Converting assignments from the non-const variant; only available when T is const
// (non-const -> const is allowed, the reverse is not).
template<typename U = T, std::enable_if_t<std::is_const<U>::value, int> = 0>
auto operator=(ByteOffsetPointer<uint8_t> const &op) & noexcept -> ByteOffsetPointer &;
template<typename U = T, std::enable_if_t<std::is_const<U>::value, int> = 0>
auto operator=(ByteOffsetPointer<uint8_t> &&op) & noexcept -> ByteOffsetPointer &;
// Sets the base pointer.
void setBasePtr(PtrType const ptr);
// Returns the memory type of this pointer.
MemType getPtrMemType() const noexcept;
// Offset arithmetic. NOTE(review): rhs is presumably a byte count, since cupva::OffsetPointer
// scales element counts by sizeof(T) before calling these - confirm.
auto operator+=(ptrdiff_t const rhs) & noexcept -> ByteOffsetPointer &;
auto operator-=(ptrdiff_t const rhs) & noexcept -> ByteOffsetPointer &;
// Difference between two offset pointers (const or non-const variant); presumably in bytes.
template<typename U>
ptrdiff_t operator-(const ByteOffsetPointer<U> &rhs) const &;
};
} // namespace priv
// Typed, pointer-like handle layered on priv::ByteOffsetPointer.
//
// The base class works in bytes; this wrapper scales offsets by sizeof(T) so that arithmetic is
// expressed in elements, and controls which conversions between differently typed OffsetPointers
// are allowed (mirroring the usual rules for raw pointers: adding const is fine, converting to a
// void pointer is fine, dropping const is not).
template<typename T>
class OffsetPointer final
    : public std::conditional_t<std::is_const<T>::value, priv::ByteOffsetPointer<uint8_t const>,
                                priv::ByteOffsetPointer<uint8_t>>
{
    static_assert(!std::is_pointer<T>::value, "OffsetPointer should not be used with pointer types");

    // BaseType is the const byte pointer when T is const, otherwise the non-const byte pointer.
    using BaseType = std::conditional_t<std::is_const<T>::value, priv::ByteOffsetPointer<uint8_t const>,
                                        priv::ByteOffsetPointer<uint8_t>>;

    // make sure const and non-const variants have the same size
    static_assert(sizeof(priv::ByteOffsetPointer<uint8_t const>) == sizeof(priv::ByteOffsetPointer<uint8_t>),
                  "container size mismatch!");

    // Helper to determine whether we should allow copy/move from a different type. It is allowed in
    // following cases:
    // 1) T is const, U has same underlying type (U may differ in constness)
    // 2) T is const void
    // 3) T is void, U is non-const
    template<typename U>
    using CanInitFrom = std::enable_if_t<(std::is_const<T>::value &&
                                          std::is_same<std::remove_cv_t<T>, std::remove_cv_t<U>>::value) || // Case 1
                                             (std::is_same<T, const void>::value) ||                        // Case 2
                                             (std::is_same<T, void>::value && !std::is_const<U>::value),    // Case 3
                                         int>;

    // Helper to disable arithmetic operations when base type is void
    template<typename U>
    using CanDoArithmetic = std::enable_if_t<!std::is_same<std::remove_const_t<U>, void>::value, int>;

public:
    // The element type this pointer refers to.
    using PointedToType = T;

    // Default constructor: default-constructs the byte-level base.
    OffsetPointer() noexcept
        : BaseType{}
    {
    }

    // Copy constructor. The base class has no copy constructor, so the base is default-constructed
    // and the state is then copied with the assignment operator.
    OffsetPointer(const OffsetPointer &op) noexcept
        : BaseType{}
    {
        *this = op;
    }

    // Copy assignment (lvalue targets only); forwards to the base class.
    OffsetPointer &operator=(const OffsetPointer &op) & noexcept
    {
        BaseType::operator=(op);
        return *this;
    }

    // Move constructor; same default-construct-then-assign pattern as the copy constructor.
    OffsetPointer(OffsetPointer &&op) noexcept
        : BaseType{}
    {
        *this = std::move(op);
    }

    // Move assignment (lvalue targets only); forwards to the base class.
    OffsetPointer &operator=(OffsetPointer &&op) & noexcept
    {
        BaseType::operator=(std::move(op));
        return *this;
    }

    ~OffsetPointer() noexcept = default;

    // Converting copy constructor from OffsetPointer<U>; enabled only for the cases listed at
    // CanInitFrom.
    template<typename U, CanInitFrom<U> = 0>
    OffsetPointer(const OffsetPointer<U> &op) noexcept
        : BaseType{}
    {
        *this = op;
    }

    // Converting copy assignment from OffsetPointer<U>; enabled only for the CanInitFrom cases.
    template<typename U, CanInitFrom<U> = 0>
    auto operator=(const OffsetPointer<U> &op) & noexcept -> OffsetPointer &
    {
        BaseType::operator=(op);
        return *this;
    }

    // Converting move constructor from OffsetPointer<U>; enabled only for the CanInitFrom cases.
    template<typename U, CanInitFrom<U> = 0>
    OffsetPointer(OffsetPointer<U> &&op) noexcept
        : BaseType{}
    {
        *this = std::move(op);
    }

    // Converting move assignment from OffsetPointer<U>; enabled only for the CanInitFrom cases.
    template<typename U, CanInitFrom<U> = 0>
    auto operator=(OffsetPointer<U> &&op) & noexcept -> OffsetPointer &
    {
        BaseType::operator=(std::move(op));
        return *this;
    }

    // Factory: creates an OffsetPointer for memory type `type`, optionally with a base pointer
    // (defaults to nullptr). This is the only public way to reach the (type, basePtr) constructor.
    static auto Create(cupva::MemType const type, T *const basePtr = nullptr) -> OffsetPointer
    {
        OffsetPointer rval{type, basePtr};
        return rval;
    }

    // Sets the base pointer; forwards to the base class.
    void setBase(T *ptr)
    {
        BaseType::setBasePtr(ptr);
    }

    // Returns the memory type reported by the base class.
    cupva::MemType getMemType() const noexcept
    {
        return BaseType::getPtrMemType();
    }

    // Advances the pointer by `rhs` elements (rhs may be negative). Not available for void.
    // sizeof(T) is converted to ptrdiff_t before multiplying so the product is computed in signed
    // arithmetic, consistent with the pointer-difference operator below.
    template<typename U = T, CanDoArithmetic<U> = 0>
    auto operator+=(std::ptrdiff_t const rhs) & noexcept -> OffsetPointer &
    {
        BaseType::operator+=(rhs * static_cast<std::ptrdiff_t>(sizeof(T)));
        return *this;
    }

    // Moves the pointer back by `rhs` elements (rhs may be negative). Not available for void.
    // Uses the same signed scaling as operator+=.
    template<typename U = T, CanDoArithmetic<U> = 0>
    auto operator-=(std::ptrdiff_t const rhs) & noexcept -> OffsetPointer &
    {
        BaseType::operator-=(rhs * static_cast<std::ptrdiff_t>(sizeof(T)));
        return *this;
    }

    // Returns a copy advanced by `rhs` elements. Not available for void.
    template<typename U = T, CanDoArithmetic<U> = 0>
    auto operator+(std::ptrdiff_t const rhs) const & noexcept -> OffsetPointer
    {
        OffsetPointer rval{*this};
        rval += rhs;
        return rval;
    }

    // Returns a copy moved back by `rhs` elements. Not available for void.
    template<typename U = T, CanDoArithmetic<U> = 0>
    auto operator-(std::ptrdiff_t const rhs) const & noexcept -> OffsetPointer
    {
        OffsetPointer rval{*this};
        rval -= rhs;
        return rval;
    }

    // Distance between two OffsetPointers of the same underlying type (constness may differ):
    // the base-class difference divided by sizeof(T), i.e. expressed in elements.
    template<typename U, std::enable_if_t<std::is_same<std::remove_cv_t<T>, std::remove_cv_t<U>>::value, int> = 0>
    std::ptrdiff_t operator-(const cupva::OffsetPointer<U> &rhs) const &
    {
        return BaseType::operator-(rhs) / static_cast<ptrdiff_t>(sizeof(T));
    }

private:
    // Used by Create(); forwards memory type and base pointer to the base class.
    explicit OffsetPointer(cupva::MemType const type, T *const basePtr)
        : BaseType{type, basePtr}
    {
    }
};
// Common base of the data-flow wrapper classes in this header. It adds no members of its own; it
// only fixes the held implementation type to impl::BaseDataFlow via DynamicStorage.
class BaseDataFlow : public DynamicStorage<impl::BaseDataFlow>
{
};
// Data-flow wrapper configured through chained setter calls. Movable, not copyable (use copy() to
// duplicate configuration from another instance).
// All non-template members are defined out of line; setters return a StaticDataFlow reference
// (presumably *this, to allow chaining - confirm). Parameter meanings below are inferred from the
// names only unless stated otherwise.
class DLL_EXPORT StaticDataFlow final : public BaseDataFlow
{
public:
// Marks this type as acceptable to link()/append(), which require T::isPrimitive to be true.
static constexpr bool isPrimitive{true};
// Class-scope shorthands for the GranType enumerators, usable as StaticDataFlow::TILE etc.
static constexpr GranType TILE{GranType::TILE};
static constexpr GranType DIM1{GranType::DIM1};
static constexpr GranType DIM2{GranType::DIM2};
static constexpr GranType ALL{GranType::ALL};
// Factory function.
static StaticDataFlow Create();
StaticDataFlow() noexcept;
~StaticDataFlow() noexcept;
StaticDataFlow(StaticDataFlow &&) noexcept;
StaticDataFlow &operator=(StaticDataFlow &&) & noexcept;
// StaticDataFlow is not copyable
StaticDataFlow(StaticDataFlow const &) = delete;
// StaticDataFlow is not copyable
StaticDataFlow &operator=(StaticDataFlow const &) & = delete;
// Links `next` after this data flow: calls linkInternal(next, false).
// Only enabled for types with T::isPrimitive == true; `next` must be usable as a BaseDataFlow&.
template<typename T, typename std::enable_if<T::isPrimitive, bool>::type = true>
StaticDataFlow &link(T &next)
{
return linkInternal(next, false);
}
// Same as link() but calls linkInternal(next, true), i.e. with isAppend set.
template<typename T, typename std::enable_if<T::isPrimitive, bool>::type = true>
StaticDataFlow &append(T &next)
{
return linkInternal(next, true);
}
// Associates a Parameter as the handler of this data flow.
StaticDataFlow &handler(const Parameter &handler);
// Sets bpp. NOTE(review): unit (bits vs. bytes per pixel) is not visible here - confirm.
StaticDataFlow &bpp(int32_t const bpp);
// Sets the tile size; height defaults to 1.
StaticDataFlow &tile(int32_t const width, int32_t const height = 1);
// Sets the padding amount `dim` for direction `dir`.
StaticDataFlow &padDim(PadDirType const dir, int32_t const dim);
// Sets the padding mode and value; val defaults to 0.
StaticDataFlow &padVal(PadModeType const type, int32_t const val = 0);
// Sets the granularity (see the TILE/DIM1/DIM2/ALL constants above).
StaticDataFlow &granularity(GranType const type);
// Sets the source, either as a raw pointer or as an OffsetPointer to const data.
StaticDataFlow &src(const void *const ptr);
StaticDataFlow &src(OffsetPointer<void const> const op);
// Returns an integer id for this data flow.
int32_t id() const;
// Source addressing: line pitch, per-dimension iteration count (niter) and advance (adv), and an
// optional circular buffer given by start and length. Units are not visible here - confirm.
StaticDataFlow &srcLinePitch(int32_t const linePitch);
StaticDataFlow &srcDim1(int32_t const niter, int32_t const adv);
StaticDataFlow &srcDim2(int32_t const niter, int32_t const adv);
StaticDataFlow &srcDim3(int32_t const niter, int32_t const adv);
StaticDataFlow &srcCircularBuffer(int32_t const cbStart, int32_t const cbLen);
// Sets the destination. A second pointer / OffsetPointer is optional (defaults to nullptr / a
// default-constructed OffsetPointer). NOTE(review): presumably a second buffer for
// double-buffering - confirm.
StaticDataFlow &dst(void *const ptr, void *const ptr2 = nullptr);
StaticDataFlow &dst(OffsetPointer<void> const op, OffsetPointer<void> const op2 = {});
// Destination addressing; mirrors the src* setters above.
StaticDataFlow &dstLinePitch(int32_t const linePitch);
StaticDataFlow &dstDim1(int32_t const niter, int32_t const adv);
StaticDataFlow &dstDim2(int32_t const niter, int32_t const adv);
StaticDataFlow &dstDim3(int32_t const niter, int32_t const adv);
StaticDataFlow &dstCircularBuffer(int32_t const cbStart, int32_t const cbLen);
// Copies from `source` into this object (explicit replacement for the deleted copy operations).
void copy(const StaticDataFlow &source);
private:
// Shared implementation of link()/append(); isAppend distinguishes the two. Defined out of line.
StaticDataFlow &linkInternal(cupva::BaseDataFlow &nextP, const bool isAppend);
};
// Data-flow wrapper with only a source, a handler and linking. Movable, not copyable (use copy()).
// Non-template members are defined out of line; setters return a ConfigDataFlow reference
// (presumably *this, to allow chaining - confirm).
class DLL_EXPORT ConfigDataFlow final : public BaseDataFlow
{
public:
// Marks this type as acceptable to link()/append(), which require T::isPrimitive to be true.
static constexpr bool isPrimitive{true};
// Factory function.
static ConfigDataFlow Create();
ConfigDataFlow() noexcept;
~ConfigDataFlow() noexcept;
// ConfigDataFlow is not copyable
ConfigDataFlow(ConfigDataFlow const &) = delete;
// ConfigDataFlow is not copyable
ConfigDataFlow &operator=(ConfigDataFlow const &) & = delete;
ConfigDataFlow(ConfigDataFlow &&obj) noexcept;
ConfigDataFlow &operator=(ConfigDataFlow &&obj) & noexcept;
// Links `next` after this data flow: calls linkInternal(next, false).
// Only enabled for types with T::isPrimitive == true; `next` must be usable as a BaseDataFlow&.
template<typename T, typename std::enable_if<T::isPrimitive, bool>::type = true>
ConfigDataFlow &link(T &next)
{
return linkInternal(next, false);
}
// Same as link() but calls linkInternal(next, true), i.e. with isAppend set.
template<typename T, typename std::enable_if<T::isPrimitive, bool>::type = true>
ConfigDataFlow &append(T &next)
{
return linkInternal(next, true);
}
// Sets the source pointer.
ConfigDataFlow &src(const void *const ptr);
// Returns an integer id for this data flow.
int32_t id() const;
// Associates a Parameter as the handler of this data flow.
ConfigDataFlow &handler(const Parameter &handler);
// Copies from `rhs` into this object (explicit replacement for the deleted copy operations).
void copy(const ConfigDataFlow &rhs);
private:
// Shared implementation of link()/append(); isAppend distinguishes the two. Defined out of line.
ConfigDataFlow &linkInternal(cupva::BaseDataFlow &nextP, const bool isAppend);
};
namespace priv {
// Trait mapping a pointer-like type to the type it points to.
// The primary template is intentionally left undefined: using the trait with anything other than a
// raw pointer or a cupva::OffsetPointer is a compile-time error.
template<typename PointerLike>
struct GetPointedToType;

// OffsetPointer<Pointee>  ->  Pointee
template<typename Pointee>
struct GetPointedToType<OffsetPointer<Pointee>>
{
    // The element type referred to by the OffsetPointer.
    using Type = Pointee;
};

// Pointee *  ->  Pointee
template<typename Pointee>
struct GetPointedToType<Pointee *>
{
    // The element type referred to by the raw pointer.
    using Type = Pointee;
};
} // namespace priv
// Data-flow wrapper for 2-D (width x height) transfers with typed src/dst setters. Movable, not
// copyable. Non-template members are defined out of line; setters return a RasterDataFlow
// reference (presumably *this, to allow chaining - confirm).
class DLL_EXPORT RasterDataFlow final : public BaseDataFlow
{
public:
// Factory function.
static RasterDataFlow Create();
RasterDataFlow() noexcept;
~RasterDataFlow() noexcept;
RasterDataFlow(RasterDataFlow &&) noexcept;
RasterDataFlow &operator=(RasterDataFlow &&) & noexcept;
// RasterDataFlow is not copyable
RasterDataFlow(RasterDataFlow const &) = delete;
// RasterDataFlow is not copyable
RasterDataFlow &operator=(RasterDataFlow const &) & = delete;
// Links another RasterDataFlow to this one.
RasterDataFlow &link(RasterDataFlow &next);
// Associates a Parameter as the handler of this data flow.
RasterDataFlow &handler(const Parameter &handler);
// Sets the source. `op` must be a raw pointer or an OffsetPointer (anything else fails in
// priv::GetPointedToType) and must not point to void. The element size sizeof(PointedToType) is
// passed to srcImpl as its `bpp` argument. linePitch defaults to 0.
// NOTE(review): units of width/height/linePitch are presumably elements - confirm.
template<typename T>
RasterDataFlow &src(T &&op, int32_t const width, int32_t const height, int32_t const linePitch = 0)
{
using PointedToType = typename priv::GetPointedToType<std::decay_t<T>>::Type;
static_assert(!std::is_same<std::remove_cv_t<PointedToType>, void>::value,
"RasterDataFlow::src requires non-void pointed-to type");
srcImpl(std::forward<T>(op), sizeof(PointedToType), width, height, linePitch);
return *this;
}
// Sets the destination. Same rules as src(), and additionally the pointed-to type must be
// non-const since the destination is written to.
template<typename T>
RasterDataFlow &dst(T &&op, int32_t const width, int32_t const height, int32_t const linePitch = 0)
{
using PointedToType = typename priv::GetPointedToType<std::decay_t<T>>::Type;
static_assert(!std::is_const<PointedToType>::value, "RasterDataFlow::dst requires non-const pointed-to type");
static_assert(!std::is_same<std::remove_cv_t<PointedToType>, void>::value,
"RasterDataFlow::dst requires non-void pointed-to type");
dstImpl(std::forward<T>(op), sizeof(PointedToType), width, height, linePitch);
return *this;
}
// Configures a halo of `num` (or numX/numY) with a padding mode and value.
// Defaults: mode = PadModeType::PAD_CONST, val = 0.
RasterDataFlow &halo(int32_t const num, PadModeType const mode = PadModeType::PAD_CONST, int32_t const val = 0);
RasterDataFlow &halo(int32_t const numX, int32_t const numY, PadModeType const mode = PadModeType::PAD_CONST,
int32_t const val = 0);
// Sets a region of interest given by origin (x, y) and size (width, height).
RasterDataFlow &roi(int32_t const x, int32_t const y, int32_t const width, int32_t const height);
// Sets the tile size.
RasterDataFlow &tile(int32_t const width, int32_t const height);
// Sets the scan order; defaults to 0U. NOTE(review): the encoding of scanOrder is not visible here.
RasterDataFlow &scanOrder(uint32_t const scanOrder = 0U);
// Sets the tile arena size.
RasterDataFlow &tileArena(int32_t const width, int32_t const height);
// Sets the transposition mode; defaults to TRANS_MODE_1.
RasterDataFlow &transpose(TranspositionMode const mode = TRANS_MODE_1);
// Sets the tile buffer(s). Both pointers must have the same element type T; they are passed to
// tileBufferImpl as void*, so T must be non-const. ptr2 is optional (defaults to nullptr).
template<typename T>
RasterDataFlow &tileBuffer(T *const ptr, T *const ptr2 = nullptr)
{
tileBufferImpl(ptr, ptr2);
return *this;
}
private:
// Untyped back-ends of src()/dst()/tileBuffer(), overloaded for raw pointers and OffsetPointers.
// Defined out of line.
void srcImpl(const void *const ptr, int32_t const bpp, int32_t const width, int32_t const height,
int32_t const linePitch);
void srcImpl(OffsetPointer<void const> const op, int32_t const bpp, int32_t const width, int32_t const height,
int32_t const linePitch);
void dstImpl(void *const ptr, int32_t const bpp, int32_t const width, int32_t const height,
int32_t const linePitch);
void dstImpl(OffsetPointer<void> const op, int32_t const bpp, int32_t const width, int32_t const height,
int32_t const linePitch);
void tileBufferImpl(void *const ptr, void *const ptr2);
};
// Data-flow wrapper whose configuration is organised as nodes addressed by (nodeIdx, laneIdx).
// Movable, not copyable. All members are defined out of line.
class DLL_EXPORT DynamicDataFlow final : public BaseDataFlow
{
public:
// One configurable node of a DynamicDataFlow. Holds an impl::Node through DynamicStorage.
// Neither movable nor copyable; instances are reached by reference through DynamicDataFlow::at().
// Setters return a Node reference (presumably *this, to allow chaining - confirm).
class Node final : public DynamicStorage<impl::Node>
{
public:
Node() noexcept;
~Node() noexcept;
// Node is not movable
Node(Node &&obj) noexcept = delete;
// Node is not movable
Node &operator=(Node &&obj) & noexcept = delete;
// Node is not copyable
Node(Node const &) = delete;
// Node is not copyable
Node &operator=(Node const &) & = delete;
// Sets bpp. NOTE(review): unit (bits vs. bytes per pixel) is not visible here - confirm.
Node &bpp(int32_t const bpp);
// Sets the padding amount `dim` for direction `dir`.
Node &padDim(PadDirType const dir, int32_t const dim);
// Sets the padding mode and value; val defaults to 0. The only Node setter declared noexcept.
Node &padVal(PadModeType const mode, int32_t const val = 0) noexcept;
// Sets the tile size; height defaults to 1.
Node &tile(int32_t const width, int32_t const height = 1);
// Sets source / destination together with its line pitch, from a raw pointer or an OffsetPointer.
Node &src(const void *const ptr, int32_t const linePitch);
Node &src(OffsetPointer<void const> const op, int32_t const linePitch);
Node &dst(void *const ptr, int32_t const linePitch);
Node &dst(OffsetPointer<void> const op, int32_t const linePitch);
};
DynamicDataFlow() noexcept;
// Factory function.
static DynamicDataFlow Create();
~DynamicDataFlow() noexcept;
DynamicDataFlow(DynamicDataFlow &&) noexcept;
DynamicDataFlow &operator=(DynamicDataFlow &&) & noexcept;
// DynamicDataFlow is not copyable
DynamicDataFlow(DynamicDataFlow const &) = delete;
// DynamicDataFlow is not copyable
DynamicDataFlow &operator=(DynamicDataFlow const &) & = delete;
// Initialises the data flow with a Parameter and the number of nodes and lanes (both default to 1).
// NOTE(review): presumably must be called before at() - confirm.
void init(const Parameter &pmt, int32_t const nodeCount = 1, int32_t const laneCount = 1);
// Returns a reference to the node at (nodeIdx, laneIdx); both indices default to 0.
// NOTE(review): behaviour for out-of-range indices is not visible here.
Node &at(int32_t const nodeIdx = 0, int32_t const laneIdx = 0);
};
// Data-flow wrapper with typed src/dst setters restricted to 1-, 2- or 4-byte element types.
// Movable, not copyable. Non-template members are defined out of line; setters return a
// GatherScatterDataFlow reference (presumably *this, to allow chaining - confirm).
class DLL_EXPORT GatherScatterDataFlow final : public BaseDataFlow
{
public:
GatherScatterDataFlow() noexcept;
// Factory function.
static GatherScatterDataFlow Create();
~GatherScatterDataFlow() noexcept;
GatherScatterDataFlow(GatherScatterDataFlow &&) noexcept;
GatherScatterDataFlow &operator=(GatherScatterDataFlow &&) & noexcept;
// GatherScatterDataFlow is not copyable
GatherScatterDataFlow(GatherScatterDataFlow const &) = delete;
// GatherScatterDataFlow is not copyable
GatherScatterDataFlow &operator=(GatherScatterDataFlow const &) & = delete;
// Associates a Parameter as the handler of this data flow.
GatherScatterDataFlow &handler(Parameter const &handler);
// Sets the number of tiles per trigger.
GatherScatterDataFlow &numTilesPerTrigger(int32_t const numTiles);
// Sets the buffer type; allowNonUniformLinePitch defaults to false.
GatherScatterDataFlow &bufferType(mem::BufferType const type, bool const allowNonUniformLinePitch = false);
// Sets the source from explicit dimensions. `op` must be a raw pointer or an OffsetPointer whose
// element type is non-void and 1, 2 or 4 bytes wide; sizeof(PointedToType) is passed to srcImpl as
// its `bpp` argument. Defaults: width = 0, height = 1, linePitch = 0.
template<typename T>
GatherScatterDataFlow &src(T &&op, int32_t const width = 0, int32_t const height = 1, int32_t const linePitch = 0)
{
using PointedToType = typename priv::GetPointedToType<std::decay_t<T>>::Type;
static_assert(!std::is_same<std::remove_cv_t<PointedToType>, void>::value,
"GatherScatterDataFlow::src requires non-void pointed-to type");
static_assert((sizeof(PointedToType) == sizeof(uint8_t)) || (sizeof(PointedToType) == sizeof(uint16_t)) ||
(sizeof(PointedToType) == sizeof(uint32_t)),
"GatherScatterDataFlow only supports 1, 2 or 4 byte pixels");
srcImpl(std::forward<T>(op), sizeof(PointedToType), width, height, linePitch);
return *this;
}
// Sets the source from a PlanarGeometry instead of explicit dimensions; same type rules as above.
template<typename T>
GatherScatterDataFlow &src(T &&op, PlanarGeometry const &geom)
{
using PointedToType = typename priv::GetPointedToType<std::decay_t<T>>::Type;
static_assert(!std::is_same<std::remove_cv_t<PointedToType>, void>::value,
"GatherScatterDataFlow::src requires non-void pointed-to type");
static_assert((sizeof(PointedToType) == sizeof(uint8_t)) || (sizeof(PointedToType) == sizeof(uint16_t)) ||
(sizeof(PointedToType) == sizeof(uint32_t)),
"GatherScatterDataFlow only supports 1, 2 or 4 byte pixels");
srcImpl(std::forward<T>(op), sizeof(PointedToType), geom);
return *this;
}
// Sets the destination from explicit dimensions; same type rules and defaults as src().
// NOTE(review): there is no static_assert against const element types here; a const pointer is
// instead rejected when no dstImpl overload (void* / OffsetPointer<void>) accepts it.
template<typename T>
GatherScatterDataFlow &dst(T &&op, int32_t const width = 0, int32_t const height = 1, int32_t const linePitch = 0)
{
using PointedToType = typename priv::GetPointedToType<std::decay_t<T>>::Type;
static_assert(!std::is_same<std::remove_cv_t<PointedToType>, void>::value,
"GatherScatterDataFlow::dst requires non-void pointed-to type");
static_assert((sizeof(PointedToType) == sizeof(uint8_t)) || (sizeof(PointedToType) == sizeof(uint16_t)) ||
(sizeof(PointedToType) == sizeof(uint32_t)),
"GatherScatterDataFlow only supports 1, 2 or 4 byte pixels");
dstImpl(std::forward<T>(op), sizeof(PointedToType), width, height, linePitch);
return *this;
}
// Sets the destination from a PlanarGeometry; same type rules as the other dst() overload.
template<typename T>
GatherScatterDataFlow &dst(T &&op, const PlanarGeometry &geom)
{
using PointedToType = typename priv::GetPointedToType<std::decay_t<T>>::Type;
static_assert(!std::is_same<std::remove_cv_t<PointedToType>, void>::value,
"GatherScatterDataFlow::dst requires non-void pointed-to type");
static_assert((sizeof(PointedToType) == sizeof(uint8_t)) || (sizeof(PointedToType) == sizeof(uint16_t)) ||
(sizeof(PointedToType) == sizeof(uint32_t)),
"GatherScatterDataFlow only supports 1, 2 or 4 byte pixels");
dstImpl(std::forward<T>(op), sizeof(PointedToType), geom);
return *this;
}
// Sets the tile size; height defaults to 1.
GatherScatterDataFlow &tile(int32_t const width, int32_t const height = 1);
// Sets the padding mode and value; val defaults to 0.
GatherScatterDataFlow &padVal(PadModeType const mode, int32_t const val = 0);
private:
// Untyped back-ends of src()/dst(): raw pointer or OffsetPointer, with either explicit dimensions
// or a PlanarGeometry. Defined out of line.
void srcImpl(const void *const ptr, int32_t const bpp, int32_t const width, int32_t const height,
int32_t const linePitch);
void srcImpl(OffsetPointer<void const> const op, int32_t const bpp, int32_t const width, int32_t const height,
int32_t const linePitch);
void srcImpl(const void *const ptr, int32_t const bpp, PlanarGeometry const &geom);
void srcImpl(OffsetPointer<void const> const op, int32_t const bpp, PlanarGeometry const &geom);
void dstImpl(void *const ptr, int32_t const bpp, int32_t const width, int32_t const height,
int32_t const linePitch);
void dstImpl(OffsetPointer<void> const op, int32_t const bpp, int32_t const width, int32_t const height,
int32_t const linePitch);
void dstImpl(void *const ptr, int32_t const bpp, PlanarGeometry const &geom);
void dstImpl(OffsetPointer<void> const op, int32_t const bpp, PlanarGeometry const &geom);
};
// Lightweight, copyable handle to one transfer of a SequenceDataFlow.
//
// The handle stores only a raw pointer to the implementation object and does nothing on
// destruction, so it does not own that object. Setters are defined out of line and return a
// SequenceDataFlowTransfer reference (presumably *this, to allow chaining - confirm).
class DLL_EXPORT SequenceDataFlowTransfer final
{
public:
    // ImplType is the internal representation for SequenceDataFlowTransfer
    using ImplType = cupva::impl::SequenceDataFlowTransfer;

    // Constructor to wrap implementation object
    SequenceDataFlowTransfer(ImplType *const tf) noexcept;
    // SequenceDataFlowTransfer does nothing on destruction
    ~SequenceDataFlowTransfer() noexcept = default;
    // SequenceDataFlowTransfer is default move constructible
    SequenceDataFlowTransfer(SequenceDataFlowTransfer &&) = default;
    // SequenceDataFlowTransfer is default move assignable
    SequenceDataFlowTransfer &operator=(SequenceDataFlowTransfer &&) & = default;
    // SequenceDataFlowTransfer is default copy constructible
    SequenceDataFlowTransfer(SequenceDataFlowTransfer const &) = default;
    // SequenceDataFlowTransfer is default copy assignable
    SequenceDataFlowTransfer &operator=(SequenceDataFlowTransfer const &) = default;

    // Sets the tile size; height defaults to 1.
    SequenceDataFlowTransfer &tile(int32_t const width, int32_t const height = 1);

    // Sets the source together with its line pitch, from a raw pointer or an OffsetPointer.
    SequenceDataFlowTransfer &src(const void *const ptr, int32_t const linePitch);
    SequenceDataFlowTransfer &src(OffsetPointer<void const> const op, int32_t const linePitch);
    // Source addressing per dimension: iteration count (niter) and advance (adv).
    SequenceDataFlowTransfer &srcDim1(int32_t const niter, int32_t const adv);
    SequenceDataFlowTransfer &srcDim2(int32_t const niter, int32_t const adv);
    SequenceDataFlowTransfer &srcDim3(int32_t const niter, int32_t const adv);

    // Sets the destination together with its line pitch, from a raw pointer or an OffsetPointer.
    SequenceDataFlowTransfer &dst(void *const ptr, int32_t const linePitch);
    SequenceDataFlowTransfer &dst(OffsetPointer<void> const op, int32_t const linePitch);
    // Destination addressing per dimension; mirrors the srcDim* setters.
    SequenceDataFlowTransfer &dstDim1(int32_t const niter, int32_t const adv);
    SequenceDataFlowTransfer &dstDim2(int32_t const niter, int32_t const adv);
    SequenceDataFlowTransfer &dstDim3(int32_t const niter, int32_t const adv);

    // Sets the transfer mode.
    SequenceDataFlowTransfer &mode(TransferModeType const mode);

    // Copies from `other`, which is taken by value (cheap: the handle is a single pointer).
    // NOTE(review): presumably copies the transfer configuration of `other` into the transfer this
    // handle refers to - confirm against the implementation.
    SequenceDataFlowTransfer &copy(SequenceDataFlowTransfer const other);

private:
    // Pointer to internal implementation
    ImplType *m_impl;
};
// Data-flow wrapper made up of individual transfers, each configured through a
// SequenceDataFlowTransfer handle. Movable, not copyable. All members are defined out of line.
class DLL_EXPORT SequenceDataFlow final : public BaseDataFlow
{
public:
// Constant named for the maximum number of transfers.
// NOTE(review): presumably the limit on addTransfer() calls per SequenceDataFlow - confirm.
static constexpr int32_t MAXIMUM_TRANSFERS{8};
SequenceDataFlow() noexcept;
// Factory function.
static SequenceDataFlow Create();
~SequenceDataFlow() noexcept;
SequenceDataFlow(SequenceDataFlow &&) noexcept;
SequenceDataFlow &operator=(SequenceDataFlow &&) & noexcept;
// SequenceDataFlow is not copyable
SequenceDataFlow(SequenceDataFlow const &) = delete;
// SequenceDataFlow is not copyable
SequenceDataFlow &operator=(SequenceDataFlow const &) & = delete;
// Associates a Parameter as the handler of this data flow; returns a SequenceDataFlow reference.
SequenceDataFlow &handler(const Parameter &handler);
// Adds a transfer and returns a handle (by value) for configuring it.
SequenceDataFlowTransfer addTransfer();
};
// Data-flow wrapper for 3-D (width x height x depth) transfers with typed src/dst setters.
// Movable, not copyable. Non-template members are defined out of line; setters return a
// TensorDataFlow reference (presumably *this, to allow chaining - confirm).
class DLL_EXPORT TensorDataFlow final : public BaseDataFlow
{
public:
// Factory function.
static TensorDataFlow Create();
TensorDataFlow() noexcept;
~TensorDataFlow() noexcept;
TensorDataFlow(TensorDataFlow &&) noexcept;
TensorDataFlow &operator=(TensorDataFlow &&) & noexcept;
// TensorDataFlow is not copyable
TensorDataFlow(TensorDataFlow const &) = delete;
// TensorDataFlow is not copyable
TensorDataFlow &operator=(TensorDataFlow const &) & = delete;
// Associates a Parameter as the handler of this data flow.
TensorDataFlow &handler(const Parameter &handler);
// Sets the source. `op` must be a raw pointer or an OffsetPointer (anything else fails in
// priv::GetPointedToType) and must not point to void. sizeof(PointedToType) is passed to srcImpl
// as its `bpp` argument. rowStride and depthStride default to 0.
// NOTE(review): units of the dimensions and strides are presumably elements - confirm.
template<typename T>
TensorDataFlow &src(T &&op, int32_t const width, int32_t const height, int32_t const depth,
int32_t const rowStride = 0, int32_t const depthStride = 0)
{
using PointedToType = typename priv::GetPointedToType<std::decay_t<T>>::Type;
static_assert(!std::is_same<std::remove_cv_t<PointedToType>, void>::value,
"TensorDataFlow::src requires non-void pointed-to type");
srcImpl(std::forward<T>(op), sizeof(PointedToType), width, height, depth, rowStride, depthStride);
return *this;
}
// Sets the destination. Same rules as src(), and additionally the pointed-to type must be
// non-const since the destination is written to.
template<typename T>
TensorDataFlow &dst(T &&op, int32_t const width, int32_t const height, int32_t const depth,
int32_t const rowStride = 0, int32_t const depthStride = 0)
{
using PointedToType = typename priv::GetPointedToType<std::decay_t<T>>::Type;
static_assert(!std::is_const<PointedToType>::value, "TensorDataFlow::dst requires non-const pointed-to type");
static_assert(!std::is_same<std::remove_cv_t<PointedToType>, void>::value,
"TensorDataFlow::dst requires non-void pointed-to type");
dstImpl(std::forward<T>(op), sizeof(PointedToType), width, height, depth, rowStride, depthStride);
return *this;
}
// Configures a halo of `num` (or numX/numY) with a padding mode and value.
// Defaults: mode = PadModeType::PAD_CONST, val = 0.
TensorDataFlow &halo(int32_t const num, PadModeType const mode = PadModeType::PAD_CONST, int32_t const val = 0);
TensorDataFlow &halo(int32_t const numX, int32_t const numY, PadModeType const mode = PadModeType::PAD_CONST,
int32_t const val = 0);
// Sets a region of interest given by origin (x, y) and size (width, height).
TensorDataFlow &roi(int32_t const x, int32_t const y, int32_t const width, int32_t const height);
// Sets the tile size; depth defaults to 0.
// NOTE(review): the meaning of depth == 0 is not visible here - confirm.
TensorDataFlow &tile(int32_t const width, int32_t const height, int32_t const depth = 0);
// Sets the tile buffer. The pointer is passed to tileBufferImpl as void*, so T must be non-const.
template<typename T>
TensorDataFlow &tileBuffer(T *const ptr)
{
tileBufferImpl(ptr);
return *this;
}
private:
// Untyped back-ends of src()/dst()/tileBuffer(), overloaded for raw pointers and OffsetPointers.
// Defined out of line.
void srcImpl(const void *const ptr, int32_t const bpp, int32_t const width, int32_t const height,
int32_t const depth, int32_t const rowStride, int32_t const depthStride);
void srcImpl(OffsetPointer<void const> const op, int32_t const bpp, int32_t const width, int32_t const height,
int32_t const depth, int32_t const rowStride, int32_t const depthStride);
void dstImpl(void *const ptr, int32_t const bpp, int32_t const width, int32_t const height, int32_t const depth,
int32_t const rowStride, int32_t const depthStride);
void dstImpl(OffsetPointer<void> const op, int32_t const bpp, int32_t const width, int32_t const height,
int32_t const depth, int32_t const rowStride, int32_t const depthStride);
void tileBufferImpl(void *const ptr);
};
// Public handle for a synchronisation object; the implementation object (impl::SyncObj) is held by
// the DynamicStorage base. Movable, not copyable. All members are defined out of line.
class DLL_EXPORT SyncObj final : public DynamicStorage<impl::SyncObj>
{
public:
// Factory. Defaults: isTimeStamp = false, clientType = SyncClientType::SIGNALER_WAITER,
// waitMode = SyncWaitMode::YIELD.
// NOTE(review): isTimeStamp presumably enables Fence::timestamp() for fences on this object -
// confirm.
static SyncObj Create(bool const isTimeStamp = false,
SyncClientType const clientType = SyncClientType::SIGNALER_WAITER,
SyncWaitMode const waitMode = SyncWaitMode::YIELD);
SyncObj() noexcept;
// SyncObj is not copyable
SyncObj(SyncObj const &) = delete;
// SyncObj is not copyable
SyncObj &operator=(SyncObj const &) & = delete;
SyncObj(SyncObj &&obj) noexcept;
SyncObj &operator=(SyncObj &&obj) & noexcept;
// Defined out of line. NOTE(review): presumably releases resources ahead of destruction - confirm.
void finalize();
~SyncObj() noexcept;
};
// Fence created on a SyncObj. The implementation object (impl::Fence) lives in the StaticStorage
// base, sized and aligned by priv::CUPVA_FENCE_SIZE / priv::CUPVA_FENCE_ALIGN.
// Unlike most wrappers in this header, Fence is both copyable and movable (all four operations
// are defaulted).
class DLL_EXPORT Fence final : public StaticStorage<impl::Fence, priv::CUPVA_FENCE_SIZE, priv::CUPVA_FENCE_ALIGN>
{
public:
// Default constructor, defined out of line.
Fence() noexcept;
// Creates a fence associated with `syncObj`. Explicit, so a SyncObj never converts implicitly.
explicit Fence(SyncObj &syncObj) noexcept;
~Fence() noexcept;
Fence(Fence &&obj) noexcept = default;
Fence &operator=(Fence &&obj) & noexcept = default;
Fence(const Fence &obj) noexcept = default;
Fence &operator=(const Fence &obj) & noexcept = default;
// Waits on the fence and returns a bool; timeout defaults to -1.
// NOTE(review): presumably -1 means "wait indefinitely" and the result reports whether the fence
// was reached before the timeout; the timeout unit is not visible here - confirm.
bool wait(int64_t const timeout = -1) const;
// Returns a 64-bit timestamp. NOTE(review): unit and validity conditions are not visible here.
uint64_t timestamp() const;
};
/**
 * @brief Public handle for an executable, backed by
 *        DynamicStorage<impl::Executable>.
 *
 * Move-only: the copy operations are deleted, the move operations are noexcept.
 */
class DLL_EXPORT Executable final : public DynamicStorage<impl::Executable>
{
public:
    /**
     * @brief Factory returning an Executable by value, built from a read-only
     *        buffer.
     *
     * @param data Pointer to the buffer (not modified through this pointer).
     * @param size Size argument paired with @p data.
     *             NOTE(review): presumably the buffer length in bytes - confirm.
     */
    static Executable Create(const void *data, int32_t size);

    /** @brief Default constructor; declared noexcept. */
    Executable() noexcept;

    /** @brief Destructor; declared noexcept. */
    ~Executable() noexcept;

    /** @brief Move constructor; declared noexcept. */
    Executable(Executable &&obj) noexcept;

    /** @brief Move assignment (lvalue targets only); declared noexcept. */
    Executable &operator=(Executable &&obj) & noexcept;

    // Copy operations are intentionally unavailable.
    Executable(Executable const &) = delete;
    Executable &operator=(Executable const &) & = delete;

    /**
     * @brief Not declared noexcept.
     *        NOTE(review): presumably releases the underlying object ahead of
     *        destruction - confirm against the implementation.
     */
    void finalize();
};
/**
 * @brief Command type derived from BaseCmd that pairs an L2 operation type
 *        with a device pointer.
 *
 * Move-only: the copy operations are deleted, the move operations are noexcept.
 */
class DLL_EXPORT CmdL2Ops final : public BaseCmd
{
public:
    /**
     * @brief Factory returning a CmdL2Ops by value.
     *
     * @param opType The cupva::mem::L2OperationType to perform.
     * @param devptr Pointer the operation applies to.
     *               NOTE(review): presumably a device pointer obtained from the
     *               cupva::mem allocation API - confirm.
     */
    static CmdL2Ops Create(cupva::mem::L2OperationType const opType, void *const devptr);

    /** @brief Default constructor; declared noexcept. */
    CmdL2Ops() noexcept;

    /** @brief Destructor; declared noexcept. */
    ~CmdL2Ops() noexcept;

    /** @brief Move constructor; declared noexcept. */
    CmdL2Ops(CmdL2Ops &&obj) noexcept;

    /** @brief Move assignment (lvalue targets only); declared noexcept. */
    CmdL2Ops &operator=(CmdL2Ops &&obj) & noexcept;

    // Copy operations are intentionally unavailable.
    CmdL2Ops(CmdL2Ops const &) = delete;
    CmdL2Ops &operator=(CmdL2Ops const &) & = delete;
};
/**
 * @brief Command type derived from BaseCmd that is built from one or more
 *        read-only Fence objects.
 *
 * Move-only: the copy operations are deleted, the move operations are noexcept.
 */
class DLL_EXPORT CmdWaitOnFences final : public BaseCmd
{
public:
    /** @brief Default constructor; declared noexcept. */
    CmdWaitOnFences() noexcept;

    /**
     * @brief Builds the command from a single fence; declared noexcept.
     *        Explicit, so a Fence never converts to this command implicitly.
     */
    explicit CmdWaitOnFences(const Fence &fence) noexcept;

    /**
     * @brief Builds the command from an array of fences. Unlike the
     *        single-fence constructor this overload is not declared noexcept.
     *
     * @param fences Pointer to the first fence.
     * @param count  Number paired with @p fences.
     *               NOTE(review): presumably the element count of the array - confirm.
     */
    CmdWaitOnFences(const Fence *const fences, int32_t const count);

    /** @brief Destructor; declared noexcept. */
    ~CmdWaitOnFences() noexcept;

    /** @brief Move constructor; declared noexcept. */
    CmdWaitOnFences(CmdWaitOnFences &&obj) noexcept;

    /** @brief Move assignment (lvalue targets only); declared noexcept. */
    CmdWaitOnFences &operator=(CmdWaitOnFences &&obj) & noexcept;

    // Copy operations are intentionally unavailable.
    CmdWaitOnFences(CmdWaitOnFences const &) = delete;
    CmdWaitOnFences &operator=(CmdWaitOnFences const &) & = delete;
};
/**
 * @brief Command type derived from BaseCmd that is built from one or more
 *        mutable Fence objects.
 *
 * Move-only: the copy operations are deleted, the move operations are noexcept.
 */
class DLL_EXPORT CmdRequestFences final : public BaseCmd
{
public:
    /** @brief Default constructor; declared noexcept. */
    CmdRequestFences() noexcept;

    /**
     * @brief Builds the command from a single fence; declared noexcept.
     *        Explicit, so a Fence never converts to this command implicitly.
     *        The fence is taken by non-const reference.
     */
    explicit CmdRequestFences(Fence &fence) noexcept;

    /**
     * @brief Builds the command from an array of fences. Unlike the
     *        single-fence constructor this overload is not declared noexcept.
     *
     * @param fences Pointer to the first (mutable) fence.
     * @param count  Number paired with @p fences.
     *               NOTE(review): presumably the element count of the array - confirm.
     */
    CmdRequestFences(Fence *const fences, int32_t const count);

    /** @brief Destructor; declared noexcept. */
    ~CmdRequestFences() noexcept;

    /** @brief Move constructor; declared noexcept. */
    CmdRequestFences(CmdRequestFences &&obj) noexcept;

    /** @brief Move assignment (lvalue targets only); declared noexcept. */
    CmdRequestFences &operator=(CmdRequestFences &&obj) & noexcept;

    // Copy operations are intentionally unavailable.
    CmdRequestFences(CmdRequestFences const &) = delete;
    CmdRequestFences &operator=(CmdRequestFences const &) & = delete;
};
/**
 * @brief Command type derived from BaseCmd that takes no construction
 *        arguments.
 *
 * Move-only: the copy operations are deleted, the move operations are noexcept.
 * NOTE(review): presumably acts as an ordering barrier between commands in a
 * submission - confirm against the implementation.
 */
class DLL_EXPORT CmdBarrier final : public BaseCmd
{
public:
    /** @brief Default constructor; declared noexcept. */
    CmdBarrier() noexcept;

    /** @brief Destructor; declared noexcept. */
    ~CmdBarrier() noexcept;

    /** @brief Move constructor; declared noexcept. */
    CmdBarrier(CmdBarrier &&obj) noexcept;

    /** @brief Move assignment (lvalue targets only); declared noexcept. */
    CmdBarrier &operator=(CmdBarrier &&obj) & noexcept;

    // Copy operations are intentionally unavailable.
    CmdBarrier(CmdBarrier const &) = delete;
    CmdBarrier &operator=(CmdBarrier const &) & = delete;
};
/**
 * @brief Public handle for a program parameter, stored in place through
 *        StaticStorage<impl::Parameter, priv::CUPVA_PARAMETER_SIZE>.
 *
 * Copyable and movable (all four operations are defaulted). Values are written
 * through the set() overloads or through the templated operator=, which simply
 * forwards to set().
 */
class DLL_EXPORT Parameter final : public StaticStorage<impl::Parameter, priv::CUPVA_PARAMETER_SIZE>
{
public:
    /** @brief Default constructor; declared noexcept. */
    Parameter() noexcept;

    /** @brief Destructor; declared noexcept. */
    ~Parameter() noexcept;

    Parameter(Parameter const &) = default;
    Parameter(Parameter &&)      = default;
    Parameter &operator=(Parameter const &) = default;
    Parameter &operator=(Parameter &&) = default;

    /**
     * @brief Returns the pointer obtained from getDevicePointer(), cast to T *.
     *
     * @tparam T Pointee type: a class, a union, an arithmetic type, or void.
     *           Defaults to void, so a plain `ptr()` yields a `void *`.
     *
     * std::is_void is part of the constraint because void is neither a class,
     * a union nor an arithmetic type; without it the default template argument
     * could never be selected and `ptr()` / `ptr<void>()` would not compile.
     */
    template<typename T = void,
             typename std::enable_if<std::is_class<T>::value || std::is_union<T>::value ||
                                         std::is_arithmetic<T>::value || std::is_void<T>::value,
                                     int>::type = 0>
    auto ptr() const -> T *
    {
        return static_cast<T *>(getDevicePointer());
    }

    /**
     * @brief Sets the parameter from a single arithmetic value.
     *
     * The value is passed to setValueArray() as sizeof(T) raw bytes. Only
     * types whose size is 1, 2, 4 or 8 bytes are accepted (checked at compile
     * time).
     *
     * @tparam T Non-pointer arithmetic type.
     * @param val Value to store.
     */
    template<typename T,
             typename std::enable_if<!std::is_pointer<T>::value && std::is_arithmetic<T>::value, int>::type = 0>
    void set(const T &val)
    {
        static_assert((sizeof(T) == sizeof(uint8_t)) || (sizeof(T) == sizeof(uint16_t)) ||
                          (sizeof(T) == sizeof(uint32_t)) || (sizeof(T) == sizeof(uint64_t)),
                      "scalar Parameter types must have standard storage size");
        setValueArray(&val, sizeof(T));
    }

    /**
     * @brief Sets the parameter from an array of elements.
     *
     * Forwards sizeof(T) * numElements bytes starting at @p dataPtr to
     * setValueArray().
     *
     * @tparam T Element type; neither void nor a pointer type.
     * @param dataPtr     Pointer to the first element.
     * @param numElements Number of elements to forward.
     */
    template<typename T,
             typename std::enable_if<!std::is_same<T, void>::value && !std::is_pointer<T>::value, int>::type = 0>
    void set(const T *const dataPtr, int64_t const numElements)
    {
        setValueArray(static_cast<const void *>(dataPtr), sizeof(T) * numElements);
    }

    /**
     * @brief Sets the parameter from a pointer value (the pointer itself, not
     *        the data it refers to) by forwarding it to setValuePointer().
     *
     * @tparam T A pointer type.
     * @param ptr Pointer to store.
     */
    template<typename T, typename std::enable_if<std::is_pointer<T>::value, int>::type = 0>
    void set(T const ptr)
    {
        setValuePointer(ptr);
    }

    /**
     * @brief Assignment shorthand: forwards @p val to the matching
     *        single-argument set() overload and returns *this.
     */
    template<typename T>
    Parameter &operator=(const T &val)
    {
        set(val);
        return *this;
    }

private:
    // Out-of-line sinks used by the public templates above.
    void setValueArray(const void *const dataPtr, int64_t const dataSizeInBytes);
    void setValuePointer(void const *const ptr);
    void *getDevicePointer() const;
};
/**
 * @brief Command type derived from BaseCmd that can be created from an
 *        Executable, exposes named Parameter objects, and owns the data flows
 *        registered on it.
 *
 * Move-only: the copy operations are deleted, the move operations are noexcept.
 */
class DLL_EXPORT CmdProgram final : public BaseCmd
{
public:
    /** @brief Factory returning a CmdProgram by value, without an Executable. */
    static CmdProgram Create();

    /** @brief Factory returning a CmdProgram by value for the given Executable. */
    static CmdProgram Create(const Executable &exec);

    /** @brief Default constructor; declared noexcept. */
    CmdProgram() noexcept;

    /** @brief Move constructor; declared noexcept. */
    CmdProgram(CmdProgram &&obj) noexcept;

    /** @brief Move assignment (lvalue targets only); declared noexcept. */
    CmdProgram &operator=(CmdProgram &&obj) & noexcept;

    // Copy operations are intentionally unavailable.
    CmdProgram(CmdProgram const &) = delete;
    CmdProgram &operator=(CmdProgram const &) & = delete;

    /** @brief Destructor; declared noexcept. */
    ~CmdProgram() noexcept;

    /**
     * @brief Sets the L2 size for this program.
     *        NOTE(review): the unit of @p size is not visible in this header - confirm.
     */
    void setL2Size(int32_t const size);

    /**
     * @brief Looks up a parameter by name and returns a Parameter handle by
     *        value.
     * @param name Null-terminated parameter name.
     */
    Parameter operator[](char_t const *const name);

    /**
     * @brief Creates a data flow of type T via T::Create(), registers it with
     *        this program and returns a reference to the registered object.
     * @tparam T Data flow type; must provide a static Create() and be related
     *           to BaseDataFlow so the static_cast is valid.
     */
    template<typename T>
    auto addDataFlow() -> T &
    {
        return static_cast<T &>(registerDataFlow(T::Create()));
    }

    /**
     * @brief Creates a data flow head of type T via T::Create() and registers
     *        it with a phase and an allocation weight.
     * @tparam T Non-pointer data flow type.
     * @param phase       Defaults to 0.
     * @param allocWeight Defaults to 1.0F.
     *        NOTE(review): the semantics of phase and allocWeight are not
     *        visible in this header - confirm.
     */
    template<typename T, typename std::enable_if<!std::is_pointer<T>::value, bool>::type = true>
    auto addDataFlowHead(int32_t const phase = 0, float const allocWeight = 1.0F) -> T &
    {
        return static_cast<T &>(registerDataFlowHead(T::Create(), phase, allocWeight));
    }

    /**
     * @brief Creates a data flow head of type T via T::Create() and registers
     *        it together with an existing data flow.
     * @tparam T Non-pointer data flow type.
     * @param sharedDataFlow Existing data flow passed through to registration.
     *        NOTE(review): presumably the new head shares resources with
     *        @p sharedDataFlow - confirm.
     */
    template<typename T, typename std::enable_if<!std::is_pointer<T>::value, bool>::type = true>
    auto addDataFlowHead(BaseDataFlow const &sharedDataFlow) -> T &
    {
        return static_cast<T &>(registerDataFlowHead(T::Create(), sharedDataFlow));
    }

    /** @brief Compiles the data flows registered on this program. */
    void compileDataFlows();

    /** @brief Updates the data flows registered on this program. */
    void updateDataFlows();

private:
    // Out-of-line registration sinks used by the addDataFlow*() templates.
    // Each takes ownership of the temporary data flow and hands back a
    // reference that the templates down-cast to the concrete type.
    BaseDataFlow &registerDataFlow(BaseDataFlow &&df);
    BaseDataFlow &registerDataFlowHead(BaseDataFlow &&df, int32_t const phase, float const allocWeight);
    BaseDataFlow &registerDataFlowHead(BaseDataFlow &&df, BaseDataFlow const &sharedDf);
};
/**
 * @brief Command type derived from BaseCmd describing a memory copy.
 *
 * Two factory families are provided: Create() for a flat copy of `count`
 * units and Create2D() for a copy described by line pitches, a width and a
 * height. Each family accepts every combination of raw pointer and
 * OffsetPointer for destination and source.
 *
 * Move-only: the copy operations are deleted, the move operations are noexcept.
 * NOTE(review): presumably count, pitches and width are expressed in bytes -
 * confirm against the implementation.
 */
class DLL_EXPORT CmdMemcpy final : public BaseCmd
{
public:
    // ---- Flat copies -------------------------------------------------------

    /** @brief OffsetPointer destination, OffsetPointer source. */
    static CmdMemcpy Create(const OffsetPointer<void> &dst, const OffsetPointer<void const> &src,
                            size_t const count);

    /** @brief Raw-pointer destination, raw-pointer source. */
    static CmdMemcpy Create(void *const dst, const void *const src, size_t const count);

    /** @brief OffsetPointer destination, raw-pointer source. */
    static CmdMemcpy Create(const OffsetPointer<void> &dst, const void *const src, size_t const count);

    /** @brief Raw-pointer destination, OffsetPointer source. */
    static CmdMemcpy Create(void *const dst, const OffsetPointer<void const> &src, size_t const count);

    // ---- Pitched 2D copies -------------------------------------------------

    /** @brief OffsetPointer destination, OffsetPointer source. */
    static CmdMemcpy Create2D(const OffsetPointer<void> &dst, const OffsetPointer<void const> &src,
                              size_t const dstLinePitch, size_t const srcLinePitch, size_t const width,
                              size_t const height);

    /** @brief Raw-pointer destination, raw-pointer source. */
    static CmdMemcpy Create2D(void *const dst, const void *const src, size_t const dstLinePitch,
                              size_t const srcLinePitch, size_t const width, size_t const height);

    /** @brief OffsetPointer destination, raw-pointer source. */
    static CmdMemcpy Create2D(const OffsetPointer<void> &dst, const void *const src, size_t const dstLinePitch,
                              size_t const srcLinePitch, size_t const width, size_t const height);

    /** @brief Raw-pointer destination, OffsetPointer source. */
    static CmdMemcpy Create2D(void *const dst, const OffsetPointer<void const> &src, size_t const dstLinePitch,
                              size_t const srcLinePitch, size_t const width, size_t const height);

    // ---- Special members ---------------------------------------------------

    /** @brief Default constructor; declared noexcept. */
    CmdMemcpy() noexcept;

    /** @brief Destructor; declared noexcept. */
    ~CmdMemcpy() noexcept;

    /** @brief Move constructor; declared noexcept. */
    CmdMemcpy(CmdMemcpy &&obj) noexcept;

    /** @brief Move assignment (lvalue targets only); declared noexcept. */
    CmdMemcpy &operator=(CmdMemcpy &&obj) & noexcept;

    // Copy operations are intentionally unavailable.
    CmdMemcpy(CmdMemcpy const &) = delete;
    CmdMemcpy &operator=(CmdMemcpy const &) & = delete;
};
/**
 * @brief Command type derived from BaseCmd that is created from a PFSD test
 *        identifier.
 *
 * Move-only: the copy operations are deleted, the move operations are noexcept.
 */
class DLL_EXPORT CmdPFSD final : public BaseCmd
{
public:
    /** @brief Returns the PFSD test count as a 32-bit unsigned integer. */
    static uint32_t GetPFSDTestCount();

    /**
     * @brief Factory returning a CmdPFSD by value for the given test id.
     * @param pfsdTestId Identifier of the test.
     *        NOTE(review): presumably must be smaller than GetPFSDTestCount() - confirm.
     */
    static CmdPFSD Create(uint32_t const pfsdTestId);

    /** @brief Default constructor; declared noexcept. */
    CmdPFSD() noexcept;

    /** @brief Destructor; declared noexcept. */
    ~CmdPFSD() noexcept;

    /** @brief Move constructor; declared noexcept. */
    CmdPFSD(CmdPFSD &&obj) noexcept;

    /** @brief Move assignment (lvalue targets only); declared noexcept. */
    CmdPFSD &operator=(CmdPFSD &&obj) & noexcept;

    // Copy operations are intentionally unavailable.
    CmdPFSD(CmdPFSD const &) = delete;
    CmdPFSD &operator=(CmdPFSD const &) & = delete;
};
/**
 * @brief Public handle for a stream, backed by DynamicStorage<impl::Stream>,
 *        through which commands are submitted.
 *
 * Move-only: the copy operations are deleted, the move operations are noexcept.
 */
class DLL_EXPORT Stream final : public DynamicStorage<impl::Stream>
{
public:
    /**
     * @brief Factory returning a Stream by value.
     * @param absEngine   Defaults to PVA0.
     * @param vpuAffinity Defaults to VPU_ANY.
     */
    static Stream Create(EngineType absEngine = PVA0, AffinityType vpuAffinity = VPU_ANY);

    /** @brief Default constructor; declared noexcept. */
    Stream() noexcept;

    /** @brief Destructor; declared noexcept. */
    ~Stream() noexcept;

    /** @brief Move constructor; declared noexcept. */
    Stream(Stream &&obj) noexcept;

    /** @brief Move assignment (lvalue targets only); declared noexcept. */
    Stream &operator=(Stream &&obj) & noexcept;

    // Copy operations are intentionally unavailable.
    Stream(Stream const &) = delete;
    Stream &operator=(Stream const &) & = delete;

    /**
     * @brief Not declared noexcept.
     *        NOTE(review): presumably releases the underlying object ahead of
     *        destruction - confirm against the implementation.
     */
    void finalize();

    /**
     * @brief Submits an array of commands.
     *
     * @param commands         Pointer to the first Cmd.
     * @param status           Optional status output; nullptr by default.
     * @param count            Number of commands; 1 by default.
     * @param order            IN_ORDER by default.
     * @param executionTimeout -1 by default.
     * @param submitTimeout    -1 by default.
     *        NOTE(review): presumably -1 disables the respective timeout; the
     *        time unit is not visible in this header - confirm.
     */
    void submit(Cmd const *const commands, CmdStatus *const status = nullptr, int32_t const count = 1,
                OrderType const order = IN_ORDER, int32_t const executionTimeout = -1,
                int32_t const submitTimeout = -1);

    /** @brief Submits a command buffer; both timeouts are -1 by default. */
    void submit(CmdBuffer const &cmdBuf, int32_t const executionTimeout = -1, int32_t const submitTimeout = -1);

    /**
     * @brief Convenience overload for one command: wraps the address of
     *        @p command in a temporary Cmd and delegates to the array overload
     *        with a count of 1.
     */
    void submit(const BaseCmd &command, CmdStatus *const status = nullptr, OrderType const order = IN_ORDER,
                int32_t const executionTimeout = -1, int32_t const submitTimeout = -1)
    {
        Cmd const single{&command};
        submit(&single, status, 1, order, executionTimeout, submitTimeout);
    }

    /** @brief Submits the commands held in an initializer list. */
    void submit(const std::initializer_list<Cmd> &cmdList, CmdStatus *const status = nullptr,
                OrderType const order = IN_ORDER, int32_t const executionTimeout = -1,
                int32_t const submitTimeout = -1);
};
/**
 * @brief Public handle for a context, backed by DynamicStorage<impl::Context>.
 *
 * Move-only: the copy operations are deleted, the move operations are noexcept.
 * A "current" impl::Context pointer can be set and queried through the static
 * SetCurrent() / GetCurrent() pair.
 */
class DLL_EXPORT Context final : public DynamicStorage<impl::Context>
{
public:
    /** @brief Engine mask mirroring cupva::config::ENGINE_MASK_ALL. */
    static constexpr uint32_t ENGINE_MASK_ALL{cupva::config::ENGINE_MASK_ALL};

    /**
     * @brief Factory returning a Context by value.
     * @param engineMask Defaults to ENGINE_MASK_ALL.
     *        NOTE(review): presumably a bit mask selecting the engines the
     *        context may use - confirm.
     */
    static Context Create(uint32_t const engineMask = ENGINE_MASK_ALL);

    /** @brief Default constructor; declared noexcept. */
    Context() noexcept;

    /** @brief Destructor; declared noexcept. */
    ~Context() noexcept;

    /** @brief Move constructor; declared noexcept. */
    Context(Context &&obj) noexcept;

    /** @brief Move assignment (lvalue targets only); declared noexcept. */
    Context &operator=(Context &&obj) & noexcept;

    // Copy operations are intentionally unavailable.
    Context(Context const &) = delete;
    Context &operator=(Context const &) & = delete;

    /**
     * @brief Not declared noexcept.
     *        NOTE(review): presumably releases the underlying object ahead of
     *        destruction - confirm against the implementation.
     */
    void finalize();

    /**
     * @brief Sets the current context pointer; declared noexcept.
     *        NOTE(review): whether "current" is per thread or per process is
     *        not visible in this header - confirm.
     */
    static void SetCurrent(impl::Context *const ctx) noexcept;

    /** @brief Returns the current context pointer; declared noexcept. */
    static impl::Context *GetCurrent() noexcept;
};
} // namespace cupva
#endif // CUPVA_HOST_HPP