26 #ifndef NVNEURAL_CUDATYPES_H 
   27 #define NVNEURAL_CUDATYPES_H 
   34 class INetworkBackendCuda;
 
   36 #define NVNEURAL_INETWORKBACKENDCUDA_OBJECTCLASS "com.nvidia.backendcuda" 
   38 #define NVNEURAL_ICUDAMEMORYALLOCATOR_OBJECTCLASS "com.nvidia.memoryallocatorcuda" 
  231         std::size_t gridSizeX,
 
  232         std::size_t gridSizeY,
 
  233         std::size_t gridSizeZ,
 
  234         std::size_t blockSizeX,
 
  235         std::size_t blockSizeY,
 
  236         std::size_t blockSizeZ,
 
  238         std::uint32_t smem) const noexcept = 0;
 
  244     virtual CUmodule module() const noexcept = 0;
 
  250     virtual CUfunction function() const noexcept = 0;
 
  256     virtual std::size_t compiledBinarySize() const noexcept = 0;
 
  262     virtual const void* compiledBinary() const noexcept = 0;
 
  279         const char* pModuleName = nullptr;
 
  281         const char* pModuleSource = nullptr;
 
  283         const char* pModuleEntryPoint = nullptr;
 
  317     virtual NeuralResult setTargetArchitecture(const char* pTargetArch) noexcept = 0;
 
  360     virtual CUcontext getCudaContext() const noexcept = 0;
 
  363     virtual CUdevice getCudaDevice() const noexcept = 0;
 
  369     virtual CUstream getCudaStream() const noexcept = 0;
 
Fundamental NvNeural data types are declared here.
 
MemorySemantic
Describes the intended purpose of allocated GPU memory.
Definition: CoreTypes.h:631
 
MemoryHandle__type * MemoryHandle
Opaque typedef used to represent INetworkBackend memory handles.
Definition: CoreTypes.h:626
 
NeuralResult
NeuralResult is a generic success/failure result type similar to COM HRESULT.
Definition: CoreTypes.h:275
 
Represents a runtime-compiled function object from ICudaRuntimeCompiler.
Definition: CudaTypes.h:211
 
virtual CUmodule module() const noexcept=0
Returns the CUmodule containing this function object.
 
virtual NeuralResult launch(INetworkBackendCuda *pBackend, std::size_t gridSizeX, std::size_t gridSizeY, std::size_t gridSizeZ, std::size_t blockSizeX, std::size_t blockSizeY, std::size_t blockSizeZ, void **ppArguments, std::uint32_t smem) const noexcept=0
Launches the function on the specified CUDA backend's stream.
 
Generic interface for CUDA device memory allocation.
Definition: CudaTypes.h:133
 
virtual NeuralResult allocateMemoryBlock(MemoryHandle *pHandleOut, size_t byteCount, MemorySemantic semantic, const char *pTrackingKey, const char *pTrackingSubkey) noexcept=0
Allocates a memory block of the requested size and allows tracking of the memory block using a given ...
 
virtual NeuralResult getMemoryTrackingSubkeys(const char *pTrackingKey, IStringList **ppKeysOut) noexcept=0
Returns an IStringList of the subkeys of given tracking key.
 
virtual NeuralResult getMemoryTrackingKeys(IStringList **ppKeysOut) noexcept=0
Returns an IStringList of the currently tracked keys.
 
virtual NeuralResult setMemoryTrackingKey(const char *pTrackingKey, const char *pTrackingSubkey) noexcept=0
Sets a potential tracking key.
 
virtual const MemoryTrackingData * getMemoryTrackingData(const char *pTrackingKey, const char *pTrackingSubkey) const noexcept=0
Compiles and returns memory data for the given key.
 
Generic interface for CUDA device memory allocation.
Definition: CudaTypes.h:52
 
static const IRefObject::TypeId typeID
Interface TypeId for InterfaceOf purposes.
Definition: CudaTypes.h:55
 
virtual NeuralResult unlockMemoryBlock(MemoryHandle handle) noexcept=0
Removes a lock from a preexisting memory block.
 
virtual std::size_t getSizeForMemoryBlock(MemoryHandle handle) const noexcept=0
Returns the buffer size associated with a memory handle.
 
virtual NeuralResult compactMemory() noexcept=0
Signals the allocator to release unused memory blocks back to the system.
 
virtual void * getAddressForMemoryBlock(MemoryHandle handle) const noexcept=0
Converts a memory handle to a GPU virtual address.
 
virtual NeuralResult lockMemoryBlock(MemoryHandle handle) noexcept=0
Adds a lock to a preexisting memory block.
 
virtual NeuralResult freeMemoryBlock(MemoryHandle handle) noexcept=0
Frees a memory block.
 
virtual NeuralResult allocateMemoryBlock(MemoryHandle *pHandleOut, std::size_t byteCount, MemorySemantic semantic) noexcept=0
Allocates a new memory block and returns a handle to it.
 
Represents a runtime compiler that can transform CUDA source code into compiled functions.
Definition: CudaTypes.h:270
 
virtual const char * targetArchitecture() const noexcept=0
Returns the current target GPU architecture for compilation.
 
INetworkBackend companion interface with CUDA-specific functionality.
Definition: CudaTypes.h:346
 
virtual ICudaMemoryAllocator * getAllocator() const noexcept=0
Returns the CUDA memory allocator interface.
 
Base class for all objects, similar to COM's IUnknown.
Definition: CoreTypes.h:343
 
std::uint64_t TypeId
Every interface must define a unique TypeId. This should be randomized.
Definition: CoreTypes.h:349
 
IStringList represents an immutable collection of strings.
Definition: CoreTypes.h:1079
 
Params struct describing a compilation request.
Definition: CudaTypes.h:277
 
Structure describing details of an object's memory allocation behavior.
Definition: CoreTypes.h:931