2021.1/NvNeuralSDK/_cuda_types_8h_source.html

 /*

 * SPDX-FileCopyrightText: Copyright (c) 2020-2021 NVIDIA CORPORATION & AFFILIATES. All rights reserved.

 * SPDX-License-Identifier: MIT

 *

 * Permission is hereby granted, free of charge, to any person obtaining a

 * copy of this software and associated documentation files (the "Software"),

 * to deal in the Software without restriction, including without limitation

 * the rights to use, copy, modify, merge, publish, distribute, sublicense,

 * and/or sell copies of the Software, and to permit persons to whom the

 * Software is furnished to do so, subject to the following conditions:

 *

 * The above copyright notice and this permission notice shall be included in

 * all copies or substantial portions of the Software.

 *

 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR

 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,

 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL

 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER

 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING

 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER

 * DEALINGS IN THE SOFTWARE.

 */


 #ifndef NVNEURAL_CUDATYPES_H

 #define NVNEURAL_CUDATYPES_H


 #include <nvneural/CoreTypes.h>

 #include <cuda.h>


 namespace nvneural {


 class INetworkBackendCuda;

 #define NVNEURAL_INETWORKBACKENDCUDA_OBJECTCLASS "com.nvidia.backendcuda"

 #define NVNEURAL_ICUDAMEMORYALLOCATOR_OBJECTCLASS "com.nvidia.memoryallocatorcuda"


 class ICudaMemoryAllocator : public IRefObject

 {

 public:

     static const IRefObject::TypeId typeID = 0x121e6098096e5c97ul;


     virtual NeuralResult allocateMemoryBlock(MemoryHandle* pHandleOut, std::size_t byteCount, MemorySemantic semantic) noexcept = 0;


     virtual NeuralResult freeMemoryBlock(MemoryHandle handle) noexcept = 0;


     virtual void* getAddressForMemoryBlock(MemoryHandle handle) const noexcept = 0;


     virtual std::size_t getSizeForMemoryBlock(MemoryHandle handle) const noexcept = 0;


     virtual NeuralResult lockMemoryBlock(MemoryHandle handle) noexcept = 0;


     virtual NeuralResult unlockMemoryBlock(MemoryHandle handle) noexcept = 0;


     virtual NeuralResult compactMemory() noexcept = 0;

 };


 class ICudaCompiledFunction : public IRefObject

 {

 public:

     static const IRefObject::TypeId typeID = 0x467d6d0e91bcc332ul;


     virtual NeuralResult launch(

         INetworkBackendCuda* pBackend,

         std::size_t gridSizeX,

         std::size_t gridSizeY,

         std::size_t gridSizeZ,

         std::size_t blockSizeX,

         std::size_t blockSizeY,

         std::size_t blockSizeZ,

         void** ppArguments,

         std::uint32_t smem) const noexcept = 0;


     virtual CUmodule module() const noexcept = 0;


     virtual CUfunction function() const noexcept = 0;


     virtual std::size_t compiledBinarySize() const noexcept = 0;


     virtual const void* compiledBinary() const noexcept = 0;

 };


 class ICudaRuntimeCompiler : public IRefObject

 {

 public:

     static const IRefObject::TypeId typeID = 0xe39c2816f916d342ul;


     struct CompilationDetails

     {

         const char* pModuleName = nullptr;

         const char* pModuleSource = nullptr;

         const char* pModuleEntryPoint = nullptr;

         IStringList* pHeaderNames = nullptr;

         IStringList* pHeaderContents = nullptr;

         IStringList* pAdditionalCompilerOptions = nullptr;

     };


     virtual const char* targetArchitecture() const noexcept = 0;


     virtual NeuralResult setTargetArchitecture(const char* pTargetArch) noexcept = 0;


     virtual NeuralResult compile(ICudaCompiledFunction** ppCompiledFunctionOut, const CompilationDetails& compilationDetails) noexcept = 0;


     virtual NeuralResult loadCubin(ICudaCompiledFunction** ppCompiledFunctionOut, std::uint8_t* pCode, std::size_t codeSize, const char* pEntryPoint) noexcept = 0;

 };


 class INetworkBackendCuda : public IRefObject

 {

 public:

     static const IRefObject::TypeId typeID = 0x61f19c57a3032f9ul;


     virtual ICudaMemoryAllocator* getAllocator() const noexcept = 0;


     virtual NeuralResult setAllocator(ICudaMemoryAllocator* pAllocator) noexcept = 0;


     virtual CUcontext getCudaContext() const noexcept = 0;


     virtual CUdevice getCudaDevice() const noexcept = 0;


     virtual CUstream getCudaStream() const noexcept = 0;


     virtual ICudaRuntimeCompiler* runtimeCompiler() const noexcept = 0;

 };


 } // namespace nvneural


 #endif // NVNEURAL_CUDATYPES_H

CoreTypes.h
Fundamental NvNeural data types are declared here.

nvneural::MemorySemantic
MemorySemantic
Describes the intended purpose of allocated GPU memory.
Definition: CoreTypes.h:629

nvneural::MemoryHandle
MemoryHandle__type * MemoryHandle
Opaque typedef used to represent INetworkBackend memory handles.
Definition: CoreTypes.h:624

nvneural::NeuralResult
NeuralResult
NeuralResult is a generic success/failure result type similar to COM HRESULT.
Definition: CoreTypes.h:273

nvneural::ICudaCompiledFunction
Represents a runtime-compiled function object from ICudaRuntimeCompiler.
Definition: CudaTypes.h:132

nvneural::ICudaCompiledFunction::module
virtual CUmodule module() const noexcept=0
Returns the CUmodule containing this function object.

nvneural::ICudaCompiledFunction::launch
virtual NeuralResult launch(INetworkBackendCuda *pBackend, std::size_t gridSizeX, std::size_t gridSizeY, std::size_t gridSizeZ, std::size_t blockSizeX, std::size_t blockSizeY, std::size_t blockSizeZ, void **ppArguments, std::uint32_t smem) const noexcept=0
Launches the function on the specified CUDA backend's stream.

nvneural::ICudaMemoryAllocator
Generic interface for CUDA device memory allocation.
Definition: CudaTypes.h:52

nvneural::ICudaMemoryAllocator::typeID
static const IRefObject::TypeId typeID
Interface TypeId for InterfaceOf purposes.
Definition: CudaTypes.h:55

nvneural::ICudaMemoryAllocator::unlockMemoryBlock
virtual NeuralResult unlockMemoryBlock(MemoryHandle handle) noexcept=0
Removes a lock from a preexisting memory block.

nvneural::ICudaMemoryAllocator::getSizeForMemoryBlock
virtual std::size_t getSizeForMemoryBlock(MemoryHandle handle) const noexcept=0
Returns the buffer size associated with a memory handle.

nvneural::ICudaMemoryAllocator::compactMemory
virtual NeuralResult compactMemory() noexcept=0
Signals the allocator to release unused memory blocks back to the system.

nvneural::ICudaMemoryAllocator::getAddressForMemoryBlock
virtual void * getAddressForMemoryBlock(MemoryHandle handle) const noexcept=0
Converts a memory handle to a GPU virtual address.

nvneural::ICudaMemoryAllocator::lockMemoryBlock
virtual NeuralResult lockMemoryBlock(MemoryHandle handle) noexcept=0
Adds a lock to a preexisting memory block.

nvneural::ICudaMemoryAllocator::freeMemoryBlock
virtual NeuralResult freeMemoryBlock(MemoryHandle handle) noexcept=0
Frees a memory block.

nvneural::ICudaMemoryAllocator::allocateMemoryBlock
virtual NeuralResult allocateMemoryBlock(MemoryHandle *pHandleOut, std::size_t byteCount, MemorySemantic semantic) noexcept=0
Allocates a new memory block and returns a handle to it.

nvneural::ICudaRuntimeCompiler
Represents a runtime compiler that can transform CUDA source code into compiled functions.
Definition: CudaTypes.h:191

nvneural::ICudaRuntimeCompiler::targetArchitecture
virtual const char * targetArchitecture() const noexcept=0
Returns the current target GPU architecture for compilation.

nvneural::INetworkBackendCuda
INetworkBackend companion interface with CUDA-specific functionality.
Definition: CudaTypes.h:267

nvneural::INetworkBackendCuda::getAllocator
virtual ICudaMemoryAllocator * getAllocator() const noexcept=0
Returns the CUDA memory allocator interface.

nvneural::IRefObject
Base class for all objects, similar to COM's IUnknown.
Definition: CoreTypes.h:341

nvneural::IRefObject::TypeId
std::uint64_t TypeId
Every interface must define a unique TypeId. This should be randomized.
Definition: CoreTypes.h:347

nvneural::IStringList
IStringList represents an immutable collection of strings.
Definition: CoreTypes.h:946

nvneural::ICudaRuntimeCompiler::CompilationDetails
Params struct describing a compilation request.
Definition: CudaTypes.h:198