Go to the documentation of this file.
50 #ifndef NV_INFER_RUNTIME_H
51 #define NV_INFER_RUNTIME_H
64 class IExecutionContext;
131 virtual void*
data()
const noexcept = 0;
132 virtual std::size_t
size()
const noexcept = 0;
134 virtual void destroy() noexcept = 0;
189 virtual void configure(
const Dims* inputDims, int32_t nbInputs,
const Dims* outputDims, int32_t nbOutputs,
226 virtual int32_t
enqueue(int32_t batchSize,
const void*
const* inputs,
void** outputs,
void* workspace,
267 return NV_TENSORRT_VERSION;
588 return Dims{-1, {}, {}};
681 int32_t
enqueue(int32_t ,
const void*
const* ,
void** ,
void* ,
780 virtual void setDLACore(int32_t dlaCore) noexcept = 0;
788 virtual int32_t
getDLACore()
const noexcept = 0;
798 virtual void destroy() noexcept = 0;
1040 constexpr
inline int32_t EnumMax<OptProfileSelector>()
1128 const char* inputName,
OptProfileSelector select,
const int32_t* values, int32_t nbValues) noexcept
1137 virtual int32_t
getNbShapeValues(
const char* inputName)
const noexcept = 0;
1177 virtual bool isValid()
const noexcept = 0;
1239 virtual const char*
getBindingName(int32_t bindingIndex)
const noexcept = 0;
1249 virtual bool bindingIsInput(int32_t bindingIndex)
const noexcept = 0;
1343 virtual void destroy() noexcept = 0;
1447 virtual const char*
getName()
const noexcept = 0;
1539 virtual bool isShapeBinding(int32_t bindingIndex)
const noexcept = 0;
1630 virtual bool execute(int32_t batchSize,
void** bindings) noexcept = 0;
1690 virtual void destroy() noexcept = 0;
1703 virtual void setName(
const char* name) noexcept = 0;
1710 virtual const char*
getName()
const noexcept = 0;
1741 virtual Dims getStrides(int32_t bindingIndex)
const noexcept = 0;
1872 virtual bool getShapeBinding(int32_t bindingIndex, int32_t* data)
const noexcept = 0;
1935 virtual bool executeV2(
void** bindings) noexcept = 0;
2006 extern "C" TENSORRTAPI
void* createInferRuntime_INTERNAL(
void* logger, int32_t version);
2012 extern "C" TENSORRTAPI
void* createInferRefitter_INTERNAL(
void* engine,
void* logger, int32_t version);
2025 return static_cast<IRuntime*>(createInferRuntime_INTERNAL(&logger, NV_TENSORRT_VERSION));
2035 return static_cast<IRefitter*>(createInferRefitter_INTERNAL(&engine, &logger, NV_TENSORRT_VERSION));
2040 #endif // NV_INFER_RUNTIME_H
virtual IPlugin * createPlugin(const char *layerName, const void *serialData, size_t serialLength)=0
Create a plugin from serialized data.
virtual IProfiler * getProfiler() const noexcept=0
Get the profiler.
virtual float getDynamicRangeMin(const char *tensorName) const =0
Get minimum of dynamic range.
virtual bool getDebugSync() const noexcept=0
Get the debug sync flag.
virtual void reportLayerTime(const char *layerName, float ms)=0
Layer time reporting callback.
virtual int32_t getTensorsWithDynamicRange(int32_t size, const char **tensorNames) const =0
Get names of all tensors that have refittable dynamic ranges.
virtual void destroy() noexcept=0
Destroy this object.
virtual const ICudaEngine & getEngine() const noexcept=0
Get the associated engine.
Definition: NvInferRuntime.h:420
virtual bool setWeights(const char *layerName, WeightsRole role, Weights weights)=0
Specify new weights for a layer of given name. Returns true on success, or false if new weights are r...
DimensionOperation
An operation on two IDimensionExpr, which represent integer expressions used in dimension computation...
Definition: NvInferRuntime.h:328
virtual void configureWithFormat(const Dims *inputDims, int32_t nbInputs, const Dims *outputDims, int32_t nbOutputs, DataType type, PluginFormat format, int32_t maxBatchSize)=0
Configure the layer.
Minimum of the two elements.
virtual void destroy() noexcept=0
Destroy the allocated memory.
const IDimensionExpr * d[Dims::MAX_DIMS]
The extent of each dimension.
Definition: NvInferRuntime.h:412
virtual int32_t getNbOptimizationProfiles() const noexcept=0
Get the number of optimization profiles defined for this engine.
virtual bool setInputShapeBinding(int32_t bindingIndex, const int32_t *data) noexcept=0
Set values of input tensor required by shape calculations.
virtual std::size_t size() const noexcept=0
The size in bytes of the data that was allocated.
An array of weights used as a layer parameter.
Definition: NvInferRuntime.h:110
virtual void setDLACore(int32_t dlaCore) noexcept=0
Set the DLA core that the deserialized engine must execute on.
IRefitter * createInferRefitter(ICudaEngine &engine, ILogger &logger)
Create an instance of an IRefitter class.
Definition: NvInferRuntime.h:2033
Class to handle library allocated memory that is accessible to the user.
Definition: NvInferRuntime.h:128
virtual const char * getBindingName(int32_t bindingIndex) const noexcept=0
Retrieve the name corresponding to a binding index.
DeviceType
The device that this layer/network will execute on.
Definition: NvInferRuntime.h:739
virtual bool refitCudaEngine()=0
Updates associated engine. Return true if successful.
static constexpr int32_t kFORMAT_COMBINATION_LIMIT
Definition: NvInferRuntime.h:486
virtual Dims getStrides(int32_t bindingIndex) const noexcept=0
Return the strides of the buffer for the given binding.
virtual DataType type() const noexcept=0
The type of the memory that was allocated.
Updates weights in an engine.
Definition: NvInferRuntime.h:861
virtual int32_t enqueue(int32_t batchSize, const void *const *inputs, void **outputs, void *workspace, cudaStream_t stream)=0
Execute the layer.
virtual void setDeviceMemory(void *memory) noexcept=0
Set the device memory for use by this execution context.
virtual void setErrorRecorder(IErrorRecorder *recorder) noexcept=0
Set the ErrorRecorder for this interface.
kernel for IConvolutionLayer, IDeconvolutionLayer, or IFullyConnectedLayer
Fail with error when the coordinates are out of bounds. This is the default.
virtual IErrorRecorder * getErrorRecorder() const =0
Get the ErrorRecorder assigned to this interface.
virtual const char * getBindingFormatDesc(int32_t bindingIndex) const noexcept=0
Return the human readable description of the tensor format.
virtual bool supportsFormatCombination(int32_t pos, const PluginTensorDesc *inOut, int32_t nbInputs, int32_t nbOutputs)=0
Return true if plugin supports the format and datatype for the input/output indexed by pos.
virtual void setErrorRecorder(IErrorRecorder *recorder) noexcept=0
Set the ErrorRecorder for this interface.
virtual Dims getProfileDimensions(int32_t bindingIndex, int32_t profileIndex, OptProfileSelector select) const noexcept=0
Get the minimum / optimum / maximum dimensions for a particular binding under an optimization profile...
virtual void serialize(void *buffer)=0
Serialize the layer.
TensorFormat
Format of the input/output tensors.
Definition: NvInferRuntimeCommon.h:258
virtual bool hasImplicitBatchDimension() const =0
Query whether the engine was built with an implicit batch dimension.
int64_t count
The number of weights in the array.
Definition: NvInferRuntime.h:115
virtual int32_t getBindingVectorizedDim(int32_t bindingIndex) const noexcept=0
Return the dimension index that the buffer is vectorized.
Structure to define the dimensions of a tensor.
Definition: NvInferRuntimeCommon.h:227
virtual IErrorRecorder * getErrorRecorder() const noexcept=0
Get the ErrorRecorder assigned to this interface.
virtual void destroy() noexcept=0
Destroy this object.
virtual size_t getWorkspaceSize(const PluginTensorDesc *inputs, int32_t nbInputs, const PluginTensorDesc *outputs, int32_t nbOutputs) const =0
Find the workspace size required by the layer.
Plugin factory for deserialization.
Definition: NvInferRuntime.h:999
Application-implemented logging interface for the builder, engine and runtime.
Definition: NvInferRuntimeCommon.h:1078
constexpr int32_t EnumMax< DeviceType >()
Maximum number of elements in DeviceType enum.
Definition: NvInferRuntime.h:747
constexpr int32_t EnumMax< EngineCapability >()
Maximum number of elements in EngineCapability enum.
Definition: NvInferRuntime.h:93
virtual void setGpuAllocator(IGpuAllocator *allocator) noexcept=0
Set the GPU allocator.
TRT_DEPRECATED bool canBroadcastInputAcrossBatch(int32_t) const _TENSORRT_FINAL
Derived classes should not implement this. In a C++11 API it would be override final.
Definition: NvInferRuntime.h:613
An engine for executing inference on a built network, with functionally unsafe features.
Definition: NvInferRuntime.h:1190
virtual bool allInputShapesSpecified() const noexcept=0
Whether all input shape bindings have been specified.
virtual IExecutionContext * createExecutionContextWithoutDeviceMemory() noexcept=0
Create an execution context without any device memory allocated.
virtual void destroy() noexcept=0
Destroy this object.
#define TRTNOEXCEPT
Definition: NvInferRuntimeCommon.h:103
nvinfer1::ICudaEngine * deserializeCudaEngine(const void *blob, std::size_t size) noexcept
Deserialize an engine from a stream when plugin factory is not used.
Definition: NvInferRuntime.h:848
virtual Dims getDimensions(const char *inputName, OptProfileSelector select) const noexcept=0
Get the minimum / optimum / maximum dimensions for a dynamic input tensor.
virtual TRT_DEPRECATED bool setOptimizationProfile(int32_t profileIndex) noexcept=0
Select an optimization profile for the current context.
virtual const IDimensionExpr * operation(DimensionOperation op, const IDimensionExpr &first, const IDimensionExpr &second)=0
virtual size_t getWorkspaceSize(int32_t maxBatchSize) const =0
Find the workspace size required by the layer.
virtual IErrorRecorder * getErrorRecorder() const noexcept=0
Get the ErrorRecorder assigned to this interface.
virtual void setErrorRecorder(IErrorRecorder *recorder)=0
Set the ErrorRecorder for this interface.
TRT_DEPRECATED void configurePlugin(const Dims *, int32_t, const Dims *, int32_t, const DataType *, const DataType *, const bool *, const bool *, PluginFormat, int32_t) _TENSORRT_FINAL
Derived classes should not implement this. In a C++11 API it would be override final.
Definition: NvInferRuntime.h:646
int32_t nbDims
The number of dimensions.
Definition: NvInferRuntime.h:411
virtual void setDebugSync(bool sync) noexcept=0
Set the debug sync flag.
virtual const char * getName() const noexcept=0
Return the name of the execution context.
The TensorRT API version 1 namespace.
virtual int32_t getMissing(int32_t size, const char **layerNames, WeightsRole *roles)=0
Get description of missing weights.
virtual Dims getBindingDimensions(int32_t bindingIndex) const noexcept=0
Get the dynamic dimensions of a binding.
Definition: NvInferRuntime.h:450
virtual void configure(const Dims *inputDims, int32_t nbInputs, const Dims *outputDims, int32_t nbOutputs, int32_t maxBatchSize)=0
Configure the layer.
int32_t getTensorRTVersion() const _TENSORRT_OVERRIDE
Return the API version with which this plugin was built.
Definition: NvInferRuntime.h:567
IPluginV2DynamicExt * clone() const _TENSORRT_OVERRIDE=0
Clone the plugin object. This copies over internal plugin parameters as well and returns a new plugin...
This is used to set or get the value that is used in the optimization (kernel selection).
virtual int32_t getNbLayers() const noexcept=0
Get the number of layers in the network.
virtual float getExtraMemoryTarget() const noexcept=0
Get the extra memory target that has been defined for this profile.
virtual const int32_t * getShapeValues(const char *inputName, OptProfileSelector select) const noexcept=0
Get the minimum / optimum / maximum values for an input shape tensor.
Dims max
Upper bounds on tensor’s dimensions.
Definition: NvInferRuntime.h:429
TRT_DEPRECATED bool isOutputBroadcastAcrossBatch(int32_t, const bool *, int32_t) const _TENSORRT_FINAL
Derived classes should not implement this. In a C++11 API it would be override final.
Definition: NvInferRuntime.h:599
Check if two elements are equal.
virtual float getDynamicRangeMax(const char *tensorName) const =0
Get maximum of dynamic range.
struct CUstream_st * cudaStream_t
Forward declaration of cudaStream_t.
Definition: NvInferRuntimeCommon.h:115
virtual IErrorRecorder * getErrorRecorder() const noexcept=0
Get the ErrorRecorder assigned to this interface.
virtual void configurePlugin(const DynamicPluginTensorDesc *in, int32_t nbInputs, const DynamicPluginTensorDesc *out, int32_t nbOutputs)=0
Configure the layer.
TRT_DEPRECATED size_t getWorkspaceSize(int32_t) const _TENSORRT_FINAL
Derived classes should not implement this. In a C++11 API it would be override final.
Definition: NvInferRuntime.h:665
virtual TensorFormat getBindingFormat(int32_t bindingIndex) const noexcept=0
Return the binding format.
virtual bool executeV2(void **bindings) noexcept=0
Synchronously execute inference on a network.
virtual bool supportsFormat(DataType type, PluginFormat format) const =0
Check format support.
DataType type
The type of the weights.
Definition: NvInferRuntime.h:113
bias for IConvolutionLayer, IDeconvolutionLayer, or IFullyConnectedLayer
virtual bool execute(int32_t batchSize, void **bindings) noexcept=0
Synchronously execute inference on a batch.
#define _TENSORRT_OVERRIDE
Items that are marked as deprecated will be removed in a future release.
Definition: NvInferRuntimeCommon.h:62
struct CUevent_st * cudaEvent_t
Forward declaration of cudaEvent_t.
Definition: NvInferRuntimeCommon.h:116
Dims min
Lower bounds on tensor’s dimensions.
Definition: NvInferRuntime.h:426
void configure(const Dims *, int32_t, const Dims *, int32_t, int32_t) _TENSORRT_FINAL
Derived classes should not implement this. In a C++11 API it would be override final.
Definition: NvInferRuntime.h:312
virtual Dims getBindingDimensions(int32_t bindingIndex) const noexcept=0
Get the dimensions of a binding.
TRT_DEPRECATED bool supportsFormat(DataType, PluginFormat) const _TENSORRT_FINAL
Derived classes should not implement this. In a C++11 API it would be override final.
Definition: NvInferRuntime.h:629
virtual TensorLocation getLocation(int32_t bindingIndex) const noexcept=0
Get location of binding.
virtual nvinfer1::ICudaEngine * deserializeCudaEngine(const void *blob, std::size_t size, IPluginFactory *pluginFactory) noexcept=0
Deserialize an engine from a stream.
virtual int32_t getNbDLACores() const noexcept=0
Returns number of DLA hardware cores accessible.
shift part of IScaleLayer
Plugin class for user-implemented layers.
Definition: NvInferRuntime.h:256
virtual bool bindingIsInput(int32_t bindingIndex) const noexcept=0
Determine whether a binding is an input binding.
Optimization profile for dynamic input dimensions and shape tensors.
Definition: NvInferRuntime.h:1067
DataType
The type of weights and tensors.
Definition: NvInferRuntimeCommon.h:169
virtual int32_t getBindingIndex(const char *name) const noexcept=0
Retrieve the binding index for a named tensor.
virtual int32_t initialize()=0
Initialize the layer for execution. This is called when the engine is created.
virtual bool isValid() const noexcept=0
Check whether the optimization profile can be passed to an IBuilderConfig object.
virtual int32_t getAll(int32_t size, const char **layerNames, WeightsRole *roles)=0
Get description of all weights that could be refit.
Definition: NvInferRuntime.h:408
Floor division of the first element by the second.
PluginTensorDesc desc
Information required to interpret a pointer to tensor data, except that desc.dims has -1 in place of ...
Definition: NvInferRuntime.h:423
static const int32_t MAX_DIMS
The maximum number of dimensions supported for a tensor.
Definition: NvInferRuntimeCommon.h:230
virtual DataType getBindingDataType(int32_t bindingIndex) const noexcept=0
Determine the required data type for a buffer from its binding index.
Subtract the second element from the first.
virtual bool allInputDimensionsSpecified() const noexcept=0
Whether all dynamic dimensions of input tensors have been specified.
virtual bool isConstant() const =0
Return true if expression is a build-time constant.
Reference counted application-implemented error reporting interface for TensorRT objects.
Definition: NvInferRuntimeCommon.h:1235
virtual int32_t getNbOutputs() const =0
Get the number of outputs from the layer.
virtual int32_t getTensorRTVersion() const
Return the API version with which this plugin was built.
Definition: NvInferRuntime.h:265
virtual bool setDynamicRange(const char *tensorName, float min, float max)=0
virtual int32_t enqueue(const PluginTensorDesc *inputDesc, const PluginTensorDesc *outputDesc, const void *const *inputs, void *const *outputs, void *workspace, cudaStream_t stream)=0
Execute the layer.
OptProfileSelector
When setting or querying optimization profile parameters (such as shape tensor inputs or dynamic dime...
Definition: NvInferRuntime.h:1031
WeightsRole
How a layer uses particular Weights.
Definition: NvInferRuntime.h:718
virtual bool getShapeBinding(int32_t bindingIndex, int32_t *data) const noexcept=0
Get values of an input tensor required for shape calculations or an output tensor produced by shape c...
IRuntime * createInferRuntime(ILogger &logger)
Create an instance of an IRuntime class.
Definition: NvInferRuntime.h:2023
virtual int32_t getMaxBatchSize() const noexcept=0
Get the maximum batch size which can be used for inference.
Plugin class for user-implemented layers.
Definition: NvInferRuntimeCommon.h:583
TRT_DEPRECATED int32_t enqueue(int32_t, const void *const *, void **, void *, cudaStream_t) _TENSORRT_FINAL
Derived classes should not implement this. In a C++11 API it would be override final.
Definition: NvInferRuntime.h:681
Definition: NvInferRuntime.h:358
Product of the two elements.
virtual const int32_t * getProfileShapeValues(int32_t profileIndex, int32_t inputIndex, OptProfileSelector select) const noexcept=0
Get minimum / optimum / maximum values for an input shape binding under an optimization profile.
virtual bool enqueue(int32_t batchSize, void **bindings, cudaStream_t stream, cudaEvent_t *inputConsumed) noexcept=0
Asynchronously execute inference on a batch.
EngineCapability
Forward declaration of IPluginFactory for use by other interfaces.
Definition: NvInferRuntime.h:82
virtual void terminate()=0
Release resources acquired during plugin layer initialization. This is called when the engine is dest...
constexpr int32_t EnumMax< DimensionOperation >()
Maximum number of elements in DimensionOperation enum.
Definition: NvInferRuntime.h:343
virtual const char * getName() const noexcept=0
Returns the name of the network associated with the engine.
virtual const IDimensionExpr * constant(int32_t value)=0
Return pointer to IDimensionExp for given value.
Application-implemented interface for profiling.
Definition: NvInferRuntime.h:698
Check if element in first tensor is less than corresponding element in second tensor.
virtual DimsExprs getOutputDimensions(int32_t outputIndex, const DimsExprs *inputs, int32_t nbInputs, IExprBuilder &exprBuilder)=0
Get expressions for computing dimensions of an output tensor from dimensions of the input tensors.
virtual int32_t getDLACore() const noexcept=0
Get the DLA core that the engine executes on.
virtual bool setExtraMemoryTarget(float target) noexcept=0
Set a target for extra GPU memory that may be used by this profile.
virtual size_t getSerializationSize()=0
Find the size of the serialization buffer required.
Fields that a plugin might see for an input or output.
Definition: NvInferRuntimeCommon.h:369
const void * values
The weight values, in a contiguous array.
Definition: NvInferRuntime.h:114
virtual int32_t getOptimizationProfile() const noexcept=0
Get the index of the currently selected optimization profile.
virtual Dims getOutputDimensions(int32_t index, const Dims *inputs, int32_t nbInputDims)=0
Get the dimension of an output tensor.
virtual int32_t getNbShapeValues(const char *inputName) const noexcept=0
Get the number of values for an input shape tensor.
virtual bool setBindingDimensions(int32_t bindingIndex, Dims dimensions) noexcept=0
Set the dynamic dimensions of a binding.
virtual EngineCapability getEngineCapability() const noexcept=0
Determine the execution capability this engine has.
Plugin class for user-implemented layers.
Definition: NvInferRuntime.h:146
virtual bool setDimensions(const char *inputName, OptProfileSelector select, Dims dims) noexcept=0
Set the minimum / optimum / maximum dimensions for a dynamic input tensor.
virtual void setName(const char *name) noexcept=0
Set the name of the execution context.
Application-implemented class for controlling allocation on the GPU.
Definition: NvInferRuntimeCommon.h:1035
TRT_DEPRECATED Dims getOutputDimensions(int32_t, const Dims *, int32_t) _TENSORRT_FINAL
Derived classes should not implement this. In a C++11 API it would be override final.
Definition: NvInferRuntime.h:585
virtual bool isExecutionBinding(int32_t bindingIndex) const noexcept=0
True if pointer to tensor data is required for execution phase, false if nullptr can be supplied.
virtual int32_t getConstantValue() const =0
virtual bool setOptimizationProfileAsync(int32_t profileIndex, cudaStream_t stream) noexcept=0
Select an optimization profile for the current context with async semantics.
virtual void setErrorRecorder(IErrorRecorder *recorder) noexcept=0
Set the ErrorRecorder for this interface.
virtual TRT_DEPRECATED std::size_t getWorkspaceSize() const noexcept=0
Get the amount of workspace the engine uses.
Allows a serialized functionally unsafe engine to be deserialized.
Definition: NvInferRuntime.h:759
virtual int32_t getBindingBytesPerComponent(int32_t bindingIndex) const noexcept=0
Return the number of bytes per component of an element.
virtual int32_t getNbBindings() const noexcept=0
Get the number of binding indices.
TensorLocation
The location for tensor data storage, device or host.
Definition: NvInferRuntimeCommon.h:1017
Context for executing inference using an engine, with functionally unsafe features.
Definition: NvInferRuntime.h:1616
virtual void * data() const noexcept=0
A pointer to the raw data that is owned by the library.
virtual size_t getDeviceMemorySize() const noexcept=0
Return the amount of device memory required by an execution context.
virtual IExecutionContext * createExecutionContext() noexcept=0
Create an execution context.
virtual bool enqueueV2(void **bindings, cudaStream_t stream, cudaEvent_t *inputConsumed) noexcept=0
Asynchronously execute inference.
virtual IHostMemory * serialize() const noexcept=0
Serialize the network to a stream.
virtual void setProfiler(IProfiler *) noexcept=0
Set the profiler.
virtual bool setShapeValues(const char *inputName, OptProfileSelector select, const int32_t *values, int32_t nbValues) noexcept=0
Set the minimum / optimum / maximum values for an input shape tensor.
Definition: NvInferRuntime.h:389
virtual bool isShapeBinding(int32_t bindingIndex) const noexcept=0
True if tensor is required as input for shape calculations or output from them.
virtual bool isRefittable() const noexcept=0
Return true if engine can be refit.
virtual int32_t getBindingComponentsPerElement(int32_t bindingIndex) const noexcept=0
Return the number of components included in one element.
constexpr int32_t EnumMax< WeightsRole >()
Maximum number of elements in WeightsRole enum.
Definition: NvInferRuntime.h:729