Go to the documentation of this file.
50 #ifndef NV_INFER_RUNTIME_H
51 #define NV_INFER_RUNTIME_H
65 class IExecutionContext;
119 kSAFE_GPU TRT_DEPRECATED_ENUM =
kSAFETY,
136 static constexpr int32_t kVALUE = 3;
178 return mImpl->data();
182 std::size_t
size() const noexcept
184 return mImpl->size();
190 return mImpl->type();
254 static constexpr int32_t kVALUE = 2;
276 return mImpl->isConstant();
283 return mImpl->getConstantValue();
314 return mImpl->constant(value);
322 return mImpl->operation(op, first, second);
413 static constexpr int32_t kFORMAT_COMBINATION_LIMIT = 100;
447 virtual bool supportsFormatCombination(
448 int32_t pos,
const PluginTensorDesc* inOut, int32_t nbInputs, int32_t nbOutputs) noexcept
503 int32_t nbOutputs)
const noexcept
519 const void*
const* inputs,
void*
const* outputs,
void* workspace,
cudaStream_t stream) noexcept
541 bool const*,
PluginFormat, int32_t) noexcept
override final
550 Dims getOutputDimensions(int32_t,
Dims const*, int32_t) noexcept
override final
555 bool isOutputBroadcastAcrossBatch(int32_t,
bool const*, int32_t)
const noexcept
override final
560 bool canBroadcastInputAcrossBatch(int32_t)
const noexcept
override final
565 size_t getWorkspaceSize(int32_t)
const noexcept
override final
570 int32_t enqueue(int32_t,
const void*
const*,
void*
const*,
void*,
cudaStream_t) noexcept
override final
595 virtual void reportLayerTime(
const char* layerName,
float ms) noexcept = 0;
651 virtual ~
IRuntime() noexcept =
default;
669 const void* blob, std::size_t size, IPluginFactory* pluginFactory) noexcept
671 return mImpl->deserializeCudaEngine(blob, size,
nullptr);
683 mImpl->setDLACore(dlaCore);
694 return mImpl->getDLACore();
702 return mImpl->getNbDLACores();
728 mImpl->setGpuAllocator(allocator);
747 mImpl->setErrorRecorder(recorder);
762 return mImpl->getErrorRecorder();
775 return mImpl->deserializeCudaEngine(blob, size,
nullptr);
806 return mImpl->setWeights(layerName, role, weights);
821 return mImpl->refitCudaEngine();
842 return mImpl->getMissing(size, layerNames, roles);
859 return mImpl->getAll(size, layerNames, roles);
886 return mImpl->setDynamicRange(tensorName, min, max);
898 return mImpl->getDynamicRangeMin(tensorName);
910 return mImpl->getDynamicRangeMax(tensorName);
926 return mImpl->getTensorsWithDynamicRange(size, tensorNames);
945 mImpl->setErrorRecorder(recorder);
960 return mImpl->getErrorRecorder();
978 return mImpl->setNamedWeights(name, weights);
998 return mImpl->getMissingWeights(size, weightsNames);
1014 return mImpl->getAllWeights(size, weightsNames);
1040 constexpr
inline int32_t EnumMax<OptProfileSelector>() noexcept
1097 return mImpl->setDimensions(inputName, select, dims);
1107 return mImpl->getDimensions(inputName, select);
1149 const char* inputName,
OptProfileSelector select,
const int32_t* values, int32_t nbValues) noexcept
1151 return mImpl->setShapeValues(inputName, select, values, nbValues);
1162 return mImpl->getNbShapeValues(inputName);
1172 return mImpl->getShapeValues(inputName, select);
1190 return mImpl->setExtraMemoryTarget(target);
1198 return mImpl->getExtraMemoryTarget();
1214 return mImpl->isValid();
1275 return mImpl->getNbBindings();
1297 return mImpl->getBindingIndex(name);
1317 return mImpl->getBindingName(bindingIndex);
1330 return mImpl->bindingIsInput(bindingIndex);
1355 return mImpl->getBindingDimensions(bindingIndex);
1368 return mImpl->getBindingDataType(bindingIndex);
1380 return mImpl->getMaxBatchSize();
1394 return mImpl->getNbLayers();
1408 return mImpl->serialize();
1424 return mImpl->createExecutionContext();
1451 return mImpl->getLocation(bindingIndex);
1460 return mImpl->createExecutionContextWithoutDeviceMemory();
1470 return mImpl->getDeviceMemorySize();
1480 return mImpl->isRefittable();
1494 return mImpl->getBindingBytesPerComponent(bindingIndex);
1508 return mImpl->getBindingComponentsPerElement(bindingIndex);
1518 return mImpl->getBindingFormat(bindingIndex);
1537 return mImpl->getBindingFormatDesc(bindingIndex);
1549 return mImpl->getBindingVectorizedDim(bindingIndex);
1564 return mImpl->getName();
1575 return mImpl->getNbOptimizationProfiles();
1602 return mImpl->getProfileDimensions(bindingIndex, profileIndex, select);
1629 return mImpl->getProfileShapeValues(profileIndex, inputIndex, select);
1665 return mImpl->isShapeBinding(bindingIndex);
1679 return mImpl->isExecutionBinding(bindingIndex);
1694 return mImpl->getEngineCapability();
1712 return mImpl->setErrorRecorder(recorder);
1727 return mImpl->getErrorRecorder();
1746 return mImpl->hasImplicitBatchDimension();
1755 return mImpl->getTacticSources();
1794 bool execute(int32_t batchSize,
void*
const* bindings) noexcept
1796 return mImpl->execute(batchSize, bindings);
1825 return mImpl->enqueue(batchSize, bindings, stream, inputConsumed);
1838 mImpl->setDebugSync(sync);
1848 return mImpl->getDebugSync();
1858 mImpl->setProfiler(profiler);
1868 return mImpl->getProfiler();
1878 return mImpl->getEngine();
1902 mImpl->setName(name);
1912 return mImpl->getName();
1930 mImpl->setDeviceMemory(memory);
1951 return mImpl->getStrides(bindingIndex);
1993 return mImpl->setOptimizationProfile(profileIndex);
2005 return mImpl->getOptimizationProfile();
2042 return mImpl->setBindingDimensions(bindingIndex, dimensions);
2072 return mImpl->getBindingDimensions(bindingIndex);
2102 return mImpl->setInputShapeBinding(bindingIndex, data);
2124 return mImpl->getShapeBinding(bindingIndex, data);
2139 return mImpl->allInputDimensionsSpecified();
2154 return mImpl->allInputShapesSpecified();
2173 mImpl->setErrorRecorder(recorder);
2188 return mImpl->getErrorRecorder();
2205 return mImpl->executeV2(bindings);
2233 return mImpl->enqueueV2(bindings, stream, inputConsumed);
2281 return mImpl->setOptimizationProfileAsync(profileIndex, stream);
2293 extern "C" TENSORRTAPI
void* createInferRuntime_INTERNAL(
void* logger, int32_t version) noexcept;
2299 extern "C" TENSORRTAPI
void* createInferRefitter_INTERNAL(
void* engine,
void* logger, int32_t version) noexcept;
2323 return static_cast<IRuntime*>(createInferRuntime_INTERNAL(&logger, NV_TENSORRT_VERSION));
2333 return static_cast<IRefitter*>(createInferRefitter_INTERNAL(&engine, &logger, NV_TENSORRT_VERSION));
2349 template <
typename T>
2365 #define REGISTER_TENSORRT_PLUGIN(name) \
2366 static nvinfer1::PluginRegistrar<name> pluginRegistrar##name {}
2367 #endif // NV_INFER_RUNTIME_H
int32_t getNbShapeValues(const char *inputName) const noexcept
Get the number of values for an input shape tensor.
Definition: NvInferRuntime.h:1160
bool allInputDimensionsSpecified() const noexcept
Whether all dynamic dimensions of input tensors have been specified.
Definition: NvInferRuntime.h:2137
struct CUevent_st * cudaEvent_t
Forward declaration of cudaEvent_t.
Definition: NvInferRuntimeCommon.h:112
int32_t getConstantValue() const noexcept
Definition: NvInferRuntime.h:281
Structure to define the dimensions of a tensor.
Definition: NvInferRuntime.h:347
IErrorRecorder * getErrorRecorder() const noexcept
Get the ErrorRecorder assigned to this interface.
Definition: NvInferRuntime.h:760
DimensionOperation
An operation on two IDimensionExpr, which represent integer expressions used in dimension computation...
Definition: NvInferRuntime.h:218
Minimum of the two elements.
TRT_DEPRECATED void setDeviceMemory(void *memory) noexcept
Set the device memory for use by this execution context.
Definition: NvInferRuntime.h:1928
nvinfer1::ILogger * getLogger() noexcept
Return the logger object.
bool getShapeBinding(int32_t bindingIndex, int32_t *data) const noexcept
Get values of an input tensor required for shape calculations or an output tensor produced by shape c...
Definition: NvInferRuntime.h:2122
void setName(const char *name) noexcept
Set the name of the execution context.
Definition: NvInferRuntime.h:1900
int32_t getTensorRTVersion() const noexcept override
Return the API version with which this plugin was built. The upper byte reserved by TensorRT and is u...
Definition: NvInferRuntime.h:530
An array of weights used as a layer parameter.
Definition: NvInferRuntime.h:152
size_t getDeviceMemorySize() const noexcept
Return the amount of device memory required by an execution context.
Definition: NvInferRuntime.h:1468
Register the plugin creator to the registry The static registry object will be instantiated when the ...
Definition: NvInferRuntime.h:2350
nvinfer1::IPluginRegistry * getPluginRegistry() noexcept
Return the plugin registry.
bool isConstant() const noexcept
Return true if expression is a build-time constant.
Definition: NvInferRuntime.h:274
Class to handle library allocated memory that is accessible to the user.
Definition: NvInferRuntime.h:170
void * data() const noexcept
A pointer to the raw data that is owned by the library.
Definition: NvInferRuntime.h:176
TRT_DEPRECATED void destroy() noexcept
Definition: NvInferRuntime.h:199
DeviceType
The device that this layer/network will execute on.
Definition: NvInferRuntime.h:628
float getDynamicRangeMin(const char *tensorName) const noexcept
Get minimum of dynamic range.
Definition: NvInferRuntime.h:896
IErrorRecorder * getErrorRecorder() const noexcept
Get the ErrorRecorder assigned to this interface.
Definition: NvInferRuntime.h:2186
float getDynamicRangeMax(const char *tensorName) const noexcept
Get maximum of dynamic range.
Definition: NvInferRuntime.h:908
IHostMemory * serialize() const noexcept
Serialize the network to a stream.
Definition: NvInferRuntime.h:1406
Declaration of EnumMaxImpl struct to store maximum number of elements in an enumeration type.
Definition: NvInferRuntimeCommon.h:141
constexpr int32_t EnumMax< WeightsRole >() noexcept
Maximum number of elements in WeightsRole enum.
Definition: NvInferRuntime.h:618
Dims getBindingDimensions(int32_t bindingIndex) const noexcept
Get the dimensions of a binding.
Definition: NvInferRuntime.h:1353
Updates weights in an engine.
Definition: NvInferRuntime.h:789
Definition: NvInferRuntimeCommon.h:194
TacticSource
List of tactic sources for TensorRT.
Definition: NvInferRuntime.h:1229
int32_t const * getShapeValues(const char *inputName, OptProfileSelector select) const noexcept
Get the minimum / optimum / maximum values for an input shape tensor.
Definition: NvInferRuntime.h:1170
kernel for IConvolutionLayer, IDeconvolutionLayer, or IFullyConnectedLayer
Fail with error when the coordinates are out of bounds. This is the default.
Dims getProfileDimensions(int32_t bindingIndex, int32_t profileIndex, OptProfileSelector select) const noexcept
Get the minimum / optimum / maximum dimensions for a particular binding under an optimization profile...
Definition: NvInferRuntime.h:1600
TensorLocation getLocation(int32_t bindingIndex) const noexcept
Get location of binding.
Definition: NvInferRuntime.h:1449
const int32_t * getProfileShapeValues(int32_t profileIndex, int32_t inputIndex, OptProfileSelector select) const noexcept
Get minimum / optimum / maximum values for an input shape binding under an optimization profile.
Definition: NvInferRuntime.h:1626
TRT_DEPRECATED void destroy() noexcept
Destroy this object.
Definition: NvInferRuntime.h:1434
bool setDimensions(const char *inputName, OptProfileSelector select, Dims dims) noexcept
Set the minimum / optimum / maximum dimensions for a dynamic input tensor.
Definition: NvInferRuntime.h:1095
bool setDynamicRange(const char *tensorName, float min, float max) noexcept
Definition: NvInferRuntime.h:884
TensorFormat
Format of the input/output tensors.
Definition: NvInferRuntimeCommon.h:225
TRT_DEPRECATED void destroy() noexcept
Destroy this object.
Definition: NvInferRuntime.h:712
int64_t count
The number of weights in the array.
Definition: NvInferRuntime.h:157
Single registration point for all plugins in an application. It is used to find plugin implementation...
Definition: NvInferRuntimeCommon.h:1007
bool enqueueV2(void *const *bindings, cudaStream_t stream, cudaEvent_t *inputConsumed) noexcept
Asynchronously execute inference.
Definition: NvInferRuntime.h:2231
Application-implemented logging interface for the builder, engine and runtime.
Definition: NvInferRuntimeCommon.h:1194
Definition: NvInferImpl.h:246
An engine for executing inference on a built network, with functionally unsafe features.
Definition: NvInferRuntime.h:1258
int32_t getBindingComponentsPerElement(int32_t bindingIndex) const noexcept
Return the number of components included in one element.
Definition: NvInferRuntime.h:1506
DataType type() const noexcept
The type of the memory that was allocated.
Definition: NvInferRuntime.h:188
bool refitCudaEngine() noexcept
Updates associated engine. Return true if successful.
Definition: NvInferRuntime.h:819
IRuntime * createInferRuntime(ILogger &logger) noexcept
Create an instance of an IRuntime class.
Definition: NvInferRuntime.h:2321
nvinfer1::ICudaEngine * deserializeCudaEngine(const void *blob, std::size_t size) noexcept
Deserialize an engine from a stream.
Definition: NvInferRuntime.h:773
Definition: NvInferImpl.h:190
int32_t uint32_t TacticSources
Represents a collection of one or more TacticSource values combined using bitwise-OR operations.
Definition: NvInferImpl.h:152
bool isValid() const noexcept
Check whether the optimization profile can be passed to an IBuilderConfig object.
Definition: NvInferRuntime.h:1212
bool enqueue(int32_t batchSize, void *const *bindings, cudaStream_t stream, cudaEvent_t *inputConsumed) noexcept
Asynchronously execute inference on a batch.
Definition: NvInferRuntime.h:1823
bool getDebugSync() const noexcept
Get the debug sync flag.
Definition: NvInferRuntime.h:1846
int32_t getAll(int32_t size, const char **layerNames, WeightsRole *roles) noexcept
Get description of all weights that could be refit.
Definition: NvInferRuntime.h:857
bool execute(int32_t batchSize, void *const *bindings) noexcept
Synchronously execute inference on a batch.
Definition: NvInferRuntime.h:1794
TRT_DEPRECATED nvinfer1::ICudaEngine * deserializeCudaEngine(const void *blob, std::size_t size, IPluginFactory *pluginFactory) noexcept
Deserialize an engine from a stream.
Definition: NvInferRuntime.h:668
void setErrorRecorder(IErrorRecorder *recorder) noexcept
Set the ErrorRecorder for this interface.
Definition: NvInferRuntime.h:1710
std::size_t size() const noexcept
The size in bytes of the data that was allocated.
Definition: NvInferRuntime.h:182
constexpr int32_t EnumMax< TacticSource >() noexcept
Maximum number of tactic sources in TacticSource enum.
Definition: NvInferRuntime.h:1238
int32_t nbDims
The number of dimensions.
Definition: NvInferRuntime.h:338
bool isExecutionBinding(int32_t bindingIndex) const noexcept
True if pointer to tensor data is required for execution phase, false if nullptr can be supplied.
Definition: NvInferRuntime.h:1677
The TensorRT API version 1 namespace.
bool setShapeValues(const char *inputName, OptProfileSelector select, const int32_t *values, int32_t nbValues) noexcept
Set the minimum / optimum / maximum values for an input shape tensor.
Definition: NvInferRuntime.h:1148
constexpr int32_t EnumMax< DeviceType >() noexcept
Maximum number of elements in DeviceType enum.
Definition: NvInferRuntime.h:636
constexpr int32_t EnumMax< DimensionOperation >() noexcept
Maximum number of elements in DimensionOperation enum.
Definition: NvInferRuntime.h:233
bool executeV2(void *const *bindings) noexcept
Synchronously execute inference on a network.
Definition: NvInferRuntime.h:2203
int32_t getTensorsWithDynamicRange(int32_t size, const char **tensorNames) const noexcept
Get names of all tensors that have refittable dynamic ranges.
Definition: NvInferRuntime.h:924
Definition: NvInferRuntime.h:377
void setGpuAllocator(IGpuAllocator *allocator) noexcept
Set the GPU allocator.
Definition: NvInferRuntime.h:726
int32_t getNbDLACores() const noexcept
Returns number of DLA hardware cores accessible.
Definition: NvInferRuntime.h:700
float getExtraMemoryTarget() const noexcept
Get the extra memory target that has been defined for this profile.
Definition: NvInferRuntime.h:1196
int32_t getNbOptimizationProfiles() const noexcept
Get the number of optimization profiles defined for this engine.
Definition: NvInferRuntime.h:1573
This is used to set or get the value that is used in the optimization (kernel selection).
const IDimensionExpr * constant(int32_t value) noexcept
Return pointer to IDimensionExp for given value.
Definition: NvInferRuntime.h:312
IErrorRecorder * getErrorRecorder() const noexcept
Get the ErrorRecorder assigned to this interface.
Definition: NvInferRuntime.h:1725
DataType getBindingDataType(int32_t bindingIndex) const noexcept
Determine the required data type for a buffer from its binding index.
Definition: NvInferRuntime.h:1366
Dims max
Upper bounds on tensor’s dimensions.
Definition: NvInferRuntime.h:356
Check if two elements are equal.
void setErrorRecorder(IErrorRecorder *recorder) noexcept
Set the ErrorRecorder for this interface.
Definition: NvInferRuntime.h:2171
TRT_DEPRECATED bool setOptimizationProfile(int32_t profileIndex) noexcept
Select an optimization profile for the current context.
Definition: NvInferRuntime.h:1991
void setProfiler(IProfiler *profiler) noexcept
Set the profiler.
Definition: NvInferRuntime.h:1856
int32_t getNbLayers() const noexcept
Get the number of layers in the network.
Definition: NvInferRuntime.h:1392
DataType type
The type of the weights.
Definition: NvInferRuntime.h:155
bias for IConvolutionLayer, IDeconvolutionLayer, or IFullyConnectedLayer
int32_t getBindingBytesPerComponent(int32_t bindingIndex) const noexcept
Return the number of bytes per component of an element.
Definition: NvInferRuntime.h:1492
Definition: NvInferImpl.h:199
Dims min
Lower bounds on tensor’s dimensions.
Definition: NvInferRuntime.h:353
int32_t getBindingIndex(const char *name) const noexcept
Retrieve the binding index for a named tensor.
Definition: NvInferRuntime.h:1295
int32_t getNbBindings() const noexcept
Get the number of binding indices.
Definition: NvInferRuntime.h:1273
void setDLACore(int32_t dlaCore) noexcept
Set the DLA core that the deserialized engine must execute on.
Definition: NvInferRuntime.h:681
const ICudaEngine & getEngine() const noexcept
Get the associated engine.
Definition: NvInferRuntime.h:1876
IErrorRecorder * getErrorRecorder() const noexcept
Get the ErrorRecorder assigned to this interface.
Definition: NvInferRuntime.h:958
shift part of IScaleLayer
Optimization profile for dynamic input dimensions and shape tensors.
Definition: NvInferRuntime.h:1067
IExecutionContext * createExecutionContextWithoutDeviceMemory() noexcept
Create an execution context without any device memory allocated.
Definition: NvInferRuntime.h:1458
Definition: NvInferImpl.h:231
DataType
The type of weights and tensors.
Definition: NvInferRuntimeCommon.h:155
TacticSources getTacticSources() const noexcept
Return the tactic sources required by this engine.
Definition: NvInferRuntime.h:1753
int32_t getBindingVectorizedDim(int32_t bindingIndex) const noexcept
Return the dimension index that the buffer is vectorized.
Definition: NvInferRuntime.h:1547
Definition: NvInferRuntime.h:335
bool setOptimizationProfileAsync(int32_t profileIndex, cudaStream_t stream) noexcept
Select an optimization profile for the current context with async semantics.
Definition: NvInferRuntime.h:2279
Floor division of the first element by the second.
PluginTensorDesc desc
Information required to interpret a pointer to tensor data, except that desc.dims has -1 in place of ...
Definition: NvInferRuntime.h:350
Definition: NvInferImpl.h:175
Subtract the second element from the first.
Forward declaration of IPluginFactory for use by other interfaces.
Definition: NvInferRuntime.h:78
const char * getName() const noexcept
Returns the name of the network associated with the engine.
Definition: NvInferRuntime.h:1562
TensorFormat getBindingFormat(int32_t bindingIndex) const noexcept
Return the binding format.
Definition: NvInferRuntime.h:1516
Reference counted application-implemented error reporting interface for TensorRT objects.
Definition: NvInferRuntimeCommon.h:1373
IProfiler * getProfiler() const noexcept
Get the profiler.
Definition: NvInferRuntime.h:1866
int32_t getAllWeights(int32_t size, const char **weightsNames) noexcept
Get names of all weights that could be refit.
Definition: NvInferRuntime.h:1012
OptProfileSelector
When setting or querying optimization profile parameters (such as shape tensor inputs or dynamic dime...
Definition: NvInferRuntime.h:1031
WeightsRole
How a layer uses particular Weights.
Definition: NvInferRuntime.h:606
virtual bool registerCreator(IPluginCreator &creator, AsciiChar const *const pluginNamespace) noexcept=0
Register a plugin creator. Returns false if one with same type is already registered.
bool isShapeBinding(int32_t bindingIndex) const noexcept
True if tensor is required as input for shape calculations or output from them.
Definition: NvInferRuntime.h:1663
int32_t getMaxBatchSize() const noexcept
Get the maximum batch size which can be used for inference.
Definition: NvInferRuntime.h:1378
const char * getBindingFormatDesc(int32_t bindingIndex) const noexcept
Return the human readable description of the tensor format.
Definition: NvInferRuntime.h:1535
void setErrorRecorder(IErrorRecorder *recorder) noexcept
Set the ErrorRecorder for this interface.
Definition: NvInferRuntime.h:745
Plugin class for user-implemented layers.
Definition: NvInferRuntimeCommon.h:608
bool setInputShapeBinding(int32_t bindingIndex, int32_t const *data) noexcept
Set values of input tensor required by shape calculations.
Definition: NvInferRuntime.h:2100
static constexpr int32_t MAX_DIMS
The maximum number of dimensions supported for a tensor.
Definition: NvInferRuntimeCommon.h:198
Definition: NvInferRuntime.h:270
Dims getBindingDimensions(int32_t bindingIndex) const noexcept
Get the dynamic dimensions of a binding.
Definition: NvInferRuntime.h:2070
Product of the two elements.
EngineCapability
List of supported engine capability flows.
Definition: NvInferRuntime.h:104
bool hasImplicitBatchDimension() const noexcept
Query whether the engine was built with an implicit batch dimension.
Definition: NvInferRuntime.h:1744
Application-implemented interface for profiling.
Definition: NvInferRuntime.h:586
Check if element in first tensor is less than corresponding element in second tensor.
bool bindingIsInput(int32_t bindingIndex) const noexcept
Determine whether a binding is an input binding.
Definition: NvInferRuntime.h:1328
struct CUstream_st * cudaStream_t
Forward declaration of cudaStream_t.
Definition: NvInferRuntimeCommon.h:109
bool setExtraMemoryTarget(float target) noexcept
Set a target for extra GPU memory that may be used by this profile.
Definition: NvInferRuntime.h:1188
bool allInputShapesSpecified() const noexcept
Whether all input shape bindings have been specified.
Definition: NvInferRuntime.h:2151
Fields that a plugin might see for an input or output.
Definition: NvInferRuntimeCommon.h:373
TRT_DEPRECATED void destroy() noexcept
Definition: NvInferRuntime.h:867
const void * values
The weight values, in a contiguous array.
Definition: NvInferRuntime.h:156
Definition: NvInferImpl.h:183
TensorFormat PluginFormat
PluginFormat is reserved for backward compatibility.
Definition: NvInferRuntimeCommon.h:350
int32_t getDLACore() const noexcept
Get the DLA core that the engine executes on.
Definition: NvInferRuntime.h:692
Definition: NvInferImpl.h:285
const char * getBindingName(int32_t bindingIndex) const noexcept
Retrieve the name corresponding to a binding index.
Definition: NvInferRuntime.h:1315
bool setNamedWeights(const char *name, Weights weights) noexcept
Specify new weights of given name.
Definition: NvInferRuntime.h:976
const char * getName() const noexcept
Return the name of the execution context.
Definition: NvInferRuntime.h:1910
Application-implemented class for controlling allocation on the GPU.
Definition: NvInferRuntimeCommon.h:1103
Dims getDimensions(const char *inputName, OptProfileSelector select) const noexcept
Get the minimum / optimum / maximum dimensions for a dynamic input tensor.
Definition: NvInferRuntime.h:1105
IRefitter * createInferRefitter(ICudaEngine &engine, ILogger &logger) noexcept
Create an instance of an IRefitter class.
Definition: NvInferRuntime.h:2331
bool isRefittable() const noexcept
Return true if an engine can be refit.
Definition: NvInferRuntime.h:1478
int32_t getMissing(int32_t size, const char **layerNames, WeightsRole *roles) noexcept
Get description of missing weights.
Definition: NvInferRuntime.h:840
Allows a serialized functionally unsafe engine to be deserialized.
Definition: NvInferRuntime.h:648
void setDebugSync(bool sync) noexcept
Set the debug sync flag.
Definition: NvInferRuntime.h:1836
EngineCapability getEngineCapability() const noexcept
Determine what execution capability this engine has.
Definition: NvInferRuntime.h:1692
Definition: NvInferImpl.h:213
TensorLocation
The location for tensor data storage, device or host.
Definition: NvInferRuntime.h:242
Context for executing inference using an engine, with functionally unsafe features.
Definition: NvInferRuntime.h:1772
int32_t getMissingWeights(int32_t size, const char **weightsNames) noexcept
Get names of missing weights.
Definition: NvInferRuntime.h:996
TRT_DEPRECATED void destroy() noexcept
Destroy this object.
Definition: NvInferRuntime.h:1888
const IDimensionExpr * operation(DimensionOperation op, const IDimensionExpr &first, const IDimensionExpr &second) noexcept
Definition: NvInferRuntime.h:319
Dims getStrides(int32_t bindingIndex) const noexcept
Return the strides of the buffer for the given binding.
Definition: NvInferRuntime.h:1949
#define TRT_DEPRECATED
< Items that are marked as deprecated will be removed in a future release.
Definition: NvInferRuntimeCommon.h:76
bool setWeights(const char *layerName, WeightsRole role, Weights weights) noexcept
Specify new weights for a layer of given name. Returns true on success, or false if new weights are r...
Definition: NvInferRuntime.h:804
int32_t getOptimizationProfile() const noexcept
Get the index of the currently selected optimization profile.
Definition: NvInferRuntime.h:2003
void setErrorRecorder(IErrorRecorder *recorder) noexcept
Set the ErrorRecorder for this interface.
Definition: NvInferRuntime.h:943
Definition: NvInferRuntime.h:308
IExecutionContext * createExecutionContext() noexcept
Create an execution context.
Definition: NvInferRuntime.h:1422
bool setBindingDimensions(int32_t bindingIndex, Dims dimensions) noexcept
Set the dynamic dimensions of a binding.
Definition: NvInferRuntime.h:2040
Copyright © 2024 NVIDIA Corporation
Privacy Policy |
Manage My Privacy |
Do Not Sell or Share My Data |
Terms of Service |
Accessibility |
Corporate Policies |
Product Security |
Contact