NVIDIA DeepStream SDK API Reference
6.4 Release
tritonserver.h
#ifdef _COMPILING_TRITONSERVER
#if defined(_MSC_VER)
#define TRITONSERVER_DECLSPEC __declspec(dllexport)
#elif defined(__GNUC__)
#define TRITONSERVER_DECLSPEC __attribute__((__visibility__("default")))
#else
#define TRITONSERVER_DECLSPEC
#endif
#else
#if defined(_MSC_VER)
#define TRITONSERVER_DECLSPEC __declspec(dllimport)
#else
#define TRITONSERVER_DECLSPEC
#endif
#endif
struct TRITONSERVER_BufferAttributes;
struct TRITONSERVER_Error;
struct TRITONSERVER_InferenceRequest;
struct TRITONSERVER_InferenceResponse;
struct TRITONSERVER_InferenceTrace;
struct TRITONSERVER_Message;
struct TRITONSERVER_Metrics;
struct TRITONSERVER_Parameter;
struct TRITONSERVER_ResponseAllocator;
struct TRITONSERVER_Server;
struct TRITONSERVER_ServerOptions;
struct TRITONSERVER_Metric;
struct TRITONSERVER_MetricFamily;
#define TRITONSERVER_API_VERSION_MAJOR 1
#define TRITONSERVER_API_VERSION_MINOR 17

TRITONSERVER_DECLSPEC TRITONSERVER_Error* TRITONSERVER_ApiVersion(
    uint32_t* major, uint32_t* minor);
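A caller can check at runtime that the linked Triton shared library is compatible with the headers it was compiled against. A minimal sketch, using only TRITONSERVER_ApiVersion and the version macros above:

uint32_t api_major = 0, api_minor = 0;
TRITONSERVER_Error* err = TRITONSERVER_ApiVersion(&api_major, &api_minor);
if ((err == NULL) &&
    ((TRITONSERVER_API_VERSION_MAJOR != api_major) ||
     (TRITONSERVER_API_VERSION_MINOR > api_minor))) {
  /* the shared library is older than these headers require */
}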
(The remaining declarations in the source listing are abridged in this view; the complete signature and a brief description of each file member appear in the entries below.)
@ TRITONSERVER_TYPE_INT64
tritonserver_traceactivity_enum
Trace activities.
TRITONSERVER_DECLSPEC TRITONSERVER_Error * TRITONSERVER_ServerOptionsSetMinSupportedComputeCapability(TRITONSERVER_ServerOptions *options, double cc)
Set the minimum supported CUDA compute capability in a server options.
TRITONSERVER_DECLSPEC TRITONSERVER_Error * TRITONSERVER_ServerOptionsSetRepoAgentDirectory(TRITONSERVER_ServerOptions *options, const char *repoagent_dir)
Set the directory containing repository agent shared libraries.
TRITONSERVER_DECLSPEC const char * TRITONSERVER_InferenceTraceLevelString(TRITONSERVER_InferenceTraceLevel level)
Get the string representation of a trace level.
void(* TRITONSERVER_InferenceTraceTensorActivityFn_t)(TRITONSERVER_InferenceTrace *trace, TRITONSERVER_InferenceTraceActivity activity, const char *name, TRITONSERVER_DataType datatype, const void *base, size_t byte_size, const int64_t *shape, uint64_t dim_count, TRITONSERVER_MemoryType memory_type, int64_t memory_type_id, void *userp)
Type for trace tensor activity callback function.
TRITONSERVER_DECLSPEC TRITONSERVER_Error * TRITONSERVER_ServerOptionsSetExitOnError(TRITONSERVER_ServerOptions *options, bool exit)
Enable or disable exit-on-error in a server options.
TRITONSERVER_DECLSPEC TRITONSERVER_Error * TRITONSERVER_InferenceRequestId(TRITONSERVER_InferenceRequest *inference_request, const char **id)
Get the ID for a request.
enum tritonserver_tracelevel_enum TRITONSERVER_InferenceTraceLevel
TRITONSERVER_InferenceTrace.
@ TRITONSERVER_TRACE_LEVEL_DISABLED
Tracing disabled. No trace activities are reported.
TRITONSERVER_logformat_enum
Format of logging.
tritonserver_responsecompleteflag_enum
Inference response complete flags.
TRITONSERVER_DECLSPEC TRITONSERVER_Error * TRITONSERVER_ResponseAllocatorSetQueryFunction(TRITONSERVER_ResponseAllocator *allocator, TRITONSERVER_ResponseAllocatorQueryFn_t query_fn)
Set the query function to a response allocator object.
TRITONSERVER_DECLSPEC TRITONSERVER_Error * TRITONSERVER_LogMessage(TRITONSERVER_LogLevel level, const char *filename, const int line, const char *msg)
Log a message at a given log level if that level is enabled.
TRITONSERVER_DECLSPEC TRITONSERVER_Error * TRITONSERVER_InferenceResponseId(TRITONSERVER_InferenceResponse *inference_response, const char **request_id)
Get the ID of the request corresponding to a response.
@ TRITONSERVER_MEMORY_CPU
@ TRITONSERVER_PARAMETER_BYTES
@ TRITONSERVER_LOG_ISO8601
TRITONSERVER_DECLSPEC TRITONSERVER_Error * TRITONSERVER_ServerOptionsSetMetrics(TRITONSERVER_ServerOptions *options, bool metrics)
Enable or disable metrics collection in a server options.
TRITONSERVER_DECLSPEC TRITONSERVER_Error * TRITONSERVER_ServerModelIndex(TRITONSERVER_Server *server, uint32_t flags, TRITONSERVER_Message **model_index)
Get the index of all unique models in the model repositories as a TRITONSERVER_Message object.
TRITONSERVER_DECLSPEC TRITONSERVER_Error * TRITONSERVER_InferenceResponseError(TRITONSERVER_InferenceResponse *inference_response)
Return the error status of an inference response.
TRITONSERVER_DECLSPEC TRITONSERVER_Error * TRITONSERVER_MetricsFormatted(TRITONSERVER_Metrics *metrics, TRITONSERVER_MetricFormat format, const char **base, size_t *byte_size)
Get a buffer containing the metrics in the specified format.
@ TRITONSERVER_LOG_DEFAULT
@ TRITONSERVER_TYPE_UINT16
TRITONSERVER_DECLSPEC TRITONSERVER_Error * TRITONSERVER_InferenceRequestAddInput(TRITONSERVER_InferenceRequest *inference_request, const char *name, const TRITONSERVER_DataType datatype, const int64_t *shape, uint64_t dim_count)
Add an input to a request.
void(* TRITONSERVER_InferenceTraceReleaseFn_t)(TRITONSERVER_InferenceTrace *trace, void *userp)
Type for trace release callback function.
TRITONSERVER_DECLSPEC TRITONSERVER_Error * TRITONSERVER_InferenceTraceDelete(TRITONSERVER_InferenceTrace *trace)
Delete a trace object.
TRITONSERVER_DECLSPEC TRITONSERVER_Error * TRITONSERVER_InferenceRequestSetFlags(TRITONSERVER_InferenceRequest *inference_request, uint32_t flags)
Set the flag(s) associated with a request.
TRITONSERVER_DECLSPEC TRITONSERVER_Error * TRITONSERVER_ServerNew(TRITONSERVER_Server **server, TRITONSERVER_ServerOptions *options)
Create a new server object.
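A minimal start-up/shutdown sequence might look like the sketch below; the repository path "/models" is a placeholder and error handling is collapsed into a single err check.

#include <stdbool.h>
#include "tritonserver.h"

static void StartAndStopServer(void)
{
  TRITONSERVER_ServerOptions* options = NULL;
  TRITONSERVER_Error* err = TRITONSERVER_ServerOptionsNew(&options);
  if (err == NULL) {
    /* "/models" is a hypothetical repository path */
    err = TRITONSERVER_ServerOptionsSetModelRepositoryPath(options, "/models");
  }
  TRITONSERVER_Server* server = NULL;
  if (err == NULL) {
    err = TRITONSERVER_ServerNew(&server, options);
  }
  /* the options object is no longer needed once the server exists */
  TRITONSERVER_ServerOptionsDelete(options);

  bool live = false;
  if (err == NULL) {
    err = TRITONSERVER_ServerIsLive(server, &live);
  }
  /* ... create and submit inference requests ... */

  if (server != NULL) {
    TRITONSERVER_ServerStop(server);   /* stop accepting new requests */
    TRITONSERVER_ServerDelete(server); /* then release the server object */
  }
  if (err != NULL) {
    TRITONSERVER_ErrorDelete(err);
  }
}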
@ TRITONSERVER_TYPE_INT16
@ TRITONSERVER_TRACE_QUEUE_START
@ TRITONSERVER_TYPE_UINT64
TRITONSERVER_DECLSPEC TRITONSERVER_Error * TRITONSERVER_ServerModelBatchProperties(TRITONSERVER_Server *server, const char *model_name, const int64_t model_version, uint32_t *flags, void **voidp)
Get the batch properties of the model.
TRITONSERVER_DECLSPEC void TRITONSERVER_ErrorDelete(TRITONSERVER_Error *error)
Delete an error object.
TRITONSERVER_DECLSPEC const char * TRITONSERVER_ErrorMessage(TRITONSERVER_Error *error)
Get the error message.
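Most functions in this API return a TRITONSERVER_Error* that is NULL on success. A common checking idiom, sketched here around an arbitrary call on an existing server handle (assumes <stdio.h>), prints the code string and message and then deletes the error, since ownership of the error passes to the caller:

TRITONSERVER_Error* err = TRITONSERVER_ServerPollModelRepository(server);
if (err != NULL) {
  fprintf(stderr, "error: %s - %s\n",
      TRITONSERVER_ErrorCodeString(err),
      TRITONSERVER_ErrorMessage(err));
  TRITONSERVER_ErrorDelete(err);  /* the caller owns the returned error */
}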
TRITONSERVER_metrickind_enum
TRITONSERVER_MetricKind.
TRITONSERVER_DECLSPEC TRITONSERVER_Error * TRITONSERVER_ServerUnloadModelAndDependents(TRITONSERVER_Server *server, const char *model_name)
Unload the requested model, and also unload any dependent model that was loaded along with the requested model.
enum tritonserver_modelcontrolmode_enum TRITONSERVER_ModelControlMode
TRITONSERVER_ServerOptions.
@ TRITONSERVER_RATE_LIMIT_OFF
@ TRITONSERVER_ERROR_NOT_FOUND
TRITONSERVER_DECLSPEC TRITONSERVER_Error * TRITONSERVER_MessageSerializeToJson(TRITONSERVER_Message *message, const char **base, size_t *byte_size)
Get the base and size of the buffer containing the serialized message in JSON format.
TRITONSERVER_DECLSPEC TRITONSERVER_Error * TRITONSERVER_InferenceRequestRemoveRequestedOutput(TRITONSERVER_InferenceRequest *inference_request, const char *name)
Remove an output request from an inference request.
@ TRITONSERVER_TRACE_COMPUTE_END
TRITONSERVER_DECLSPEC TRITONSERVER_Error * TRITONSERVER_InferenceTraceId(TRITONSERVER_InferenceTrace *trace, uint64_t *id)
Get the id associated with a trace.
TRITONSERVER_DECLSPEC TRITONSERVER_Error * TRITONSERVER_ServerOptionsSetServerId(TRITONSERVER_ServerOptions *options, const char *server_id)
Set the textual ID for the server in a server options.
TRITONSERVER_DECLSPEC TRITONSERVER_Error * TRITONSERVER_BufferAttributesSetCudaIpcHandle(TRITONSERVER_BufferAttributes *buffer_attributes, void *cuda_ipc_handle)
Set the CudaIpcHandle field of the buffer attributes.
@ TRITONSERVER_LOG_VERBOSE
enum TRITONSERVER_memorytype_enum TRITONSERVER_MemoryType
TRITONSERVER_MemoryType.
@ TRITONSERVER_TRACE_TENSOR_QUEUE_INPUT
TRITONSERVER_DECLSPEC uint32_t TRITONSERVER_DataTypeByteSize(TRITONSERVER_DataType datatype)
Get the size of a Triton datatype in bytes.
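The datatype helpers convert between the enum and string forms and report the per-element size; for example (results shown in comments):

TRITONSERVER_DataType dt = TRITONSERVER_StringToDataType("INT32");
const char* name = TRITONSERVER_DataTypeString(dt);  /* "INT32" */
uint32_t size = TRITONSERVER_DataTypeByteSize(dt);   /* 4 */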
@ TRITONSERVER_BATCH_UNKNOWN
TRITONSERVER_DECLSPEC TRITONSERVER_Error * TRITONSERVER_ServerOptionsSetLogVerbose(TRITONSERVER_ServerOptions *options, int level)
Set verbose logging level.
@ TRITONSERVER_METRIC_PROMETHEUS
TRITONSERVER_DECLSPEC TRITONSERVER_Error * TRITONSERVER_ServerOptionsSetStartupModel(TRITONSERVER_ServerOptions *options, const char *model_name)
Set the model to be loaded at startup in a server options.
TRITONSERVER_DECLSPEC TRITONSERVER_Error * TRITONSERVER_ServerOptionsSetLogInfo(TRITONSERVER_ServerOptions *options, bool log)
Enable or disable info level logging.
@ TRITONSERVER_REQUEST_FLAG_SEQUENCE_END
void(* TRITONSERVER_InferenceRequestReleaseFn_t)(TRITONSERVER_InferenceRequest *request, const uint32_t flags, void *userp)
Type for inference request release callback function.
TRITONSERVER_DECLSPEC TRITONSERVER_Error * TRITONSERVER_InferenceRequestSetTimeoutMicroseconds(TRITONSERVER_InferenceRequest *inference_request, uint64_t timeout_us)
Set the timeout for a request, in microseconds.
TRITONSERVER_Error *(* TRITONSERVER_ResponseAllocatorAllocFn_t)(TRITONSERVER_ResponseAllocator *allocator, const char *tensor_name, size_t byte_size, TRITONSERVER_MemoryType memory_type, int64_t memory_type_id, void *userp, void **buffer, void **buffer_userp, TRITONSERVER_MemoryType *actual_memory_type, int64_t *actual_memory_type_id)
TRITONSERVER_ResponseAllocator.
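A minimal CPU-only allocator sketch, assuming plain malloc/free is acceptable to the application; a production allocator should honor the preferred memory_type rather than always answering with CPU memory.

#include <stdlib.h>

static TRITONSERVER_Error* ResponseAlloc(
    TRITONSERVER_ResponseAllocator* allocator, const char* tensor_name,
    size_t byte_size, TRITONSERVER_MemoryType memory_type,
    int64_t memory_type_id, void* userp, void** buffer, void** buffer_userp,
    TRITONSERVER_MemoryType* actual_memory_type, int64_t* actual_memory_type_id)
{
  *buffer = (byte_size == 0) ? NULL : malloc(byte_size);
  *buffer_userp = NULL;
  *actual_memory_type = TRITONSERVER_MEMORY_CPU; /* always CPU in this sketch */
  *actual_memory_type_id = 0;
  return NULL; /* success */
}

static TRITONSERVER_Error* ResponseRelease(
    TRITONSERVER_ResponseAllocator* allocator, void* buffer,
    void* buffer_userp, size_t byte_size,
    TRITONSERVER_MemoryType memory_type, int64_t memory_type_id)
{
  free(buffer);
  return NULL;
}

static TRITONSERVER_Error* CreateAllocator(
    TRITONSERVER_ResponseAllocator** allocator)
{
  /* the start function is optional and may be NULL */
  return TRITONSERVER_ResponseAllocatorNew(
      allocator, ResponseAlloc, ResponseRelease, /* start_fn = */ NULL);
}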
TRITONSERVER_DECLSPEC const char * TRITONSERVER_ErrorCodeString(TRITONSERVER_Error *error)
Get the string representation of an error code.
@ TRITONSERVER_MODEL_CONTROL_POLL
@ TRITONSERVER_TRACE_COMPUTE_INPUT_END
@ TRITONSERVER_TXN_DECOUPLED
@ TRITONSERVER_ERROR_UNKNOWN
TRITONSERVER_DECLSPEC TRITONSERVER_Error * TRITONSERVER_InferenceRequestPriority(TRITONSERVER_InferenceRequest *inference_request, uint32_t *priority)
Get the priority for a request.
TRITONSERVER_DECLSPEC const char * TRITONSERVER_InferenceTraceActivityString(TRITONSERVER_InferenceTraceActivity activity)
Get the string representation of a trace activity.
enum TRITONSERVER_metrickind_enum TRITONSERVER_MetricKind
TRITONSERVER_MetricKind.
@ TRITONSERVER_TRACE_LEVEL_TIMESTAMPS
Record timestamps for the inference request.
TRITONSERVER_DECLSPEC TRITONSERVER_Error * TRITONSERVER_MessageDelete(TRITONSERVER_Message *message)
Delete a message object.
@ TRITONSERVER_PARAMETER_INT
TRITONSERVER_DECLSPEC TRITONSERVER_Error * TRITONSERVER_ServerOptionsSetModelRepositoryPath(TRITONSERVER_ServerOptions *options, const char *model_repository_path)
Set the model repository path in a server options.
TRITONSERVER_DECLSPEC const char * TRITONSERVER_MemoryTypeString(TRITONSERVER_MemoryType memtype)
Get the string representation of a memory type.
@ TRITONSERVER_ERROR_ALREADY_EXISTS
tritonserver_modelindexflag_enum
Model index flags. The enum values must be power-of-2 values.
TRITONSERVER_DECLSPEC TRITONSERVER_Error_Code TRITONSERVER_ErrorCode(TRITONSERVER_Error *error)
Get the error code.
TRITONSERVER_DECLSPEC TRITONSERVER_Error * TRITONSERVER_ServerOptionsSetPinnedMemoryPoolByteSize(TRITONSERVER_ServerOptions *options, uint64_t size)
Set the total pinned memory byte size that the server can allocate in a server options.
@ TRITONSERVER_REQUEST_FLAG_SEQUENCE_START
#define TRITONSERVER_DECLSPEC
TRITONSERVER_DECLSPEC TRITONSERVER_Error * TRITONSERVER_ResponseAllocatorSetBufferAttributesFunction(TRITONSERVER_ResponseAllocator *allocator, TRITONSERVER_ResponseAllocatorBufferAttributesFn_t buffer_attributes_fn)
Set the buffer attributes function for a response allocator object.
TRITONSERVER_DECLSPEC TRITONSERVER_Error * TRITONSERVER_ServerOptionsSetBackendConfig(TRITONSERVER_ServerOptions *options, const char *backend_name, const char *setting, const char *value)
Set a configuration setting for a named backend in a server options.
TRITONSERVER_DECLSPEC TRITONSERVER_Error * TRITONSERVER_InferenceRequestSetCorrelationIdString(TRITONSERVER_InferenceRequest *inference_request, const char *correlation_id)
Set the correlation ID of the inference request to be a string.
@ TRITONSERVER_TRACE_LEVEL_TENSORS
Record input and output tensor values for the inference request.
enum tritonserver_requestreleaseflag_enum TRITONSERVER_RequestReleaseFlag
Inference request release flags.
TRITONSERVER_DECLSPEC TRITONSERVER_Error * TRITONSERVER_MetricsDelete(TRITONSERVER_Metrics *metrics)
Delete a metrics object.
TRITONSERVER_DECLSPEC TRITONSERVER_Error * TRITONSERVER_MetricValue(TRITONSERVER_Metric *metric, double *value)
Get the current value of a metric object.
@ TRITONSERVER_METRIC_KIND_GAUGE
void(* TRITONSERVER_InferenceResponseCompleteFn_t)(TRITONSERVER_InferenceResponse *response, const uint32_t flags, void *userp)
Type for callback function indicating that an inference response has completed.
TRITONSERVER_Error *(* TRITONSERVER_ResponseAllocatorQueryFn_t)(TRITONSERVER_ResponseAllocator *allocator, void *userp, const char *tensor_name, size_t *byte_size, TRITONSERVER_MemoryType *memory_type, int64_t *memory_type_id)
Type for function that is called to query the allocator's preferred memory type and memory type ID.
TRITONSERVER_DECLSPEC TRITONSERVER_Error * TRITONSERVER_InferenceRequestSetPriority(TRITONSERVER_InferenceRequest *inference_request, uint32_t priority)
Set the priority for a request.
TRITONSERVER_DECLSPEC TRITONSERVER_Error * TRITONSERVER_InferenceResponseOutputClassificationLabel(TRITONSERVER_InferenceResponse *inference_response, const uint32_t index, const size_t class_index, const char **label)
Get a classification label associated with an output for a given index.
TRITONSERVER_DECLSPEC TRITONSERVER_Error * TRITONSERVER_ServerOptionsSetModelLoadThreadCount(TRITONSERVER_ServerOptions *options, unsigned int thread_count)
Set the number of threads to concurrently load models in a server options.
TRITONSERVER_DECLSPEC TRITONSERVER_Error * TRITONSERVER_BufferAttributesNew(TRITONSERVER_BufferAttributes **buffer_attributes)
TRITONSERVER_BufferAttributes.
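Buffer attributes bundle the memory type, memory type ID, and byte size (and, for GPU buffers, a CUDA IPC handle) that describe a buffer. A small illustrative round trip:

TRITONSERVER_BufferAttributes* attrs = NULL;
TRITONSERVER_Error* err = TRITONSERVER_BufferAttributesNew(&attrs);
if (err == NULL)
  err = TRITONSERVER_BufferAttributesSetMemoryType(attrs, TRITONSERVER_MEMORY_CPU);
if (err == NULL)
  err = TRITONSERVER_BufferAttributesSetMemoryTypeId(attrs, 0);
if (err == NULL)
  err = TRITONSERVER_BufferAttributesSetByteSize(attrs, 16 * sizeof(float));

size_t byte_size = 0;
if (err == NULL)
  err = TRITONSERVER_BufferAttributesByteSize(attrs, &byte_size); /* 64 */
TRITONSERVER_BufferAttributesDelete(attrs);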
@ TRITONSERVER_TYPE_BYTES
TRITONSERVER_DECLSPEC TRITONSERVER_Error * TRITONSERVER_InferenceRequestRemoveAllInputs(TRITONSERVER_InferenceRequest *inference_request)
Remove all inputs from a request.
TRITONSERVER_Error *(* TRITONSERVER_ResponseAllocatorReleaseFn_t)(TRITONSERVER_ResponseAllocator *allocator, void *buffer, void *buffer_userp, size_t byte_size, TRITONSERVER_MemoryType memory_type, int64_t memory_type_id)
Type for function that is called when the server no longer holds any reference to a buffer allocated by TRITONSERVER_ResponseAllocatorAllocFn_t.
TRITONSERVER_DECLSPEC TRITONSERVER_Error * TRITONSERVER_ServerModelIsReady(TRITONSERVER_Server *server, const char *model_name, const int64_t model_version, bool *ready)
Is the model ready?
TRITONSERVER_DECLSPEC TRITONSERVER_Error * TRITONSERVER_ServerIsReady(TRITONSERVER_Server *server, bool *ready)
Is the server ready?
@ TRITONSERVER_INSTANCEGROUPKIND_MODEL
TRITONSERVER_DECLSPEC TRITONSERVER_Error * TRITONSERVER_ServerRegisterModelRepository(TRITONSERVER_Server *server, const char *repository_path, const TRITONSERVER_Parameter **name_mapping, const uint32_t mapping_count)
Register a new model repository.
TRITONSERVER_DECLSPEC TRITONSERVER_Error * TRITONSERVER_ServerOptionsSetLogWarn(TRITONSERVER_ServerOptions *options, bool log)
Enable or disable warning level logging.
enum TRITONSERVER_errorcode_enum TRITONSERVER_Error_Code
TRITONSERVER_Error.
TRITONSERVER_DECLSPEC TRITONSERVER_DataType TRITONSERVER_StringToDataType(const char *dtype)
Get the Triton datatype corresponding to a string representation of a datatype.
TRITONSERVER_DECLSPEC TRITONSERVER_Error * TRITONSERVER_MetricNew(TRITONSERVER_Metric **metric, TRITONSERVER_MetricFamily *family, const TRITONSERVER_Parameter **labels, const uint64_t label_count)
Create a new metric object.
enum TRITONSERVER_logformat_enum TRITONSERVER_LogFormat
Format of logging.
@ TRITONSERVER_REQUEST_RELEASE_ALL
TRITONSERVER_DECLSPEC TRITONSERVER_Error * TRITONSERVER_ServerStop(TRITONSERVER_Server *server)
Stop a server object.
TRITONSERVER_DECLSPEC const char * TRITONSERVER_DataTypeString(TRITONSERVER_DataType datatype)
Get the string representation of a data type.
void(* TRITONSERVER_InferenceTraceActivityFn_t)(TRITONSERVER_InferenceTrace *trace, TRITONSERVER_InferenceTraceActivity activity, uint64_t timestamp_ns, void *userp)
Type for trace timeline activity callback function.
TRITONSERVER_DECLSPEC TRITONSERVER_Error * TRITONSERVER_ServerOptionsSetMetricsInterval(TRITONSERVER_ServerOptions *options, uint64_t metrics_interval_ms)
Set the interval for metrics collection in a server options.
TRITONSERVER_DECLSPEC TRITONSERVER_Error * TRITONSERVER_InferenceRequestRemoveAllInputData(TRITONSERVER_InferenceRequest *inference_request, const char *name)
Clear all input data from an input, releasing ownership of the buffer(s) that were appended to the input.
@ TRITONSERVER_METRIC_KIND_COUNTER
TRITONSERVER_DECLSPEC TRITONSERVER_Error * TRITONSERVER_ServerOptionsSetModelControlMode(TRITONSERVER_ServerOptions *options, TRITONSERVER_ModelControlMode mode)
Set the model control mode in a server options.
TRITONSERVER_DECLSPEC TRITONSERVER_Error * TRITONSERVER_ServerOptionsSetStrictModelConfig(TRITONSERVER_ServerOptions *options, bool strict)
Enable or disable strict model configuration handling in a server options.
TRITONSERVER_DECLSPEC TRITONSERVER_Error * TRITONSERVER_MetricDelete(TRITONSERVER_Metric *metric)
Delete a metric object.
TRITONSERVER_DECLSPEC TRITONSERVER_Error * TRITONSERVER_InferenceRequestAppendInputDataWithHostPolicy(TRITONSERVER_InferenceRequest *inference_request, const char *name, const void *base, size_t byte_size, TRITONSERVER_MemoryType memory_type, int64_t memory_type_id, const char *host_policy_name)
Assign a buffer of data to an input for execution on all model instances with the specified host policy.
TRITONSERVER_DECLSPEC TRITONSERVER_Error * TRITONSERVER_InferenceRequestSetReleaseCallback(TRITONSERVER_InferenceRequest *inference_request, TRITONSERVER_InferenceRequestReleaseFn_t request_release_fn, void *request_release_userp)
Set the release callback for an inference request.
@ TRITONSERVER_TXN_ONE_TO_ONE
TRITONSERVER_errorcode_enum
TRITONSERVER_Error.
@ TRITONSERVER_RATE_LIMIT_EXEC_COUNT
TRITONSERVER_DECLSPEC TRITONSERVER_Error * TRITONSERVER_ServerInferAsync(TRITONSERVER_Server *server, TRITONSERVER_InferenceRequest *inference_request, TRITONSERVER_InferenceTrace *trace)
Perform inference using the meta-data and inputs supplied by the 'inference_request'.
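Because inference is asynchronous, the request's release callback and the response-complete callback drive the object lifetimes. A sketch of both callbacks and the submission sequence (request and allocator construction as shown elsewhere on this page; the userp arguments are application-defined and simply NULL here):

static void InferRequestRelease(
    TRITONSERVER_InferenceRequest* request, const uint32_t flags, void* userp)
{
  if (flags & TRITONSERVER_REQUEST_RELEASE_ALL) {
    /* Triton no longer holds the request; safe to delete (or reuse) */
    TRITONSERVER_InferenceRequestDelete(request);
  }
}

static void InferResponseComplete(
    TRITONSERVER_InferenceResponse* response, const uint32_t flags, void* userp)
{
  if (response != NULL) {
    /* inspect outputs here, then release the response */
    TRITONSERVER_InferenceResponseDelete(response);
  }
  /* flags & TRITONSERVER_RESPONSE_COMPLETE_FINAL marks the last response */
}

/* Submission, given an existing server, request, and allocator:
   TRITONSERVER_InferenceRequestSetReleaseCallback(
       request, InferRequestRelease, NULL);
   TRITONSERVER_InferenceRequestSetResponseCallback(
       request, allocator, NULL, InferResponseComplete, NULL);
   TRITONSERVER_ServerInferAsync(server, request, NULL);  (no trace)
*/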
TRITONSERVER_DECLSPEC TRITONSERVER_Error * TRITONSERVER_MessageNewFromSerializedJson(TRITONSERVER_Message **message, const char *base, size_t byte_size)
TRITONSERVER_Message.
@ TRITONSERVER_INDEX_FLAG_READY
enum tritonserver_txn_property_flag_enum TRITONSERVER_ModelTxnPropertyFlag
Model transaction policy flags.
TRITONSERVER_DECLSPEC TRITONSERVER_Error * TRITONSERVER_ServerLoadModel(TRITONSERVER_Server *server, const char *model_name)
Load the requested model or reload the model if it is already loaded.
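Explicit load/unload calls require that the server options were created with TRITONSERVER_ServerOptionsSetModelControlMode set to TRITONSERVER_MODEL_CONTROL_EXPLICIT; the model name below is hypothetical.

TRITONSERVER_Error* err = TRITONSERVER_ServerLoadModel(server, "densenet_onnx");
/* ... run inference against the model ... */
if (err == NULL)
  err = TRITONSERVER_ServerUnloadModel(server, "densenet_onnx");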
TRITONSERVER_DECLSPEC const char * TRITONSERVER_ParameterTypeString(TRITONSERVER_ParameterType paramtype)
Get the string representation of a parameter type.
TRITONSERVER_DECLSPEC TRITONSERVER_Error * TRITONSERVER_MetricIncrement(TRITONSERVER_Metric *metric, double value)
Increment the current value of metric by value.
TRITONSERVER_DECLSPEC TRITONSERVER_Error * TRITONSERVER_InferenceResponseOutput(TRITONSERVER_InferenceResponse *inference_response, const uint32_t index, const char **name, TRITONSERVER_DataType *datatype, const int64_t **shape, uint64_t *dim_count, const void **base, size_t *byte_size, TRITONSERVER_MemoryType *memory_type, int64_t *memory_type_id, void **userp)
Get all information about an output tensor.
TRITONSERVER_memorytype_enum
TRITONSERVER_MemoryType.
@ TRITONSERVER_TRACE_REQUEST_END
enum tritonserver_requestflag_enum TRITONSERVER_RequestFlag
TRITONSERVER_InferenceRequest.
enum tritonserver_traceactivity_enum TRITONSERVER_InferenceTraceActivity
Trace activities.
TRITONSERVER_DECLSPEC TRITONSERVER_Error * TRITONSERVER_InferenceRequestFlags(TRITONSERVER_InferenceRequest *inference_request, uint32_t *flags)
Get the flag(s) associated with a request.
TRITONSERVER_DECLSPEC TRITONSERVER_Error * TRITONSERVER_ServerDelete(TRITONSERVER_Server *server)
Delete a server object.
TRITONSERVER_DECLSPEC TRITONSERVER_Error * TRITONSERVER_ServerUnregisterModelRepository(TRITONSERVER_Server *server, const char *repository_path)
Unregister a model repository.
TRITONSERVER_DECLSPEC TRITONSERVER_Error * TRITONSERVER_ServerOptionsSetHostPolicy(TRITONSERVER_ServerOptions *options, const char *policy_name, const char *setting, const char *value)
Set a host policy setting for a given policy name in a server options.
TRITONSERVER_DECLSPEC TRITONSERVER_Error * TRITONSERVER_MetricSet(TRITONSERVER_Metric *metric, double value)
Set the current value of metric to value.
TRITONSERVER_DECLSPEC TRITONSERVER_Error * TRITONSERVER_BufferAttributesDelete(TRITONSERVER_BufferAttributes *buffer_attributes)
Delete a buffer attributes object.
@ TRITONSERVER_ERROR_INTERNAL
@ TRITONSERVER_MODEL_CONTROL_NONE
@ TRITONSERVER_MODEL_CONTROL_EXPLICIT
TRITONSERVER_DECLSPEC TRITONSERVER_Error * TRITONSERVER_MetricFamilyNew(TRITONSERVER_MetricFamily **family, const TRITONSERVER_MetricKind kind, const char *name, const char *description)
Create a new metric family object.
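A custom metric is created from a family plus a set of string-valued label parameters. This sketch (the family and label names are illustrative, and it assumes label parameters are copied during TRITONSERVER_MetricNew) builds a counter, increments it, and tears everything down in order, metrics before their family:

TRITONSERVER_MetricFamily* family = NULL;
TRITONSERVER_Error* err = TRITONSERVER_MetricFamilyNew(
    &family, TRITONSERVER_METRIC_KIND_COUNTER, "example_requests_total",
    "Example request counter");

TRITONSERVER_Parameter* label = TRITONSERVER_ParameterNew(
    "model", TRITONSERVER_PARAMETER_STRING, "example_model");
const TRITONSERVER_Parameter* labels[1] = {label};

TRITONSERVER_Metric* metric = NULL;
if (err == NULL) err = TRITONSERVER_MetricNew(&metric, family, labels, 1);
TRITONSERVER_ParameterDelete(label); /* assumed copied by MetricNew */

if (err == NULL) err = TRITONSERVER_MetricIncrement(metric, 1.0);

TRITONSERVER_MetricDelete(metric);      /* delete metrics first */
TRITONSERVER_MetricFamilyDelete(family); /* then the family */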
TRITONSERVER_DECLSPEC TRITONSERVER_Error * TRITONSERVER_ServerOptionsSetBufferManagerThreadCount(TRITONSERVER_ServerOptions *options, unsigned int thread_count)
Set the number of threads used by the buffer manager in a server options.
TRITONSERVER_DECLSPEC TRITONSERVER_Error * TRITONSERVER_ServerMetadata(TRITONSERVER_Server *server, TRITONSERVER_Message **server_metadata)
Get the metadata of the server as a TRITONSERVER_Message object.
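Server metadata, like model metadata, statistics, and configuration, is returned as a TRITONSERVER_Message that can be serialized to JSON. The returned buffer is owned by the message and is not guaranteed to be null-terminated, so print it length-bounded (sketch assumes <stdio.h>):

TRITONSERVER_Message* metadata = NULL;
TRITONSERVER_Error* err = TRITONSERVER_ServerMetadata(server, &metadata);
if (err == NULL) {
  const char* base = NULL;
  size_t byte_size = 0;
  err = TRITONSERVER_MessageSerializeToJson(metadata, &base, &byte_size);
  if (err == NULL) {
    printf("%.*s\n", (int)byte_size, base); /* length-bounded print */
  }
  TRITONSERVER_MessageDelete(metadata);
}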
TRITONSERVER_DECLSPEC TRITONSERVER_Error * TRITONSERVER_InferenceRequestCorrelationId(TRITONSERVER_InferenceRequest *inference_request, uint64_t *correlation_id)
Get the correlation ID of the inference request as an unsigned integer.
enum TRITONSERVER_parametertype_enum TRITONSERVER_ParameterType
TRITONSERVER_ParameterType.
TRITONSERVER_DECLSPEC TRITONSERVER_Error * TRITONSERVER_InferenceRequestRemoveAllRequestedOutputs(TRITONSERVER_InferenceRequest *inference_request)
Remove all output requests from an inference request.
TRITONSERVER_DECLSPEC TRITONSERVER_Error * TRITONSERVER_InferenceRequestAppendInputDataWithBufferAttributes(TRITONSERVER_InferenceRequest *inference_request, const char *name, const void *base, TRITONSERVER_BufferAttributes *buffer_attributes)
Assign a buffer of data to an input.
tritonserver_tracelevel_enum
TRITONSERVER_InferenceTrace.
TRITONSERVER_DECLSPEC TRITONSERVER_Parameter * TRITONSERVER_ParameterBytesNew(const char *name, const void *byte_ptr, const uint64_t size)
Create a new parameter object with type TRITONSERVER_PARAMETER_BYTES.
TRITONSERVER_parametertype_enum
TRITONSERVER_ParameterType.
@ TRITONSERVER_TYPE_INVALID
@ TRITONSERVER_PARAMETER_STRING
TRITONSERVER_DECLSPEC TRITONSERVER_Error * TRITONSERVER_ServerLoadModelWithParameters(TRITONSERVER_Server *server, const char *model_name, const TRITONSERVER_Parameter **parameters, const uint64_t parameter_count)
Load the requested model or reload the model if it is already loaded, with load parameters provided.
TRITONSERVER_DECLSPEC TRITONSERVER_Error * TRITONSERVER_ServerMetrics(TRITONSERVER_Server *server, TRITONSERVER_Metrics **metrics)
Get the current metrics for the server.
TRITONSERVER_DECLSPEC TRITONSERVER_Error * TRITONSERVER_ServerModelTransactionProperties(TRITONSERVER_Server *server, const char *model_name, const int64_t model_version, uint32_t *txn_flags, void **voidp)
Get the transaction policy of the model.
TRITONSERVER_DECLSPEC TRITONSERVER_Error * TRITONSERVER_ServerOptionsSetRateLimiterMode(TRITONSERVER_ServerOptions *options, TRITONSERVER_RateLimitMode mode)
Set the rate limit mode in a server options.
TRITONSERVER_DECLSPEC TRITONSERVER_Error * TRITONSERVER_InferenceTraceModelVersion(TRITONSERVER_InferenceTrace *trace, int64_t *model_version)
Get the version of the model associated with a trace.
TRITONSERVER_DECLSPEC TRITONSERVER_Error * TRITONSERVER_InferenceRequestSetId(TRITONSERVER_InferenceRequest *inference_request, const char *id)
Set the ID for a request.
enum TRITONSERVER_loglevel_enum TRITONSERVER_LogLevel
TRITONSERVER_Logging.
tritonserver_txn_property_flag_enum
Model transaction policy flags.
TRITONSERVER_DECLSPEC TRITONSERVER_Error * TRITONSERVER_ServerOptionsSetStrictReadiness(TRITONSERVER_ServerOptions *options, bool strict)
Enable or disable strict readiness handling in a server options.
TRITONSERVER_DECLSPEC TRITONSERVER_Error * TRITONSERVER_ServerIsLive(TRITONSERVER_Server *server, bool *live)
Is the server live?
enum TRITONSERVER_datatype_enum TRITONSERVER_DataType
TRITONSERVER_DataType.
TRITONSERVER_DECLSPEC TRITONSERVER_Error * TRITONSERVER_InferenceRequestAddRawInput(TRITONSERVER_InferenceRequest *inference_request, const char *name)
Add a raw input to a request.
TRITONSERVER_DECLSPEC TRITONSERVER_Error * TRITONSERVER_ServerOptionsSetGpuMetrics(TRITONSERVER_ServerOptions *options, bool gpu_metrics)
Enable or disable GPU metrics collection in a server options.
TRITONSERVER_DECLSPEC TRITONSERVER_Error * TRITONSERVER_InferenceTraceModelName(TRITONSERVER_InferenceTrace *trace, const char **model_name)
Get the name of the model associated with a trace.
@ TRITONSERVER_TRACE_COMPUTE_START
tritonserver_batchflag_enum
TRITONSERVER_Server.
@ TRITONSERVER_ERROR_INVALID_ARG
@ TRITONSERVER_MEMORY_CPU_PINNED
enum tritonserver_responsecompleteflag_enum TRITONSERVER_ResponseCompleteFlag
Inference response complete flags.
@ TRITONSERVER_TRACE_TENSOR_BACKEND_OUTPUT
@ TRITONSERVER_TYPE_UINT8
TRITONSERVER_DECLSPEC TRITONSERVER_Parameter * TRITONSERVER_ParameterNew(const char *name, const TRITONSERVER_ParameterType type, const void *value)
Create a new parameter object.
@ TRITONSERVER_TYPE_INT32
TRITONSERVER_DECLSPEC TRITONSERVER_Error * TRITONSERVER_BufferAttributesSetMemoryTypeId(TRITONSERVER_BufferAttributes *buffer_attributes, int64_t memory_type_id)
Set the memory type id field of the buffer attributes.
TRITONSERVER_DECLSPEC TRITONSERVER_Error * TRITONSERVER_ServerOptionsSetLogFile(TRITONSERVER_ServerOptions *options, const char *file)
Provide a log output file.
TRITONSERVER_DECLSPEC TRITONSERVER_Error * TRITONSERVER_ApiVersion(uint32_t *major, uint32_t *minor)
Get the TRITONSERVER API version supported by the Triton shared library.
@ TRITONSERVER_TRACE_LEVEL_MIN
Deprecated. Use TRITONSERVER_TRACE_LEVEL_TIMESTAMPS.
TRITONSERVER_DECLSPEC TRITONSERVER_Error * TRITONSERVER_ServerPollModelRepository(TRITONSERVER_Server *server)
Check the model repository for changes and update server state based on those changes.
TRITONSERVER_DECLSPEC TRITONSERVER_Error * TRITONSERVER_InferenceResponseParameter(TRITONSERVER_InferenceResponse *inference_response, const uint32_t index, const char **name, TRITONSERVER_ParameterType *type, const void **vvalue)
Get all information about a parameter.
TRITONSERVER_DECLSPEC TRITONSERVER_Error * TRITONSERVER_ServerModelStatistics(TRITONSERVER_Server *server, const char *model_name, const int64_t model_version, TRITONSERVER_Message **model_stats)
Get the statistics of a model as a TRITONSERVER_Message object.
TRITONSERVER_Error *(* TRITONSERVER_ResponseAllocatorStartFn_t)(TRITONSERVER_ResponseAllocator *allocator, void *userp)
Type for function that is called to indicate that subsequent allocation requests will refer to a new response.
@ TRITONSERVER_ERROR_UNAVAILABLE
@ TRITONSERVER_INSTANCEGROUPKIND_AUTO
TRITONSERVER_DECLSPEC TRITONSERVER_Error * TRITONSERVER_InferenceTraceTensorNew(TRITONSERVER_InferenceTrace **trace, TRITONSERVER_InferenceTraceLevel level, uint64_t parent_id, TRITONSERVER_InferenceTraceActivityFn_t activity_fn, TRITONSERVER_InferenceTraceTensorActivityFn_t tensor_activity_fn, TRITONSERVER_InferenceTraceReleaseFn_t release_fn, void *trace_userp)
Create a new inference trace object.
TRITONSERVER_instancegroupkind_enum
TRITONSERVER_InstanceGroupKind.
TRITONSERVER_DECLSPEC TRITONSERVER_Error * TRITONSERVER_ServerOptionsSetExitTimeout(TRITONSERVER_ServerOptions *options, unsigned int timeout)
Set the exit timeout, in seconds, for the server in a server options.
tritonserver_modelcontrolmode_enum
TRITONSERVER_ServerOptions.
@ TRITONSERVER_TRACE_TENSOR_BACKEND_INPUT
TRITONSERVER_DECLSPEC TRITONSERVER_Error * TRITONSERVER_ServerUnloadModel(TRITONSERVER_Server *server, const char *model_name)
Unload the requested model.
TRITONSERVER_Error *(* TRITONSERVER_ResponseAllocatorBufferAttributesFn_t)(TRITONSERVER_ResponseAllocator *allocator, const char *tensor_name, TRITONSERVER_BufferAttributes *buffer_attributes, void *userp, void *buffer_userp)
Type for allocation function that allocates a buffer to hold an output tensor with buffer attributes.
TRITONSERVER_loglevel_enum
TRITONSERVER_Logging.
TRITONSERVER_DECLSPEC TRITONSERVER_Error * TRITONSERVER_InferenceTraceParentId(TRITONSERVER_InferenceTrace *trace, uint64_t *parent_id)
Get the parent id associated with a trace.
@ TRITONSERVER_ERROR_UNSUPPORTED
TRITONSERVER_DECLSPEC TRITONSERVER_Error * TRITONSERVER_ErrorNew(TRITONSERVER_Error_Code code, const char *msg)
Create a new error object.
TRITONSERVER_DECLSPEC TRITONSERVER_Error * TRITONSERVER_GetMetricKind(TRITONSERVER_Metric *metric, TRITONSERVER_MetricKind *kind)
Get the TRITONSERVER_MetricKind of metric and its corresponding family.
TRITONSERVER_DECLSPEC TRITONSERVER_Error * TRITONSERVER_InferenceResponseOutputCount(TRITONSERVER_InferenceResponse *inference_response, uint32_t *count)
Get the number of outputs available in the response.
TRITONSERVER_DECLSPEC const char * TRITONSERVER_InstanceGroupKindString(TRITONSERVER_InstanceGroupKind kind)
Get the string representation of an instance-group kind.
enum tritonserver_ratelimitmode_enum TRITONSERVER_RateLimitMode
Rate limit modes.
TRITONSERVER_DECLSPEC TRITONSERVER_Error * TRITONSERVER_BufferAttributesByteSize(TRITONSERVER_BufferAttributes *buffer_attributes, size_t *byte_size)
Get the byte size field of the buffer attributes.
TRITONSERVER_DECLSPEC TRITONSERVER_Error * TRITONSERVER_ServerOptionsSetResponseCacheByteSize(TRITONSERVER_ServerOptions *options, uint64_t size)
Set the total response cache byte size that the server can allocate in CPU memory.
TRITONSERVER_DECLSPEC TRITONSERVER_Error * TRITONSERVER_MetricFamilyDelete(TRITONSERVER_MetricFamily *family)
Delete a metric family object.
TRITONSERVER_DECLSPEC TRITONSERVER_Error * TRITONSERVER_BufferAttributesSetMemoryType(TRITONSERVER_BufferAttributes *buffer_attributes, TRITONSERVER_MemoryType memory_type)
Set the memory type field of the buffer attributes.
enum tritonserver_metricformat_enum TRITONSERVER_MetricFormat
TRITONSERVER_Metrics.
TRITONSERVER_DECLSPEC TRITONSERVER_Error * TRITONSERVER_BufferAttributesCudaIpcHandle(TRITONSERVER_BufferAttributes *buffer_attributes, void **cuda_ipc_handle)
Get the CudaIpcHandle field of the buffer attributes object.
TRITONSERVER_DECLSPEC TRITONSERVER_Error * TRITONSERVER_InferenceResponseModel(TRITONSERVER_InferenceResponse *inference_response, const char **model_name, int64_t *model_version)
Get model used to produce a response.
@ TRITONSERVER_TRACE_COMPUTE_OUTPUT_START
TRITONSERVER_DECLSPEC TRITONSERVER_Error * TRITONSERVER_InferenceRequestSetCorrelationId(TRITONSERVER_InferenceRequest *inference_request, uint64_t correlation_id)
Set the correlation ID of the inference request to be an unsigned integer.
TRITONSERVER_DECLSPEC TRITONSERVER_Error * TRITONSERVER_ResponseAllocatorNew(TRITONSERVER_ResponseAllocator **allocator, TRITONSERVER_ResponseAllocatorAllocFn_t alloc_fn, TRITONSERVER_ResponseAllocatorReleaseFn_t release_fn, TRITONSERVER_ResponseAllocatorStartFn_t start_fn)
Create a new response allocator object.
TRITONSERVER_DECLSPEC TRITONSERVER_Error * TRITONSERVER_ServerOptionsSetModelLoadDeviceLimit(TRITONSERVER_ServerOptions *options, const TRITONSERVER_InstanceGroupKind kind, const int device_id, const double fraction)
Specify the limit on memory usage as a fraction on the device identified by 'kind' and 'device_id'.
@ TRITONSERVER_RESPONSE_COMPLETE_FINAL
TRITONSERVER_DECLSPEC TRITONSERVER_Error * TRITONSERVER_InferenceRequestAppendInputData(TRITONSERVER_InferenceRequest *inference_request, const char *name, const void *base, size_t byte_size, TRITONSERVER_MemoryType memory_type, int64_t memory_type_id)
Assign a buffer of data to an input.
TRITONSERVER_datatype_enum
TRITONSERVER_DataType.
tritonserver_metricformat_enum
TRITONSERVER_Metrics.
TRITONSERVER_DECLSPEC TRITONSERVER_Error * TRITONSERVER_InferenceRequestCorrelationIdString(TRITONSERVER_InferenceRequest *inference_request, const char **correlation_id)
Get the correlation ID of the inference request as a string.
@ TRITONSERVER_INSTANCEGROUPKIND_GPU
enum TRITONSERVER_instancegroupkind_enum TRITONSERVER_InstanceGroupKind
TRITONSERVER_InstanceGroupKind.
TRITONSERVER_DECLSPEC bool TRITONSERVER_LogIsEnabled(TRITONSERVER_LogLevel level)
Is a log level enabled?
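Checking whether a level is enabled before formatting a message avoids wasted work. A small sketch using the info level (TRITONSERVER_LOG_INFO is assumed from the TRITONSERVER_loglevel_enum):

if (TRITONSERVER_LogIsEnabled(TRITONSERVER_LOG_INFO)) {
  TRITONSERVER_Error* err = TRITONSERVER_LogMessage(
      TRITONSERVER_LOG_INFO, __FILE__, __LINE__, "pipeline initialized");
  if (err != NULL) {
    TRITONSERVER_ErrorDelete(err);
  }
}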
tritonserver_requestflag_enum
TRITONSERVER_InferenceRequest.
TRITONSERVER_DECLSPEC TRITONSERVER_Error * TRITONSERVER_InferenceRequestAddRequestedOutput(TRITONSERVER_InferenceRequest *inference_request, const char *name)
Add an output request to an inference request.
@ TRITONSERVER_TYPE_UINT32
TRITONSERVER_DECLSPEC TRITONSERVER_Error * TRITONSERVER_ServerModelMetadata(TRITONSERVER_Server *server, const char *model_name, const int64_t model_version, TRITONSERVER_Message **model_metadata)
Get the metadata of a model as a TRITONSERVER_Message object.
TRITONSERVER_DECLSPEC TRITONSERVER_Error * TRITONSERVER_ServerOptionsSetLogError(TRITONSERVER_ServerOptions *options, bool log)
Enable or disable error level logging.
enum tritonserver_modelindexflag_enum TRITONSERVER_ModelIndexFlag
Model index flags. The enum values must be power-of-2 values.
@ TRITONSERVER_TRACE_LEVEL_MAX
Deprecated. Use TRITONSERVER_TRACE_LEVEL_TIMESTAMPS.
TRITONSERVER_DECLSPEC TRITONSERVER_Error * TRITONSERVER_BufferAttributesSetByteSize(TRITONSERVER_BufferAttributes *buffer_attributes, size_t byte_size)
Set the byte size field of the buffer attributes.
@ TRITONSERVER_TRACE_REQUEST_START
TRITONSERVER_DECLSPEC TRITONSERVER_Error * TRITONSERVER_InferenceTraceNew(TRITONSERVER_InferenceTrace **trace, TRITONSERVER_InferenceTraceLevel level, uint64_t parent_id, TRITONSERVER_InferenceTraceActivityFn_t activity_fn, TRITONSERVER_InferenceTraceReleaseFn_t release_fn, void *trace_userp)
Create a new inference trace object.
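A trace object pairs a timeline-activity callback with a release callback that fires once Triton no longer uses the trace; a minimal sketch (the callback bodies are illustrative):

static void TraceActivity(
    TRITONSERVER_InferenceTrace* trace,
    TRITONSERVER_InferenceTraceActivity activity, uint64_t timestamp_ns,
    void* userp)
{
  /* e.g. record TRITONSERVER_InferenceTraceActivityString(activity)
     together with timestamp_ns */
}

static void TraceRelease(TRITONSERVER_InferenceTrace* trace, void* userp)
{
  TRITONSERVER_InferenceTraceDelete(trace); /* safe once released */
}

/* Creation; the trace is then passed to TRITONSERVER_ServerInferAsync:
   TRITONSERVER_InferenceTrace* trace = NULL;
   TRITONSERVER_InferenceTraceNew(
       &trace, TRITONSERVER_TRACE_LEVEL_TIMESTAMPS, 0,
       TraceActivity, TraceRelease, NULL);
*/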
@ TRITONSERVER_PARAMETER_BOOL
TRITONSERVER_DECLSPEC TRITONSERVER_Error * TRITONSERVER_ServerModelConfig(TRITONSERVER_Server *server, const char *model_name, const int64_t model_version, const uint32_t config_version, TRITONSERVER_Message **model_config)
Get the configuration of a model as a TRITONSERVER_Message object.
TRITONSERVER_DECLSPEC TRITONSERVER_Error * TRITONSERVER_ServerOptionsSetCpuMetrics(TRITONSERVER_ServerOptions *options, bool cpu_metrics)
Enable or disable CPU metrics collection in a server options.
TRITONSERVER_DECLSPEC TRITONSERVER_Error * TRITONSERVER_ServerOptionsSetBackendDirectory(TRITONSERVER_ServerOptions *options, const char *backend_dir)
Set the directory containing backend shared libraries.
TRITONSERVER_DECLSPEC TRITONSERVER_Error * TRITONSERVER_ResponseAllocatorDelete(TRITONSERVER_ResponseAllocator *allocator)
Delete a response allocator.
TRITONSERVER_DECLSPEC void TRITONSERVER_ParameterDelete(TRITONSERVER_Parameter *parameter)
Delete a parameter object.
enum tritonserver_batchflag_enum TRITONSERVER_ModelBatchFlag
TRITONSERVER_Server.
TRITONSERVER_DECLSPEC TRITONSERVER_Error * TRITONSERVER_BufferAttributesMemoryType(TRITONSERVER_BufferAttributes *buffer_attributes, TRITONSERVER_MemoryType *memory_type)
Get the memory type field of the buffer attributes.
TRITONSERVER_DECLSPEC TRITONSERVER_Error * TRITONSERVER_InferenceRequestRemoveInput(TRITONSERVER_InferenceRequest *inference_request, const char *name)
Remove an input from a request.
TRITONSERVER_DECLSPEC TRITONSERVER_Error * TRITONSERVER_ServerOptionsSetCudaMemoryPoolByteSize(TRITONSERVER_ServerOptions *options, int gpu_device, uint64_t size)
Set the total CUDA memory byte size that the server can allocate on a given GPU device in a server options.
TRITONSERVER_DECLSPEC TRITONSERVER_Error * TRITONSERVER_ServerOptionsAddRateLimiterResource(TRITONSERVER_ServerOptions *options, const char *resource_name, const size_t resource_count, const int device)
Add resource count for rate limiting.
@ TRITONSERVER_MEMORY_GPU
tritonserver_requestreleaseflag_enum
Inference request release flags.
TRITONSERVER_DECLSPEC TRITONSERVER_Error * TRITONSERVER_InferenceRequestSetResponseCallback(TRITONSERVER_InferenceRequest *inference_request, TRITONSERVER_ResponseAllocator *response_allocator, void *response_allocator_userp, TRITONSERVER_InferenceResponseCompleteFn_t response_fn, void *response_userp)
Set the allocator and response callback for an inference request.
TRITONSERVER_DECLSPEC TRITONSERVER_Error * TRITONSERVER_InferenceRequestTimeoutMicroseconds(TRITONSERVER_InferenceRequest *inference_request, uint64_t *timeout_us)
Get the timeout for a request, in microseconds.
TRITONSERVER_DECLSPEC TRITONSERVER_Error * TRITONSERVER_InferenceResponseDelete(TRITONSERVER_InferenceResponse *inference_response)
TRITONSERVER_InferenceResponse.
TRITONSERVER_DECLSPEC TRITONSERVER_Error * TRITONSERVER_ServerOptionsSetLogFormat(TRITONSERVER_ServerOptions *options, const TRITONSERVER_LogFormat format)
Set the logging format.
tritonserver_ratelimitmode_enum
Rate limit modes.
TRITONSERVER_DECLSPEC TRITONSERVER_Error * TRITONSERVER_InferenceRequestDelete(TRITONSERVER_InferenceRequest *inference_request)
Delete an inference request object.
@ TRITONSERVER_BATCH_FIRST_DIM
TRITONSERVER_DECLSPEC TRITONSERVER_Error * TRITONSERVER_ServerOptionsNew(TRITONSERVER_ServerOptions **options)
Create a new server options object.
TRITONSERVER_DECLSPEC TRITONSERVER_Error * TRITONSERVER_BufferAttributesMemoryTypeId(TRITONSERVER_BufferAttributes *buffer_attributes, int64_t *memory_type_id)
Get the memory type id field of the buffer attributes.
TRITONSERVER_DECLSPEC TRITONSERVER_Error * TRITONSERVER_InferenceRequestNew(TRITONSERVER_InferenceRequest **inference_request, TRITONSERVER_Server *server, const char *model_name, const int64_t model_version)
Create a new inference request object.
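Building a request typically follows new, then add input, then append input data, then request outputs. The model name "simple", the tensor names, and the shape below are hypothetical; -1 selects the latest model version.

TRITONSERVER_InferenceRequest* request = NULL;
TRITONSERVER_Error* err = TRITONSERVER_InferenceRequestNew(
    &request, server, "simple", -1 /* latest version */);
if (err == NULL) {
  err = TRITONSERVER_InferenceRequestSetId(request, "request-0");
}

const int64_t shape[2] = {1, 16};
if (err == NULL) {
  err = TRITONSERVER_InferenceRequestAddInput(
      request, "INPUT0", TRITONSERVER_TYPE_INT32, shape, 2);
}

int32_t input_data[16] = {0};
if (err == NULL) {
  err = TRITONSERVER_InferenceRequestAppendInputData(
      request, "INPUT0", input_data, sizeof(input_data),
      TRITONSERVER_MEMORY_CPU, 0 /* memory_type_id */);
}

if (err == NULL) {
  err = TRITONSERVER_InferenceRequestAddRequestedOutput(request, "OUTPUT0");
}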
@ TRITONSERVER_INSTANCEGROUPKIND_CPU
TRITONSERVER_DECLSPEC TRITONSERVER_Error * TRITONSERVER_ServerOptionsDelete(TRITONSERVER_ServerOptions *options)
Delete a server options object.
TRITONSERVER_DECLSPEC TRITONSERVER_Error * TRITONSERVER_InferenceResponseParameterCount(TRITONSERVER_InferenceResponse *inference_response, uint32_t *count)
Get the number of parameters available in the response.