NVIDIA DeepStream SDK API Reference

6.4 Release
tritonserver.h File Reference

Go to the source code of this file.

Macros

#define TRITONSERVER_DECLSPEC
 
#define TRITONSERVER_API_VERSION_MAJOR   1
 TRITONSERVER API Version. More...
 
#define TRITONSERVER_API_VERSION_MINOR   17
 

Typedefs

typedef enum TRITONSERVER_datatype_enum TRITONSERVER_DataType
 TRITONSERVER_DataType. More...
 
typedef enum TRITONSERVER_memorytype_enum TRITONSERVER_MemoryType
 TRITONSERVER_MemoryType. More...
 
typedef enum TRITONSERVER_parametertype_enum TRITONSERVER_ParameterType
 TRITONSERVER_ParameterType. More...
 
typedef enum TRITONSERVER_instancegroupkind_enum TRITONSERVER_InstanceGroupKind
 TRITONSERVER_InstanceGroupKind. More...
 
typedef enum TRITONSERVER_loglevel_enum TRITONSERVER_LogLevel
 TRITONSERVER_Logging. More...
 
typedef enum TRITONSERVER_logformat_enum TRITONSERVER_LogFormat
 Format of logging. More...
 
typedef enum TRITONSERVER_errorcode_enum TRITONSERVER_Error_Code
 TRITONSERVER_Error. More...
 
typedef TRITONSERVER_Error *(* TRITONSERVER_ResponseAllocatorAllocFn_t) (TRITONSERVER_ResponseAllocator *allocator, const char *tensor_name, size_t byte_size, TRITONSERVER_MemoryType memory_type, int64_t memory_type_id, void *userp, void **buffer, void **buffer_userp, TRITONSERVER_MemoryType *actual_memory_type, int64_t *actual_memory_type_id)
 TRITONSERVER_ResponseAllocator. More...
 
typedef TRITONSERVER_Error *(* TRITONSERVER_ResponseAllocatorBufferAttributesFn_t) (TRITONSERVER_ResponseAllocator *allocator, const char *tensor_name, TRITONSERVER_BufferAttributes *buffer_attributes, void *userp, void *buffer_userp)
 Type for allocation function that allocates a buffer to hold an output tensor with buffer attributes. More...
 
typedef TRITONSERVER_Error *(* TRITONSERVER_ResponseAllocatorQueryFn_t) (TRITONSERVER_ResponseAllocator *allocator, void *userp, const char *tensor_name, size_t *byte_size, TRITONSERVER_MemoryType *memory_type, int64_t *memory_type_id)
 Type for function that is called to query the allocator's preferred memory type and memory type ID. More...
 
typedef TRITONSERVER_Error *(* TRITONSERVER_ResponseAllocatorReleaseFn_t) (TRITONSERVER_ResponseAllocator *allocator, void *buffer, void *buffer_userp, size_t byte_size, TRITONSERVER_MemoryType memory_type, int64_t memory_type_id)
 Type for function that is called when the server no longer holds any reference to a buffer allocated by TRITONSERVER_ResponseAllocatorAllocFn_t. More...
 
typedef TRITONSERVER_Error *(* TRITONSERVER_ResponseAllocatorStartFn_t) (TRITONSERVER_ResponseAllocator *allocator, void *userp)
 Type for function that is called to indicate that subsequent allocation requests will refer to a new response. More...
 
typedef enum tritonserver_metricformat_enum TRITONSERVER_MetricFormat
 TRITONSERVER_Metrics. More...
 
typedef enum tritonserver_tracelevel_enum TRITONSERVER_InferenceTraceLevel
 TRITONSERVER_InferenceTrace. More...
 
typedef enum tritonserver_traceactivity_enum TRITONSERVER_InferenceTraceActivity
 Trace activities. More...
 
typedef void(* TRITONSERVER_InferenceTraceActivityFn_t) (TRITONSERVER_InferenceTrace *trace, TRITONSERVER_InferenceTraceActivity activity, uint64_t timestamp_ns, void *userp)
 Type for trace timeline activity callback function. More...
 
typedef void(* TRITONSERVER_InferenceTraceTensorActivityFn_t) (TRITONSERVER_InferenceTrace *trace, TRITONSERVER_InferenceTraceActivity activity, const char *name, TRITONSERVER_DataType datatype, const void *base, size_t byte_size, const int64_t *shape, uint64_t dim_count, TRITONSERVER_MemoryType memory_type, int64_t memory_type_id, void *userp)
 Type for trace tensor activity callback function. More...
 
typedef void(* TRITONSERVER_InferenceTraceReleaseFn_t) (TRITONSERVER_InferenceTrace *trace, void *userp)
 Type for trace release callback function. More...
 
typedef enum tritonserver_requestflag_enum TRITONSERVER_RequestFlag
 TRITONSERVER_InferenceRequest. More...
 
typedef enum tritonserver_requestreleaseflag_enum TRITONSERVER_RequestReleaseFlag
 Inference request release flags. More...
 
typedef enum tritonserver_responsecompleteflag_enum TRITONSERVER_ResponseCompleteFlag
 Inference response complete flags. More...
 
typedef void(* TRITONSERVER_InferenceRequestReleaseFn_t) (TRITONSERVER_InferenceRequest *request, const uint32_t flags, void *userp)
 Type for inference request release callback function. More...
 
typedef void(* TRITONSERVER_InferenceResponseCompleteFn_t) (TRITONSERVER_InferenceResponse *response, const uint32_t flags, void *userp)
 Type for callback function indicating that an inference response has completed. More...
 
typedef enum tritonserver_modelcontrolmode_enum TRITONSERVER_ModelControlMode
 TRITONSERVER_ServerOptions. More...
 
typedef enum tritonserver_ratelimitmode_enum TRITONSERVER_RateLimitMode
 Rate limit modes. More...
 
typedef enum tritonserver_batchflag_enum TRITONSERVER_ModelBatchFlag
 TRITONSERVER_Server. More...
 
typedef enum tritonserver_modelindexflag_enum TRITONSERVER_ModelIndexFlag
 Model index flags. The enum values must be power-of-2 values. More...
 
typedef enum tritonserver_txn_property_flag_enum TRITONSERVER_ModelTxnPropertyFlag
 Model transaction policy flags. More...
 
typedef enum TRITONSERVER_metrickind_enum TRITONSERVER_MetricKind
 TRITONSERVER_MetricKind. More...
 

Enumerations

enum  TRITONSERVER_datatype_enum {
  TRITONSERVER_TYPE_INVALID,
  TRITONSERVER_TYPE_BOOL,
  TRITONSERVER_TYPE_UINT8,
  TRITONSERVER_TYPE_UINT16,
  TRITONSERVER_TYPE_UINT32,
  TRITONSERVER_TYPE_UINT64,
  TRITONSERVER_TYPE_INT8,
  TRITONSERVER_TYPE_INT16,
  TRITONSERVER_TYPE_INT32,
  TRITONSERVER_TYPE_INT64,
  TRITONSERVER_TYPE_FP16,
  TRITONSERVER_TYPE_FP32,
  TRITONSERVER_TYPE_FP64,
  TRITONSERVER_TYPE_BYTES,
  TRITONSERVER_TYPE_BF16
}
 TRITONSERVER_DataType. More...
 
enum  TRITONSERVER_memorytype_enum {
  TRITONSERVER_MEMORY_CPU,
  TRITONSERVER_MEMORY_CPU_PINNED,
  TRITONSERVER_MEMORY_GPU
}
 TRITONSERVER_MemoryType. More...
 
enum  TRITONSERVER_parametertype_enum {
  TRITONSERVER_PARAMETER_STRING,
  TRITONSERVER_PARAMETER_INT,
  TRITONSERVER_PARAMETER_BOOL,
  TRITONSERVER_PARAMETER_BYTES
}
 TRITONSERVER_ParameterType. More...
 
enum  TRITONSERVER_instancegroupkind_enum {
  TRITONSERVER_INSTANCEGROUPKIND_AUTO,
  TRITONSERVER_INSTANCEGROUPKIND_CPU,
  TRITONSERVER_INSTANCEGROUPKIND_GPU,
  TRITONSERVER_INSTANCEGROUPKIND_MODEL
}
 TRITONSERVER_InstanceGroupKind. More...
 
enum  TRITONSERVER_loglevel_enum {
  TRITONSERVER_LOG_INFO,
  TRITONSERVER_LOG_WARN,
  TRITONSERVER_LOG_ERROR,
  TRITONSERVER_LOG_VERBOSE
}
 TRITONSERVER_Logging. More...
 
enum  TRITONSERVER_logformat_enum {
  TRITONSERVER_LOG_DEFAULT,
  TRITONSERVER_LOG_ISO8601
}
 Format of logging. More...
 
enum  TRITONSERVER_errorcode_enum {
  TRITONSERVER_ERROR_UNKNOWN,
  TRITONSERVER_ERROR_INTERNAL,
  TRITONSERVER_ERROR_NOT_FOUND,
  TRITONSERVER_ERROR_INVALID_ARG,
  TRITONSERVER_ERROR_UNAVAILABLE,
  TRITONSERVER_ERROR_UNSUPPORTED,
  TRITONSERVER_ERROR_ALREADY_EXISTS
}
 TRITONSERVER_Error. More...
 
enum  tritonserver_metricformat_enum { TRITONSERVER_METRIC_PROMETHEUS }
 TRITONSERVER_Metrics. More...
 
enum  tritonserver_tracelevel_enum {
  TRITONSERVER_TRACE_LEVEL_DISABLED = 0,
  TRITONSERVER_TRACE_LEVEL_MIN = 1,
  TRITONSERVER_TRACE_LEVEL_MAX = 2,
  TRITONSERVER_TRACE_LEVEL_TIMESTAMPS = 0x4,
  TRITONSERVER_TRACE_LEVEL_TENSORS = 0x8
}
 TRITONSERVER_InferenceTrace. More...
 
enum  tritonserver_traceactivity_enum {
  TRITONSERVER_TRACE_REQUEST_START = 0,
  TRITONSERVER_TRACE_QUEUE_START = 1,
  TRITONSERVER_TRACE_COMPUTE_START = 2,
  TRITONSERVER_TRACE_COMPUTE_INPUT_END = 3,
  TRITONSERVER_TRACE_COMPUTE_OUTPUT_START = 4,
  TRITONSERVER_TRACE_COMPUTE_END = 5,
  TRITONSERVER_TRACE_REQUEST_END = 6,
  TRITONSERVER_TRACE_TENSOR_QUEUE_INPUT = 7,
  TRITONSERVER_TRACE_TENSOR_BACKEND_INPUT = 8,
  TRITONSERVER_TRACE_TENSOR_BACKEND_OUTPUT = 9
}
 Trace activities. More...
 
enum  tritonserver_requestflag_enum {
  TRITONSERVER_REQUEST_FLAG_SEQUENCE_START = 1,
  TRITONSERVER_REQUEST_FLAG_SEQUENCE_END = 2
}
 TRITONSERVER_InferenceRequest. More...
 
enum  tritonserver_requestreleaseflag_enum { TRITONSERVER_REQUEST_RELEASE_ALL = 1 }
 Inference request release flags. More...
 
enum  tritonserver_responsecompleteflag_enum { TRITONSERVER_RESPONSE_COMPLETE_FINAL = 1 }
 Inference response complete flags. More...
 
enum  tritonserver_modelcontrolmode_enum {
  TRITONSERVER_MODEL_CONTROL_NONE,
  TRITONSERVER_MODEL_CONTROL_POLL,
  TRITONSERVER_MODEL_CONTROL_EXPLICIT
}
 TRITONSERVER_ServerOptions. More...
 
enum  tritonserver_ratelimitmode_enum {
  TRITONSERVER_RATE_LIMIT_OFF,
  TRITONSERVER_RATE_LIMIT_EXEC_COUNT
}
 Rate limit modes. More...
 
enum  tritonserver_batchflag_enum {
  TRITONSERVER_BATCH_UNKNOWN = 1,
  TRITONSERVER_BATCH_FIRST_DIM = 2
}
 TRITONSERVER_Server. More...
 
enum  tritonserver_modelindexflag_enum { TRITONSERVER_INDEX_FLAG_READY = 1 }
 Model index flags. The enum values must be power-of-2 values. More...
 
enum  tritonserver_txn_property_flag_enum {
  TRITONSERVER_TXN_ONE_TO_ONE = 1,
  TRITONSERVER_TXN_DECOUPLED = 2
}
 Model transaction policy flags. More...
 
enum  TRITONSERVER_metrickind_enum {
  TRITONSERVER_METRIC_KIND_COUNTER,
  TRITONSERVER_METRIC_KIND_GAUGE
}
 TRITONSERVER_MetricKind. More...
 

Functions

TRITONSERVER_DECLSPEC TRITONSERVER_Error * TRITONSERVER_ApiVersion (uint32_t *major, uint32_t *minor)
 Get the TRITONSERVER API version supported by the Triton shared library. More...
 
const TRITONSERVER_DECLSPEC char * TRITONSERVER_DataTypeString (TRITONSERVER_DataType datatype)
 Get the string representation of a data type. More...
 
TRITONSERVER_DECLSPEC TRITONSERVER_DataType TRITONSERVER_StringToDataType (const char *dtype)
 Get the Triton datatype corresponding to a string representation of a datatype. More...
 
TRITONSERVER_DECLSPEC uint32_t TRITONSERVER_DataTypeByteSize (TRITONSERVER_DataType datatype)
 Get the size of a Triton datatype in bytes. More...
 
const TRITONSERVER_DECLSPEC char * TRITONSERVER_MemoryTypeString (TRITONSERVER_MemoryType memtype)
 Get the string representation of a memory type. More...
 
const TRITONSERVER_DECLSPEC char * TRITONSERVER_ParameterTypeString (TRITONSERVER_ParameterType paramtype)
 Get the string representation of a parameter type. More...
 
TRITONSERVER_DECLSPEC TRITONSERVER_Parameter * TRITONSERVER_ParameterNew (const char *name, const TRITONSERVER_ParameterType type, const void *value)
 Create a new parameter object. More...
 
TRITONSERVER_DECLSPEC TRITONSERVER_Parameter * TRITONSERVER_ParameterBytesNew (const char *name, const void *byte_ptr, const uint64_t size)
 Create a new parameter object with type TRITONSERVER_PARAMETER_BYTES. More...
 
TRITONSERVER_DECLSPEC void TRITONSERVER_ParameterDelete (TRITONSERVER_Parameter *parameter)
 Delete a parameter object. More...
 
const TRITONSERVER_DECLSPEC char * TRITONSERVER_InstanceGroupKindString (TRITONSERVER_InstanceGroupKind kind)
 Get the string representation of an instance-group kind. More...
 
TRITONSERVER_DECLSPEC bool TRITONSERVER_LogIsEnabled (TRITONSERVER_LogLevel level)
 Is a log level enabled? More...
 
TRITONSERVER_DECLSPEC TRITONSERVER_Error * TRITONSERVER_LogMessage (TRITONSERVER_LogLevel level, const char *filename, const int line, const char *msg)
 Log a message at a given log level if that level is enabled. More...
 
TRITONSERVER_DECLSPEC TRITONSERVER_Error * TRITONSERVER_ErrorNew (TRITONSERVER_Error_Code code, const char *msg)
 Create a new error object. More...
 
TRITONSERVER_DECLSPEC void TRITONSERVER_ErrorDelete (TRITONSERVER_Error *error)
 Delete an error object. More...
 
TRITONSERVER_DECLSPEC TRITONSERVER_Error_Code TRITONSERVER_ErrorCode (TRITONSERVER_Error *error)
 Get the error code. More...
 
const TRITONSERVER_DECLSPEC char * TRITONSERVER_ErrorCodeString (TRITONSERVER_Error *error)
 Get the string representation of an error code. More...
 
const TRITONSERVER_DECLSPEC char * TRITONSERVER_ErrorMessage (TRITONSERVER_Error *error)
 Get the error message. More...
 
TRITONSERVER_DECLSPEC TRITONSERVER_Error * TRITONSERVER_ResponseAllocatorNew (TRITONSERVER_ResponseAllocator **allocator, TRITONSERVER_ResponseAllocatorAllocFn_t alloc_fn, TRITONSERVER_ResponseAllocatorReleaseFn_t release_fn, TRITONSERVER_ResponseAllocatorStartFn_t start_fn)
 Create a new response allocator object. More...
 
TRITONSERVER_DECLSPEC TRITONSERVER_Error * TRITONSERVER_ResponseAllocatorSetBufferAttributesFunction (TRITONSERVER_ResponseAllocator *allocator, TRITONSERVER_ResponseAllocatorBufferAttributesFn_t buffer_attributes_fn)
 Set the buffer attributes function for a response allocator object. More...
 
TRITONSERVER_DECLSPEC TRITONSERVER_Error * TRITONSERVER_ResponseAllocatorSetQueryFunction (TRITONSERVER_ResponseAllocator *allocator, TRITONSERVER_ResponseAllocatorQueryFn_t query_fn)
 Set the query function to a response allocator object. More...
 
TRITONSERVER_DECLSPEC TRITONSERVER_Error * TRITONSERVER_ResponseAllocatorDelete (TRITONSERVER_ResponseAllocator *allocator)
 Delete a response allocator. More...
 
TRITONSERVER_DECLSPEC TRITONSERVER_Error * TRITONSERVER_MessageNewFromSerializedJson (TRITONSERVER_Message **message, const char *base, size_t byte_size)
 TRITONSERVER_Message. More...
 
TRITONSERVER_DECLSPEC TRITONSERVER_Error * TRITONSERVER_MessageDelete (TRITONSERVER_Message *message)
 Delete a message object. More...
 
TRITONSERVER_DECLSPEC TRITONSERVER_Error * TRITONSERVER_MessageSerializeToJson (TRITONSERVER_Message *message, const char **base, size_t *byte_size)
 Get the base and size of the buffer containing the serialized message in JSON format. More...
 
TRITONSERVER_DECLSPEC TRITONSERVER_Error * TRITONSERVER_MetricsDelete (TRITONSERVER_Metrics *metrics)
 Delete a metrics object. More...
 
TRITONSERVER_DECLSPEC TRITONSERVER_Error * TRITONSERVER_MetricsFormatted (TRITONSERVER_Metrics *metrics, TRITONSERVER_MetricFormat format, const char **base, size_t *byte_size)
 Get a buffer containing the metrics in the specified format. More...
 
const TRITONSERVER_DECLSPEC char * TRITONSERVER_InferenceTraceLevelString (TRITONSERVER_InferenceTraceLevel level)
 Get the string representation of a trace level. More...
 
const TRITONSERVER_DECLSPEC char * TRITONSERVER_InferenceTraceActivityString (TRITONSERVER_InferenceTraceActivity activity)
 Get the string representation of a trace activity. More...
 
TRITONSERVER_DECLSPEC TRITONSERVER_Error * TRITONSERVER_InferenceTraceNew (TRITONSERVER_InferenceTrace **trace, TRITONSERVER_InferenceTraceLevel level, uint64_t parent_id, TRITONSERVER_InferenceTraceActivityFn_t activity_fn, TRITONSERVER_InferenceTraceReleaseFn_t release_fn, void *trace_userp)
 Create a new inference trace object. More...
 
TRITONSERVER_DECLSPEC TRITONSERVER_Error * TRITONSERVER_InferenceTraceTensorNew (TRITONSERVER_InferenceTrace **trace, TRITONSERVER_InferenceTraceLevel level, uint64_t parent_id, TRITONSERVER_InferenceTraceActivityFn_t activity_fn, TRITONSERVER_InferenceTraceTensorActivityFn_t tensor_activity_fn, TRITONSERVER_InferenceTraceReleaseFn_t release_fn, void *trace_userp)
 Create a new inference trace object. More...
 
TRITONSERVER_DECLSPEC TRITONSERVER_Error * TRITONSERVER_InferenceTraceDelete (TRITONSERVER_InferenceTrace *trace)
 Delete a trace object. More...
 
TRITONSERVER_DECLSPEC TRITONSERVER_Error * TRITONSERVER_InferenceTraceId (TRITONSERVER_InferenceTrace *trace, uint64_t *id)
 Get the id associated with a trace. More...
 
TRITONSERVER_DECLSPEC TRITONSERVER_Error * TRITONSERVER_InferenceTraceParentId (TRITONSERVER_InferenceTrace *trace, uint64_t *parent_id)
 Get the parent id associated with a trace. More...
 
TRITONSERVER_DECLSPEC TRITONSERVER_Error * TRITONSERVER_InferenceTraceModelName (TRITONSERVER_InferenceTrace *trace, const char **model_name)
 Get the name of the model associated with a trace. More...
 
TRITONSERVER_DECLSPEC TRITONSERVER_Error * TRITONSERVER_InferenceTraceModelVersion (TRITONSERVER_InferenceTrace *trace, int64_t *model_version)
 Get the version of the model associated with a trace. More...
 
TRITONSERVER_DECLSPEC TRITONSERVER_Error * TRITONSERVER_InferenceRequestNew (TRITONSERVER_InferenceRequest **inference_request, TRITONSERVER_Server *server, const char *model_name, const int64_t model_version)
 Create a new inference request object. More...
 
TRITONSERVER_DECLSPEC TRITONSERVER_Error * TRITONSERVER_InferenceRequestDelete (TRITONSERVER_InferenceRequest *inference_request)
 Delete an inference request object. More...
 
TRITONSERVER_DECLSPEC TRITONSERVER_Error * TRITONSERVER_InferenceRequestId (TRITONSERVER_InferenceRequest *inference_request, const char **id)
 Get the ID for a request. More...
 
TRITONSERVER_DECLSPEC TRITONSERVER_Error * TRITONSERVER_InferenceRequestSetId (TRITONSERVER_InferenceRequest *inference_request, const char *id)
 Set the ID for a request. More...
 
TRITONSERVER_DECLSPEC TRITONSERVER_Error * TRITONSERVER_InferenceRequestFlags (TRITONSERVER_InferenceRequest *inference_request, uint32_t *flags)
 Get the flag(s) associated with a request. More...
 
TRITONSERVER_DECLSPEC TRITONSERVER_Error * TRITONSERVER_InferenceRequestSetFlags (TRITONSERVER_InferenceRequest *inference_request, uint32_t flags)
 Set the flag(s) associated with a request. More...
 
TRITONSERVER_DECLSPEC TRITONSERVER_Error * TRITONSERVER_InferenceRequestCorrelationId (TRITONSERVER_InferenceRequest *inference_request, uint64_t *correlation_id)
 Get the correlation ID of the inference request as an unsigned integer. More...
 
TRITONSERVER_DECLSPEC TRITONSERVER_Error * TRITONSERVER_InferenceRequestCorrelationIdString (TRITONSERVER_InferenceRequest *inference_request, const char **correlation_id)
 Get the correlation ID of the inference request as a string. More...
 
TRITONSERVER_DECLSPEC TRITONSERVER_Error * TRITONSERVER_InferenceRequestSetCorrelationId (TRITONSERVER_InferenceRequest *inference_request, uint64_t correlation_id)
 Set the correlation ID of the inference request to be an unsigned integer. More...
 
TRITONSERVER_DECLSPEC TRITONSERVER_Error * TRITONSERVER_InferenceRequestSetCorrelationIdString (TRITONSERVER_InferenceRequest *inference_request, const char *correlation_id)
 Set the correlation ID of the inference request to be a string. More...
 
TRITONSERVER_DECLSPEC TRITONSERVER_Error * TRITONSERVER_InferenceRequestPriority (TRITONSERVER_InferenceRequest *inference_request, uint32_t *priority)
 Get the priority for a request. More...
 
TRITONSERVER_DECLSPEC TRITONSERVER_Error * TRITONSERVER_InferenceRequestSetPriority (TRITONSERVER_InferenceRequest *inference_request, uint32_t priority)
 Set the priority for a request. More...
 
TRITONSERVER_DECLSPEC TRITONSERVER_Error * TRITONSERVER_InferenceRequestTimeoutMicroseconds (TRITONSERVER_InferenceRequest *inference_request, uint64_t *timeout_us)
 Get the timeout for a request, in microseconds. More...
 
TRITONSERVER_DECLSPEC TRITONSERVER_Error * TRITONSERVER_InferenceRequestSetTimeoutMicroseconds (TRITONSERVER_InferenceRequest *inference_request, uint64_t timeout_us)
 Set the timeout for a request, in microseconds. More...
 
TRITONSERVER_DECLSPEC TRITONSERVER_Error * TRITONSERVER_InferenceRequestAddInput (TRITONSERVER_InferenceRequest *inference_request, const char *name, const TRITONSERVER_DataType datatype, const int64_t *shape, uint64_t dim_count)
 Add an input to a request. More...
 
TRITONSERVER_DECLSPEC TRITONSERVER_Error * TRITONSERVER_InferenceRequestAddRawInput (TRITONSERVER_InferenceRequest *inference_request, const char *name)
 Add a raw input to a request. More...
 
TRITONSERVER_DECLSPEC TRITONSERVER_Error * TRITONSERVER_InferenceRequestRemoveInput (TRITONSERVER_InferenceRequest *inference_request, const char *name)
 Remove an input from a request. More...
 
TRITONSERVER_DECLSPEC TRITONSERVER_Error * TRITONSERVER_InferenceRequestRemoveAllInputs (TRITONSERVER_InferenceRequest *inference_request)
 Remove all inputs from a request. More...
 
TRITONSERVER_DECLSPEC TRITONSERVER_Error * TRITONSERVER_InferenceRequestAppendInputData (TRITONSERVER_InferenceRequest *inference_request, const char *name, const void *base, size_t byte_size, TRITONSERVER_MemoryType memory_type, int64_t memory_type_id)
 Assign a buffer of data to an input. More...
 
TRITONSERVER_DECLSPEC TRITONSERVER_Error * TRITONSERVER_InferenceRequestAppendInputDataWithHostPolicy (TRITONSERVER_InferenceRequest *inference_request, const char *name, const void *base, size_t byte_size, TRITONSERVER_MemoryType memory_type, int64_t memory_type_id, const char *host_policy_name)
 Assign a buffer of data to an input for execution on all model instances with the specified host policy. More...
 
TRITONSERVER_DECLSPEC TRITONSERVER_Error * TRITONSERVER_InferenceRequestAppendInputDataWithBufferAttributes (TRITONSERVER_InferenceRequest *inference_request, const char *name, const void *base, TRITONSERVER_BufferAttributes *buffer_attributes)
 Assign a buffer of data to an input. More...
 
TRITONSERVER_DECLSPEC TRITONSERVER_Error * TRITONSERVER_InferenceRequestRemoveAllInputData (TRITONSERVER_InferenceRequest *inference_request, const char *name)
 Clear all input data from an input, releasing ownership of the buffer(s) that were appended to the input with TRITONSERVER_InferenceRequestAppendInputData or TRITONSERVER_InferenceRequestAppendInputDataWithHostPolicy. More...
 
TRITONSERVER_DECLSPEC TRITONSERVER_Error * TRITONSERVER_InferenceRequestAddRequestedOutput (TRITONSERVER_InferenceRequest *inference_request, const char *name)
 Add an output request to an inference request. More...
 
TRITONSERVER_DECLSPEC TRITONSERVER_Error * TRITONSERVER_InferenceRequestRemoveRequestedOutput (TRITONSERVER_InferenceRequest *inference_request, const char *name)
 Remove an output request from an inference request. More...
 
TRITONSERVER_DECLSPEC TRITONSERVER_Error * TRITONSERVER_InferenceRequestRemoveAllRequestedOutputs (TRITONSERVER_InferenceRequest *inference_request)
 Remove all output requests from an inference request. More...
 
TRITONSERVER_DECLSPEC TRITONSERVER_Error * TRITONSERVER_InferenceRequestSetReleaseCallback (TRITONSERVER_InferenceRequest *inference_request, TRITONSERVER_InferenceRequestReleaseFn_t request_release_fn, void *request_release_userp)
 Set the release callback for an inference request. More...
 
TRITONSERVER_DECLSPEC TRITONSERVER_Error * TRITONSERVER_InferenceRequestSetResponseCallback (TRITONSERVER_InferenceRequest *inference_request, TRITONSERVER_ResponseAllocator *response_allocator, void *response_allocator_userp, TRITONSERVER_InferenceResponseCompleteFn_t response_fn, void *response_userp)
 Set the allocator and response callback for an inference request. More...
 
TRITONSERVER_DECLSPEC TRITONSERVER_Error * TRITONSERVER_InferenceResponseDelete (TRITONSERVER_InferenceResponse *inference_response)
 TRITONSERVER_InferenceResponse. More...
 
TRITONSERVER_DECLSPEC TRITONSERVER_Error * TRITONSERVER_InferenceResponseError (TRITONSERVER_InferenceResponse *inference_response)
 Return the error status of an inference response. More...
 
TRITONSERVER_DECLSPEC TRITONSERVER_Error * TRITONSERVER_InferenceResponseModel (TRITONSERVER_InferenceResponse *inference_response, const char **model_name, int64_t *model_version)
 Get model used to produce a response. More...
 
TRITONSERVER_DECLSPEC TRITONSERVER_Error * TRITONSERVER_InferenceResponseId (TRITONSERVER_InferenceResponse *inference_response, const char **request_id)
 Get the ID of the request corresponding to a response. More...
 
TRITONSERVER_DECLSPEC TRITONSERVER_Error * TRITONSERVER_InferenceResponseParameterCount (TRITONSERVER_InferenceResponse *inference_response, uint32_t *count)
 Get the number of parameters available in the response. More...
 
TRITONSERVER_DECLSPEC TRITONSERVER_Error * TRITONSERVER_InferenceResponseParameter (TRITONSERVER_InferenceResponse *inference_response, const uint32_t index, const char **name, TRITONSERVER_ParameterType *type, const void **vvalue)
 Get all information about a parameter. More...
 
TRITONSERVER_DECLSPEC TRITONSERVER_Error * TRITONSERVER_InferenceResponseOutputCount (TRITONSERVER_InferenceResponse *inference_response, uint32_t *count)
 Get the number of outputs available in the response. More...
 
TRITONSERVER_DECLSPEC TRITONSERVER_Error * TRITONSERVER_InferenceResponseOutput (TRITONSERVER_InferenceResponse *inference_response, const uint32_t index, const char **name, TRITONSERVER_DataType *datatype, const int64_t **shape, uint64_t *dim_count, const void **base, size_t *byte_size, TRITONSERVER_MemoryType *memory_type, int64_t *memory_type_id, void **userp)
 Get all information about an output tensor. More...
 
TRITONSERVER_DECLSPEC TRITONSERVER_Error * TRITONSERVER_InferenceResponseOutputClassificationLabel (TRITONSERVER_InferenceResponse *inference_response, const uint32_t index, const size_t class_index, const char **label)
 Get a classification label associated with an output for a given index. More...
 
TRITONSERVER_DECLSPEC TRITONSERVER_Error * TRITONSERVER_BufferAttributesNew (TRITONSERVER_BufferAttributes **buffer_attributes)
 TRITONSERVER_BufferAttributes. More...
 
TRITONSERVER_DECLSPEC TRITONSERVER_Error * TRITONSERVER_BufferAttributesDelete (TRITONSERVER_BufferAttributes *buffer_attributes)
 Delete a buffer attributes object. More...
 
TRITONSERVER_DECLSPEC TRITONSERVER_Error * TRITONSERVER_BufferAttributesSetMemoryTypeId (TRITONSERVER_BufferAttributes *buffer_attributes, int64_t memory_type_id)
 Set the memory type id field of the buffer attributes. More...
 
TRITONSERVER_DECLSPEC TRITONSERVER_Error * TRITONSERVER_BufferAttributesSetMemoryType (TRITONSERVER_BufferAttributes *buffer_attributes, TRITONSERVER_MemoryType memory_type)
 Set the memory type field of the buffer attributes. More...
 
TRITONSERVER_DECLSPEC TRITONSERVER_Error * TRITONSERVER_BufferAttributesSetCudaIpcHandle (TRITONSERVER_BufferAttributes *buffer_attributes, void *cuda_ipc_handle)
 Set the CudaIpcHandle field of the buffer attributes. More...
 
TRITONSERVER_DECLSPEC TRITONSERVER_Error * TRITONSERVER_BufferAttributesSetByteSize (TRITONSERVER_BufferAttributes *buffer_attributes, size_t byte_size)
 Set the byte size field of the buffer attributes. More...
 
TRITONSERVER_DECLSPEC TRITONSERVER_Error * TRITONSERVER_BufferAttributesMemoryTypeId (TRITONSERVER_BufferAttributes *buffer_attributes, int64_t *memory_type_id)
 Get the memory type id field of the buffer attributes. More...
 
TRITONSERVER_DECLSPEC TRITONSERVER_Error * TRITONSERVER_BufferAttributesMemoryType (TRITONSERVER_BufferAttributes *buffer_attributes, TRITONSERVER_MemoryType *memory_type)
 Get the memory type field of the buffer attributes. More...
 
TRITONSERVER_DECLSPEC TRITONSERVER_Error * TRITONSERVER_BufferAttributesCudaIpcHandle (TRITONSERVER_BufferAttributes *buffer_attributes, void **cuda_ipc_handle)
 Get the CudaIpcHandle field of the buffer attributes object. More...
 
TRITONSERVER_DECLSPEC TRITONSERVER_Error * TRITONSERVER_BufferAttributesByteSize (TRITONSERVER_BufferAttributes *buffer_attributes, size_t *byte_size)
 Get the byte size field of the buffer attributes. More...
 
TRITONSERVER_DECLSPEC TRITONSERVER_Error * TRITONSERVER_ServerOptionsNew (TRITONSERVER_ServerOptions **options)
 Create a new server options object. More...
 
TRITONSERVER_DECLSPEC TRITONSERVER_Error * TRITONSERVER_ServerOptionsDelete (TRITONSERVER_ServerOptions *options)
 Delete a server options object. More...
 
TRITONSERVER_DECLSPEC TRITONSERVER_Error * TRITONSERVER_ServerOptionsSetServerId (TRITONSERVER_ServerOptions *options, const char *server_id)
 Set the textual ID for the server in a server options. More...
 
TRITONSERVER_DECLSPEC TRITONSERVER_Error * TRITONSERVER_ServerOptionsSetModelRepositoryPath (TRITONSERVER_ServerOptions *options, const char *model_repository_path)
 Set the model repository path in a server options. More...
 
TRITONSERVER_DECLSPEC TRITONSERVER_Error * TRITONSERVER_ServerOptionsSetModelControlMode (TRITONSERVER_ServerOptions *options, TRITONSERVER_ModelControlMode mode)
 Set the model control mode in a server options. More...
 
TRITONSERVER_DECLSPEC TRITONSERVER_Error * TRITONSERVER_ServerOptionsSetStartupModel (TRITONSERVER_ServerOptions *options, const char *model_name)
 Set the model to be loaded at startup in a server options. More...
 
TRITONSERVER_DECLSPEC TRITONSERVER_Error * TRITONSERVER_ServerOptionsSetStrictModelConfig (TRITONSERVER_ServerOptions *options, bool strict)
 Enable or disable strict model configuration handling in a server options. More...
 
TRITONSERVER_DECLSPEC TRITONSERVER_Error * TRITONSERVER_ServerOptionsSetRateLimiterMode (TRITONSERVER_ServerOptions *options, TRITONSERVER_RateLimitMode mode)
 Set the rate limit mode in a server options. More...
 
TRITONSERVER_DECLSPEC TRITONSERVER_Error * TRITONSERVER_ServerOptionsAddRateLimiterResource (TRITONSERVER_ServerOptions *options, const char *resource_name, const size_t resource_count, const int device)
 Add resource count for rate limiting. More...
 
TRITONSERVER_DECLSPEC TRITONSERVER_Error * TRITONSERVER_ServerOptionsSetPinnedMemoryPoolByteSize (TRITONSERVER_ServerOptions *options, uint64_t size)
 Set the total pinned memory byte size that the server can allocate in a server options. More...
 
TRITONSERVER_DECLSPEC TRITONSERVER_Error * TRITONSERVER_ServerOptionsSetCudaMemoryPoolByteSize (TRITONSERVER_ServerOptions *options, int gpu_device, uint64_t size)
 Set the total CUDA memory byte size that the server can allocate on given GPU device in a server options. More...
 
TRITONSERVER_DECLSPEC TRITONSERVER_Error * TRITONSERVER_ServerOptionsSetResponseCacheByteSize (TRITONSERVER_ServerOptions *options, uint64_t size)
 Set the total response cache byte size that the server can allocate in CPU memory. More...
 
TRITONSERVER_DECLSPEC TRITONSERVER_Error * TRITONSERVER_ServerOptionsSetMinSupportedComputeCapability (TRITONSERVER_ServerOptions *options, double cc)
 Set the minimum supported CUDA compute capability in a server options. More...
 
TRITONSERVER_DECLSPEC TRITONSERVER_Error * TRITONSERVER_ServerOptionsSetExitOnError (TRITONSERVER_ServerOptions *options, bool exit)
 Enable or disable exit-on-error in a server options. More...
 
TRITONSERVER_DECLSPEC TRITONSERVER_Error * TRITONSERVER_ServerOptionsSetStrictReadiness (TRITONSERVER_ServerOptions *options, bool strict)
 Enable or disable strict readiness handling in a server options. More...
 
TRITONSERVER_DECLSPEC TRITONSERVER_Error * TRITONSERVER_ServerOptionsSetExitTimeout (TRITONSERVER_ServerOptions *options, unsigned int timeout)
 Set the exit timeout, in seconds, for the server in a server options. More...
 
TRITONSERVER_DECLSPEC TRITONSERVER_Error * TRITONSERVER_ServerOptionsSetBufferManagerThreadCount (TRITONSERVER_ServerOptions *options, unsigned int thread_count)
 Set the number of threads used in buffer manager in a server options. More...
 
TRITONSERVER_DECLSPEC TRITONSERVER_Error * TRITONSERVER_ServerOptionsSetModelLoadThreadCount (TRITONSERVER_ServerOptions *options, unsigned int thread_count)
 Set the number of threads to concurrently load models in a server options. More...
 
TRITONSERVER_DECLSPEC TRITONSERVER_Error * TRITONSERVER_ServerOptionsSetLogFile (TRITONSERVER_ServerOptions *options, const char *file)
 Provide a log output file. More...
 
TRITONSERVER_DECLSPEC TRITONSERVER_Error * TRITONSERVER_ServerOptionsSetLogInfo (TRITONSERVER_ServerOptions *options, bool log)
 Enable or disable info level logging. More...
 
TRITONSERVER_DECLSPEC TRITONSERVER_Error * TRITONSERVER_ServerOptionsSetLogWarn (TRITONSERVER_ServerOptions *options, bool log)
 Enable or disable warning level logging. More...
 
TRITONSERVER_DECLSPEC TRITONSERVER_Error * TRITONSERVER_ServerOptionsSetLogError (TRITONSERVER_ServerOptions *options, bool log)
 Enable or disable error level logging. More...
 
TRITONSERVER_DECLSPEC TRITONSERVER_Error * TRITONSERVER_ServerOptionsSetLogFormat (TRITONSERVER_ServerOptions *options, const TRITONSERVER_LogFormat format)
 Set the logging format. More...
 
TRITONSERVER_DECLSPEC TRITONSERVER_Error * TRITONSERVER_ServerOptionsSetLogVerbose (TRITONSERVER_ServerOptions *options, int level)
 Set verbose logging level. More...
 
TRITONSERVER_DECLSPEC TRITONSERVER_Error * TRITONSERVER_ServerOptionsSetMetrics (TRITONSERVER_ServerOptions *options, bool metrics)
 Enable or disable metrics collection in a server options. More...
 
TRITONSERVER_DECLSPEC TRITONSERVER_Error * TRITONSERVER_ServerOptionsSetGpuMetrics (TRITONSERVER_ServerOptions *options, bool gpu_metrics)
 Enable or disable GPU metrics collection in a server options. More...
 
TRITONSERVER_DECLSPEC TRITONSERVER_Error * TRITONSERVER_ServerOptionsSetCpuMetrics (TRITONSERVER_ServerOptions *options, bool cpu_metrics)
 Enable or disable CPU metrics collection in a server options. More...
 
TRITONSERVER_DECLSPEC TRITONSERVER_Error * TRITONSERVER_ServerOptionsSetMetricsInterval (TRITONSERVER_ServerOptions *options, uint64_t metrics_interval_ms)
 Set the interval for metrics collection in a server options. More...
 
TRITONSERVER_DECLSPEC TRITONSERVER_Error * TRITONSERVER_ServerOptionsSetBackendDirectory (TRITONSERVER_ServerOptions *options, const char *backend_dir)
 Set the directory containing backend shared libraries. More...
 
TRITONSERVER_DECLSPEC TRITONSERVER_Error * TRITONSERVER_ServerOptionsSetRepoAgentDirectory (TRITONSERVER_ServerOptions *options, const char *repoagent_dir)
 Set the directory containing repository agent shared libraries. More...
 
TRITONSERVER_DECLSPEC TRITONSERVER_Error * TRITONSERVER_ServerOptionsSetModelLoadDeviceLimit (TRITONSERVER_ServerOptions *options, const TRITONSERVER_InstanceGroupKind kind, const int device_id, const double fraction)
 Specify the limit on memory usage as a fraction on the device identified by 'kind' and 'device_id'. More...
 
TRITONSERVER_DECLSPEC TRITONSERVER_Error * TRITONSERVER_ServerOptionsSetBackendConfig (TRITONSERVER_ServerOptions *options, const char *backend_name, const char *setting, const char *value)
 Set a configuration setting for a named backend in a server options. More...
 
TRITONSERVER_DECLSPEC TRITONSERVER_Error * TRITONSERVER_ServerOptionsSetHostPolicy (TRITONSERVER_ServerOptions *options, const char *policy_name, const char *setting, const char *value)
 Set a host policy setting for a given policy name in a server options. More...
 
TRITONSERVER_DECLSPEC TRITONSERVER_Error * TRITONSERVER_ServerNew (TRITONSERVER_Server **server, TRITONSERVER_ServerOptions *options)
 Create a new server object. More...
 
TRITONSERVER_DECLSPEC TRITONSERVER_Error * TRITONSERVER_ServerDelete (TRITONSERVER_Server *server)
 Delete a server object. More...
 
TRITONSERVER_DECLSPEC TRITONSERVER_Error * TRITONSERVER_ServerStop (TRITONSERVER_Server *server)
 Stop a server object. More...
 
TRITONSERVER_DECLSPEC TRITONSERVER_Error * TRITONSERVER_ServerRegisterModelRepository (TRITONSERVER_Server *server, const char *repository_path, const TRITONSERVER_Parameter **name_mapping, const uint32_t mapping_count)
 Register a new model repository. More...
 
TRITONSERVER_DECLSPEC TRITONSERVER_Error * TRITONSERVER_ServerUnregisterModelRepository (TRITONSERVER_Server *server, const char *repository_path)
 Unregister a model repository. More...
 
TRITONSERVER_DECLSPEC TRITONSERVER_Error * TRITONSERVER_ServerPollModelRepository (TRITONSERVER_Server *server)
 Check the model repository for changes and update server state based on those changes. More...
 
TRITONSERVER_DECLSPEC TRITONSERVER_Error * TRITONSERVER_ServerIsLive (TRITONSERVER_Server *server, bool *live)
 Is the server live? More...
 
TRITONSERVER_DECLSPEC TRITONSERVER_Error * TRITONSERVER_ServerIsReady (TRITONSERVER_Server *server, bool *ready)
 Is the server ready? More...
 
TRITONSERVER_DECLSPEC TRITONSERVER_Error * TRITONSERVER_ServerModelIsReady (TRITONSERVER_Server *server, const char *model_name, const int64_t model_version, bool *ready)
 Is the model ready? More...
 
TRITONSERVER_DECLSPEC TRITONSERVER_Error * TRITONSERVER_ServerModelBatchProperties (TRITONSERVER_Server *server, const char *model_name, const int64_t model_version, uint32_t *flags, void **voidp)
 Get the batch properties of the model. More...
 
TRITONSERVER_DECLSPEC TRITONSERVER_Error * TRITONSERVER_ServerModelTransactionProperties (TRITONSERVER_Server *server, const char *model_name, const int64_t model_version, uint32_t *txn_flags, void **voidp)
 Get the transaction policy of the model. More...
 
TRITONSERVER_DECLSPEC TRITONSERVER_Error * TRITONSERVER_ServerMetadata (TRITONSERVER_Server *server, TRITONSERVER_Message **server_metadata)
 Get the metadata of the server as a TRITONSERVER_Message object. More...
 
TRITONSERVER_DECLSPEC TRITONSERVER_Error * TRITONSERVER_ServerModelMetadata (TRITONSERVER_Server *server, const char *model_name, const int64_t model_version, TRITONSERVER_Message **model_metadata)
 Get the metadata of a model as a TRITONSERVER_Message object. More...
 
TRITONSERVER_DECLSPEC TRITONSERVER_Error * TRITONSERVER_ServerModelStatistics (TRITONSERVER_Server *server, const char *model_name, const int64_t model_version, TRITONSERVER_Message **model_stats)
 Get the statistics of a model as a TRITONSERVER_Message object. More...
 
TRITONSERVER_DECLSPEC TRITONSERVER_Error * TRITONSERVER_ServerModelConfig (TRITONSERVER_Server *server, const char *model_name, const int64_t model_version, const uint32_t config_version, TRITONSERVER_Message **model_config)
 Get the configuration of a model as a TRITONSERVER_Message object. More...
 
TRITONSERVER_DECLSPEC TRITONSERVER_Error * TRITONSERVER_ServerModelIndex (TRITONSERVER_Server *server, uint32_t flags, TRITONSERVER_Message **model_index)
 Get the index of all unique models in the model repositories as a TRITONSERVER_Message object. More...
 
TRITONSERVER_DECLSPEC TRITONSERVER_Error * TRITONSERVER_ServerLoadModel (TRITONSERVER_Server *server, const char *model_name)
 Load the requested model or reload the model if it is already loaded. More...
 
TRITONSERVER_DECLSPEC TRITONSERVER_Error * TRITONSERVER_ServerLoadModelWithParameters (TRITONSERVER_Server *server, const char *model_name, const TRITONSERVER_Parameter **parameters, const uint64_t parameter_count)
 Load the requested model or reload the model if it is already loaded, with load parameters provided. More...
 
TRITONSERVER_DECLSPEC TRITONSERVER_Error * TRITONSERVER_ServerUnloadModel (TRITONSERVER_Server *server, const char *model_name)
 Unload the requested model. More...
 
TRITONSERVER_DECLSPEC TRITONSERVER_Error * TRITONSERVER_ServerUnloadModelAndDependents (TRITONSERVER_Server *server, const char *model_name)
 Unload the requested model, and also unload any dependent model that was loaded along with the requested model (for example, the models composing an ensemble). More...
 
TRITONSERVER_DECLSPEC TRITONSERVER_Error * TRITONSERVER_ServerMetrics (TRITONSERVER_Server *server, TRITONSERVER_Metrics **metrics)
 Get the current metrics for the server. More...
 
TRITONSERVER_DECLSPEC TRITONSERVER_Error * TRITONSERVER_ServerInferAsync (TRITONSERVER_Server *server, TRITONSERVER_InferenceRequest *inference_request, TRITONSERVER_InferenceTrace *trace)
 Perform inference using the meta-data and inputs supplied by the 'inference_request'. More...
 
TRITONSERVER_DECLSPEC TRITONSERVER_Error * TRITONSERVER_MetricFamilyNew (TRITONSERVER_MetricFamily **family, const TRITONSERVER_MetricKind kind, const char *name, const char *description)
 Create a new metric family object. More...
 
TRITONSERVER_DECLSPEC TRITONSERVER_Error * TRITONSERVER_MetricFamilyDelete (TRITONSERVER_MetricFamily *family)
 Delete a metric family object. More...
 
TRITONSERVER_DECLSPEC TRITONSERVER_Error * TRITONSERVER_MetricNew (TRITONSERVER_Metric **metric, TRITONSERVER_MetricFamily *family, const TRITONSERVER_Parameter **labels, const uint64_t label_count)
 Create a new metric object. More...
 
TRITONSERVER_DECLSPEC TRITONSERVER_Error * TRITONSERVER_MetricDelete (TRITONSERVER_Metric *metric)
 Delete a metric object. More...
 
TRITONSERVER_DECLSPEC TRITONSERVER_Error * TRITONSERVER_MetricValue (TRITONSERVER_Metric *metric, double *value)
 Get the current value of a metric object. More...
 
TRITONSERVER_DECLSPEC TRITONSERVER_Error * TRITONSERVER_MetricIncrement (TRITONSERVER_Metric *metric, double value)
 Increment the current value of metric by value. More...
 
TRITONSERVER_DECLSPEC TRITONSERVER_Error * TRITONSERVER_MetricSet (TRITONSERVER_Metric *metric, double value)
 Set the current value of metric to value. More...
 
TRITONSERVER_DECLSPEC TRITONSERVER_Error * TRITONSERVER_GetMetricKind (TRITONSERVER_Metric *metric, TRITONSERVER_MetricKind *kind)
 Get the TRITONSERVER_MetricKind of metric and its corresponding family. More...
 

Macro Definition Documentation

◆ TRITONSERVER_API_VERSION_MAJOR

#define TRITONSERVER_API_VERSION_MAJOR   1

TRITONSERVER API Version.

The TRITONSERVER API is versioned with major and minor version numbers. Any change to the API that does not impact backwards compatibility (for example, adding a non-required function) increases the minor version number. Any change that breaks backwards compatibility (for example, deleting or changing the behavior of a function) increases the major version number. A client should check that the API version used to compile the client is compatible with the API version of the Triton shared library that it is linking against. This is typically done by code similar to the following which makes sure that the major versions are equal and that the minor version of the Triton shared library is >= the minor version used to build the client.

uint32_t api_version_major, api_version_minor; TRITONSERVER_ApiVersion(&api_version_major, &api_version_minor); if ((api_version_major != TRITONSERVER_API_VERSION_MAJOR) || (api_version_minor < TRITONSERVER_API_VERSION_MINOR)) { return TRITONSERVER_ErrorNew( TRITONSERVER_ERROR_UNSUPPORTED, "triton server API version does not support this client"); }

Definition at line 93 of file tritonserver.h.

◆ TRITONSERVER_API_VERSION_MINOR

#define TRITONSERVER_API_VERSION_MINOR   17

Definition at line 94 of file tritonserver.h.

◆ TRITONSERVER_DECLSPEC

#define TRITONSERVER_DECLSPEC

Definition at line 50 of file tritonserver.h.

Typedef Documentation

◆ TRITONSERVER_DataType

TRITONSERVER_DataType.

Tensor data types recognized by TRITONSERVER.

◆ TRITONSERVER_Error_Code

TRITONSERVER_Error.

Errors are reported by a TRITONSERVER_Error object. A NULL TRITONSERVER_Error indicates no error, a non-NULL TRITONSERVER_Error indicates error and the code and message for the error can be retrieved from the object.

The caller takes ownership of a TRITONSERVER_Error object returned by the API and must call TRITONSERVER_ErrorDelete to release the object. The TRITONSERVER_Error error codes

◆ TRITONSERVER_InferenceRequestReleaseFn_t

typedef void(* TRITONSERVER_InferenceRequestReleaseFn_t) (TRITONSERVER_InferenceRequest *request, const uint32_t flags, void *userp)

Type for inference request release callback function.

The callback indicates what type of release is being performed on the request and for some of these the callback function takes ownership of the TRITONSERVER_InferenceRequest object. The 'userp' data is the data provided as 'request_release_userp' in the call to TRITONSERVER_InferenceRequestSetReleaseCallback.

One or more flags will be specified when the callback is invoked, and the callback must take the following actions:

  • TRITONSERVER_REQUEST_RELEASE_ALL: The entire inference request is being released and ownership is passed to the callback function. Triton will no longer access the 'request' object itself nor any input tensor data associated with the request. The callback should free or otherwise manage the 'request' object and all associated tensor data.

Note that currently TRITONSERVER_REQUEST_RELEASE_ALL should always be set when the callback is invoked but in the future that may change, so the callback should explicitly check for the flag before taking ownership of the request object.

Definition at line 909 of file tritonserver.h.

◆ TRITONSERVER_InferenceResponseCompleteFn_t

typedef void(* TRITONSERVER_InferenceResponseCompleteFn_t) (TRITONSERVER_InferenceResponse *response, const uint32_t flags, void *userp)

Type for callback function indicating that an inference response has completed.

The callback function takes ownership of the TRITONSERVER_InferenceResponse object. The 'userp' data is the data provided as 'response_userp' in the call to TRITONSERVER_InferenceRequestSetResponseCallback.

One or more flags may be specified when the callback is invoked:

  • TRITONSERVER_RESPONSE_COMPLETE_FINAL: Indicates that no more responses will be generated for a given request (more specifically, that no more responses will be generated for the inference request that set this callback and 'userp'). When this flag is set 'response' may be a response object or may be nullptr. If 'response' is not nullptr, then 'response' is the last response that Triton will produce for the request. If 'response' is nullptr then Triton is indicating that no more responses will be produced for the request.

Definition at line 929 of file tritonserver.h.

◆ TRITONSERVER_InferenceTraceActivity

◆ TRITONSERVER_InferenceTraceActivityFn_t

typedef void(* TRITONSERVER_InferenceTraceActivityFn_t) (TRITONSERVER_InferenceTrace *trace, TRITONSERVER_InferenceTraceActivity activity, uint64_t timestamp_ns, void *userp)

Type for trace timeline activity callback function.

This callback function is used to report activity occurring for a trace. This function does not take ownership of 'trace' and so any information needed from that object must be copied before returning. The 'userp' data is the same as what is supplied in the call to TRITONSERVER_InferenceTraceNew.

Definition at line 728 of file tritonserver.h.

◆ TRITONSERVER_InferenceTraceLevel

TRITONSERVER_InferenceTrace.

Object that represents tracing for an inference request. Trace levels. The trace level controls the type of trace activities that are reported for an inference request.

Trace level values are power-of-2 and can be combined to trace multiple types of activities. For example, use (TRITONSERVER_TRACE_LEVEL_TIMESTAMPS | TRITONSERVER_TRACE_LEVEL_TENSORS) to trace both timestamps and tensors for an inference request.

TRITONSERVER_TRACE_LEVEL_MIN and TRITONSERVER_TRACE_LEVEL_MAX are deprecated and should not be used.

◆ TRITONSERVER_InferenceTraceReleaseFn_t

typedef void(* TRITONSERVER_InferenceTraceReleaseFn_t) (TRITONSERVER_InferenceTrace *trace, void *userp)

Type for trace release callback function.

This callback function is called when all activity for the trace has completed. The callback function takes ownership of the TRITONSERVER_InferenceTrace object. The 'userp' data is the same as what is supplied in the call to TRITONSERVER_InferenceTraceNew.

Definition at line 751 of file tritonserver.h.

◆ TRITONSERVER_InferenceTraceTensorActivityFn_t

typedef void(* TRITONSERVER_InferenceTraceTensorActivityFn_t) (TRITONSERVER_InferenceTrace *trace, TRITONSERVER_InferenceTraceActivity activity, const char *name, TRITONSERVER_DataType datatype, const void *base, size_t byte_size, const int64_t *shape, uint64_t dim_count, TRITONSERVER_MemoryType memory_type, int64_t memory_type_id, void *userp)

Type for trace tensor activity callback function.

This callback function is used to report tensor activity occurring for a trace. This function does not take ownership of 'trace' and so any information needed from that object must be copied before returning. The 'userp' data is the same as what is supplied in the call to TRITONSERVER_InferenceTraceTensorNew.

Definition at line 739 of file tritonserver.h.

◆ TRITONSERVER_InstanceGroupKind

TRITONSERVER_InstanceGroupKind.

Kinds of instance groups recognized by TRITONSERVER.

◆ TRITONSERVER_LogFormat

Format of logging.

TRITONSERVER_LOG_DEFAULT: the log severity (L) and timestamp will be logged as "LMMDD hh:mm:ss.ssssss".

TRITONSERVER_LOG_ISO8601: the log format will be "YYYY-MM-DDThh:mm:ssZ L".

◆ TRITONSERVER_LogLevel

TRITONSERVER_Logging.

Types/levels of logging.

◆ TRITONSERVER_MemoryType

TRITONSERVER_MemoryType.

Types of memory recognized by TRITONSERVER.

◆ TRITONSERVER_MetricFormat

TRITONSERVER_Metrics.

Object representing metrics. Metric format types

◆ TRITONSERVER_MetricKind

TRITONSERVER_MetricKind.

Types of metrics recognized by TRITONSERVER.

◆ TRITONSERVER_ModelBatchFlag

TRITONSERVER_Server.

An inference server. Model batch flags. The enum values must be power-of-2 values.

◆ TRITONSERVER_ModelControlMode

TRITONSERVER_ServerOptions.

Options to use when creating an inference server. Model control modes

◆ TRITONSERVER_ModelIndexFlag

Model index flags. The enum values must be power-of-2 values.

◆ TRITONSERVER_ModelTxnPropertyFlag

Model transaction policy flags.

The enum values must be power-of-2 values.

◆ TRITONSERVER_ParameterType

TRITONSERVER_ParameterType.

Types of parameters recognized by TRITONSERVER.

◆ TRITONSERVER_RateLimitMode

◆ TRITONSERVER_RequestFlag

TRITONSERVER_InferenceRequest.

Object representing an inference request. The inference request provides the meta-data and input tensor values needed for an inference and returns the inference result meta-data and output tensors. An inference request object can be modified and reused multiple times. Inference request flags. The enum values must be power-of-2 values.

◆ TRITONSERVER_RequestReleaseFlag

Inference request release flags.

The enum values must be power-of-2 values.

◆ TRITONSERVER_ResponseAllocatorAllocFn_t

typedef TRITONSERVER_Error*(* TRITONSERVER_ResponseAllocatorAllocFn_t) (TRITONSERVER_ResponseAllocator *allocator, const char *tensor_name, size_t byte_size, TRITONSERVER_MemoryType memory_type, int64_t memory_type_id, void *userp, void **buffer, void **buffer_userp, TRITONSERVER_MemoryType *actual_memory_type, int64_t *actual_memory_type_id)

TRITONSERVER_ResponseAllocator.

Object representing a memory allocator for output tensors in an inference response. Type for allocation function that allocates a buffer to hold an output tensor.

Parameters
allocatorThe allocator that is provided in the call to TRITONSERVER_InferenceRequestSetResponseCallback.
tensor_nameThe name of the output tensor to allocate for.
byte_sizeThe size of the buffer to allocate.
memory_typeThe type of memory that the caller prefers for the buffer allocation.
memory_type_idThe ID of the memory that the caller prefers for the buffer allocation.
userpThe user data pointer that is provided as 'response_allocator_userp' in the call to TRITONSERVER_InferenceRequestSetResponseCallback.
bufferReturns a pointer to the allocated memory.
buffer_userpReturns a user-specified value to associate with the buffer, or nullptr if no user-specified value should be associated with the buffer. This value will be provided in the call to TRITONSERVER_ResponseAllocatorReleaseFn_t when the buffer is released and will also be returned by TRITONSERVER_InferenceResponseOutput.
actual_memory_typeReturns the type of memory where the allocation resides. May be different than the type of memory requested by 'memory_type'.
actual_memory_type_idReturns the ID of the memory where the allocation resides. May be different than the ID of the memory requested by 'memory_type_id'.
Returns
a TRITONSERVER_Error object if a failure occurs while attempting an allocation. If an error is returned all other return values will be ignored.

Definition at line 391 of file tritonserver.h.

◆ TRITONSERVER_ResponseAllocatorBufferAttributesFn_t

typedef TRITONSERVER_Error*( * TRITONSERVER_ResponseAllocatorBufferAttributesFn_t) (TRITONSERVER_ResponseAllocator *allocator, const char *tensor_name, TRITONSERVER_BufferAttributes *buffer_attributes, void *userp, void *buffer_userp)

Type for allocation function that allocates a buffer to hold an output tensor with buffer attributes.

The callback function must fill in the appropriate buffer attributes information related to this buffer. If set, this function is always called after TRITONSERVER_ResponseAllocatorAllocFn_t function.

Parameters
allocatorThe allocator that is provided in the call to TRITONSERVER_InferenceRequestSetResponseCallback.
tensor_nameThe name of the output tensor to allocate for.
buffer_attributesThe buffer attributes associated with the buffer.
userpThe user data pointer that is provided as 'response_allocator_userp' in the call to TRITONSERVER_InferenceRequestSetResponseCallback.
buffer_userpReturns a user-specified value to associate with the buffer, or nullptr if no user-specified value should be associated with the buffer. This value will be provided in the call to TRITONSERVER_ResponseAllocatorReleaseFn_t when the buffer is released and will also be returned by TRITONSERVER_InferenceResponseOutput.
Returns
a TRITONSERVER_Error object if a failure occurs while attempting an allocation. If an error is returned all other return values will be ignored.

Definition at line 421 of file tritonserver.h.

◆ TRITONSERVER_ResponseAllocatorQueryFn_t

typedef TRITONSERVER_Error*(* TRITONSERVER_ResponseAllocatorQueryFn_t) (TRITONSERVER_ResponseAllocator *allocator, void *userp, const char *tensor_name, size_t *byte_size, TRITONSERVER_MemoryType *memory_type, int64_t *memory_type_id)

Type for function that is called to query the allocator's preferred memory type and memory type ID.

As much as possible, the allocator should attempt to return the same memory_type and memory_type_id values that will be returned by the subsequent call to TRITONSERVER_ResponseAllocatorAllocFn_t. But the allocator is not required to do so.

Parameters
allocatorThe allocator that is provided in the call to TRITONSERVER_InferenceRequestSetResponseCallback.
userpThe user data pointer that is provided as 'response_allocator_userp' in the call to TRITONSERVER_InferenceRequestSetResponseCallback.
tensor_nameThe name of the output tensor. This is optional and it should be set to nullptr to indicate that the tensor name has not been determined.
byte_sizeThe expected size of the buffer. This is optional and it should be set to nullptr to indicate that the byte size has not been determined.
memory_typeActs as both input and output. On input gives the memory type preferred by the caller. Returns the memory type preferred by the allocator, taking into account the caller's preferred type.
memory_type_idActs as both input and output. On input gives the memory type ID preferred by the caller. Returns the memory type ID preferred by the allocator, taking into account the caller's preferred type ID.
Returns
a TRITONSERVER_Error object if a failure occurs.

Definition at line 450 of file tritonserver.h.

◆ TRITONSERVER_ResponseAllocatorReleaseFn_t

typedef TRITONSERVER_Error*(* TRITONSERVER_ResponseAllocatorReleaseFn_t) (TRITONSERVER_ResponseAllocator *allocator, void *buffer, void *buffer_userp, size_t byte_size, TRITONSERVER_MemoryType memory_type, int64_t memory_type_id)

Type for function that is called when the server no longer holds any reference to a buffer allocated by TRITONSERVER_ResponseAllocatorAllocFn_t.

In practice this function is typically called when the response object associated with the buffer is deleted by TRITONSERVER_InferenceResponseDelete.

Parameters
allocatorThe allocator that is provided in the call to TRITONSERVER_InferenceRequestSetResponseCallback.
bufferPointer to the buffer to be freed.
buffer_userpThe user-specified value associated with the buffer in TRITONSERVER_ResponseAllocatorAllocFn_t.
byte_sizeThe size of the buffer.
memory_typeThe type of memory holding the buffer.
memory_type_idThe ID of the memory holding the buffer.
Returns
a TRITONSERVER_Error object if a failure occurs while attempting the release. If an error is returned Triton will not attempt to release the buffer again.

Definition at line 472 of file tritonserver.h.

◆ TRITONSERVER_ResponseAllocatorStartFn_t

typedef TRITONSERVER_Error*(* TRITONSERVER_ResponseAllocatorStartFn_t) (TRITONSERVER_ResponseAllocator *allocator, void *userp)

Type for function that is called to indicate that subsequent allocation requests will refer to a new response.

Parameters
allocatorThe allocator that is provided in the call to TRITONSERVER_InferenceRequestSetResponseCallback.
userpThe user data pointer that is provided as 'response_allocator_userp' in the call to TRITONSERVER_InferenceRequestSetResponseCallback.
Returns
a TRITONSERVER_Error object if a failure occurs.

Definition at line 486 of file tritonserver.h.

◆ TRITONSERVER_ResponseCompleteFlag

Inference response complete flags.

The enum values must be power-of-2 values.

Enumeration Type Documentation

◆ tritonserver_batchflag_enum

TRITONSERVER_Server.

An inference server. Model batch flags. The enum values must be power-of-2 values.

Enumerator
TRITONSERVER_BATCH_UNKNOWN 
TRITONSERVER_BATCH_FIRST_DIM 

Definition at line 1931 of file tritonserver.h.

◆ TRITONSERVER_datatype_enum

TRITONSERVER_DataType.

Tensor data types recognized by TRITONSERVER.

Enumerator
TRITONSERVER_TYPE_INVALID 
TRITONSERVER_TYPE_BOOL 
TRITONSERVER_TYPE_UINT8 
TRITONSERVER_TYPE_UINT16 
TRITONSERVER_TYPE_UINT32 
TRITONSERVER_TYPE_UINT64 
TRITONSERVER_TYPE_INT8 
TRITONSERVER_TYPE_INT16 
TRITONSERVER_TYPE_INT32 
TRITONSERVER_TYPE_INT64 
TRITONSERVER_TYPE_FP16 
TRITONSERVER_TYPE_FP32 
TRITONSERVER_TYPE_FP64 
TRITONSERVER_TYPE_BYTES 
TRITONSERVER_TYPE_BF16 

Definition at line 114 of file tritonserver.h.

◆ TRITONSERVER_errorcode_enum

TRITONSERVER_Error.

Errors are reported by a TRITONSERVER_Error object. A NULL TRITONSERVER_Error indicates no error, a non-NULL TRITONSERVER_Error indicates error and the code and message for the error can be retrieved from the object.

The caller takes ownership of a TRITONSERVER_Error object returned by the API and must call TRITONSERVER_ErrorDelete to release the object. The TRITONSERVER_Error error codes

Enumerator
TRITONSERVER_ERROR_UNKNOWN 
TRITONSERVER_ERROR_INTERNAL 
TRITONSERVER_ERROR_NOT_FOUND 
TRITONSERVER_ERROR_INVALID_ARG 
TRITONSERVER_ERROR_UNAVAILABLE 
TRITONSERVER_ERROR_UNSUPPORTED 
TRITONSERVER_ERROR_ALREADY_EXISTS 

Definition at line 303 of file tritonserver.h.

◆ TRITONSERVER_instancegroupkind_enum

TRITONSERVER_InstanceGroupKind.

Kinds of instance groups recognized by TRITONSERVER.

Enumerator
TRITONSERVER_INSTANCEGROUPKIND_AUTO 
TRITONSERVER_INSTANCEGROUPKIND_CPU 
TRITONSERVER_INSTANCEGROUPKIND_GPU 
TRITONSERVER_INSTANCEGROUPKIND_MODEL 

Definition at line 233 of file tritonserver.h.

◆ TRITONSERVER_logformat_enum

Format of logging.

TRITONSERVER_LOG_DEFAULT: the log severity (L) and timestamp will be logged as "LMMDD hh:mm:ss.ssssss".

TRITONSERVER_LOG_ISO8601: the log format will be "YYYY-MM-DDThh:mm:ssZ L".

Enumerator
TRITONSERVER_LOG_DEFAULT 
TRITONSERVER_LOG_ISO8601 

Definition at line 268 of file tritonserver.h.

◆ TRITONSERVER_loglevel_enum

TRITONSERVER_Logging.

Types/levels of logging.

Enumerator
TRITONSERVER_LOG_INFO 
TRITONSERVER_LOG_WARN 
TRITONSERVER_LOG_ERROR 
TRITONSERVER_LOG_VERBOSE 

Definition at line 253 of file tritonserver.h.

◆ TRITONSERVER_memorytype_enum

TRITONSERVER_MemoryType.

Types of memory recognized by TRITONSERVER.

Enumerator
TRITONSERVER_MEMORY_CPU 
TRITONSERVER_MEMORY_CPU_PINNED 
TRITONSERVER_MEMORY_GPU 

Definition at line 162 of file tritonserver.h.

◆ tritonserver_metricformat_enum

TRITONSERVER_Metrics.

Object representing metrics. Metric format types

Enumerator
TRITONSERVER_METRIC_PROMETHEUS 

Definition at line 626 of file tritonserver.h.

◆ TRITONSERVER_metrickind_enum

TRITONSERVER_MetricKind.

Types of metrics recognized by TRITONSERVER.

Enumerator
TRITONSERVER_METRIC_KIND_COUNTER 
TRITONSERVER_METRIC_KIND_GAUGE 

Definition at line 2260 of file tritonserver.h.

◆ tritonserver_modelcontrolmode_enum

TRITONSERVER_ServerOptions.

Options to use when creating an inference server. Model control modes

Enumerator
TRITONSERVER_MODEL_CONTROL_NONE 
TRITONSERVER_MODEL_CONTROL_POLL 
TRITONSERVER_MODEL_CONTROL_EXPLICIT 

Definition at line 1536 of file tritonserver.h.

◆ tritonserver_modelindexflag_enum

Model index flags. The enum values must be power-of-2 values.

Enumerator
TRITONSERVER_INDEX_FLAG_READY 

Definition at line 1937 of file tritonserver.h.

◆ TRITONSERVER_parametertype_enum

TRITONSERVER_ParameterType.

Types of parameters recognized by TRITONSERVER.

Enumerator
TRITONSERVER_PARAMETER_STRING 
TRITONSERVER_PARAMETER_INT 
TRITONSERVER_PARAMETER_BOOL 
TRITONSERVER_PARAMETER_BYTES 

Definition at line 181 of file tritonserver.h.

◆ tritonserver_ratelimitmode_enum

Rate limit modes.

Enumerator
TRITONSERVER_RATE_LIMIT_OFF 
TRITONSERVER_RATE_LIMIT_EXEC_COUNT 

Definition at line 1543 of file tritonserver.h.

◆ tritonserver_requestflag_enum

TRITONSERVER_InferenceRequest.

Object representing an inference request. The inference request provides the meta-data and input tensor values needed for an inference and returns the inference result meta-data and output tensors. An inference request object can be modified and reused multiple times. Inference request flags. The enum values must be power-of-2 values.

Enumerator
TRITONSERVER_REQUEST_FLAG_SEQUENCE_START 
TRITONSERVER_REQUEST_FLAG_SEQUENCE_END 

Definition at line 870 of file tritonserver.h.

◆ tritonserver_requestreleaseflag_enum

Inference request release flags.

The enum values must be power-of-2 values.

Enumerator
TRITONSERVER_REQUEST_RELEASE_ALL 

Definition at line 877 of file tritonserver.h.

◆ tritonserver_responsecompleteflag_enum

Inference response complete flags.

The enum values must be power-of-2 values.

Enumerator
TRITONSERVER_RESPONSE_COMPLETE_FINAL 

Definition at line 883 of file tritonserver.h.

◆ tritonserver_traceactivity_enum

Trace activities.

Enumerator
TRITONSERVER_TRACE_REQUEST_START 
TRITONSERVER_TRACE_QUEUE_START 
TRITONSERVER_TRACE_COMPUTE_START 
TRITONSERVER_TRACE_COMPUTE_INPUT_END 
TRITONSERVER_TRACE_COMPUTE_OUTPUT_START 
TRITONSERVER_TRACE_COMPUTE_END 
TRITONSERVER_TRACE_REQUEST_END 
TRITONSERVER_TRACE_TENSOR_QUEUE_INPUT 
TRITONSERVER_TRACE_TENSOR_BACKEND_INPUT 
TRITONSERVER_TRACE_TENSOR_BACKEND_OUTPUT 

Definition at line 700 of file tritonserver.h.

◆ tritonserver_tracelevel_enum

TRITONSERVER_InferenceTrace.

Object that represents tracing for an inference request. Trace levels. The trace level controls the type of trace activities that are reported for an inference request.

Trace level values are power-of-2 and can be combined to trace multiple types of activities. For example, use (TRITONSERVER_TRACE_LEVEL_TIMESTAMPS | TRITONSERVER_TRACE_LEVEL_TENSORS) to trace both timestamps and tensors for an inference request.

TRITONSERVER_TRACE_LEVEL_MIN and TRITONSERVER_TRACE_LEVEL_MAX are deprecated and should not be used.

Enumerator
TRITONSERVER_TRACE_LEVEL_DISABLED 

Tracing disabled. No trace activities are reported.

TRITONSERVER_TRACE_LEVEL_MIN 

Deprecated. Use TRITONSERVER_TRACE_LEVEL_TIMESTAMPS.

TRITONSERVER_TRACE_LEVEL_MAX 

Deprecated. Use TRITONSERVER_TRACE_LEVEL_TIMESTAMPS.

TRITONSERVER_TRACE_LEVEL_TIMESTAMPS 

Record timestamps for the inference request.

TRITONSERVER_TRACE_LEVEL_TENSORS 

Record input and output tensor values for the inference request.

Definition at line 677 of file tritonserver.h.

◆ tritonserver_txn_property_flag_enum

Model transaction policy flags.

The enum values must be power-of-2 values.

Enumerator
TRITONSERVER_TXN_ONE_TO_ONE 
TRITONSERVER_TXN_DECOUPLED 

Definition at line 1943 of file tritonserver.h.

Function Documentation

◆ TRITONSERVER_ApiVersion()

TRITONSERVER_DECLSPEC TRITONSERVER_Error* TRITONSERVER_ApiVersion ( uint32_t *  major,
uint32_t *  minor 
)

Get the TRITONSERVER API version supported by the Triton shared library.

This value can be compared against the TRITONSERVER_API_VERSION_MAJOR and TRITONSERVER_API_VERSION_MINOR used to build the client to ensure that Triton shared library is compatible with the client.

Parameters
majorReturns the TRITONSERVER API major version supported by Triton.
minorReturns the TRITONSERVER API minor version supported by Triton.
Returns
a TRITONSERVER_Error indicating success or failure.

◆ TRITONSERVER_BufferAttributesByteSize()

TRITONSERVER_DECLSPEC TRITONSERVER_Error* TRITONSERVER_BufferAttributesByteSize ( TRITONSERVER_BufferAttributes *  buffer_attributes,
size_t *  byte_size 
)

Get the byte size field of the buffer attributes.

Parameters
buffer_attributesThe buffer attributes object.
byte_sizeReturns the byte size associated with the buffer attributes object.
Returns
a TRITONSERVER_Error indicating success or failure.

◆ TRITONSERVER_BufferAttributesCudaIpcHandle()

TRITONSERVER_DECLSPEC TRITONSERVER_Error* TRITONSERVER_BufferAttributesCudaIpcHandle ( TRITONSERVER_BufferAttributes *  buffer_attributes,
void **  cuda_ipc_handle 
)

Get the CudaIpcHandle field of the buffer attributes object.

Parameters
buffer_attributesThe buffer attributes object.
cuda_ipc_handleReturns the CUDA IPC handle associated with the buffer attributes object. If the cudaIpcHandle does not exist for the buffer, nullptr will be returned.
Returns
a TRITONSERVER_Error indicating success or failure.

◆ TRITONSERVER_BufferAttributesDelete()

TRITONSERVER_DECLSPEC TRITONSERVER_Error* TRITONSERVER_BufferAttributesDelete ( TRITONSERVER_BufferAttributes *  buffer_attributes)

Delete a buffer attributes object.

Parameters
buffer_attributesThe buffer_attributes object.
Returns
a TRITONSERVER_Error indicating success or failure.

◆ TRITONSERVER_BufferAttributesMemoryType()

TRITONSERVER_DECLSPEC TRITONSERVER_Error* TRITONSERVER_BufferAttributesMemoryType ( TRITONSERVER_BufferAttributes *  buffer_attributes,
TRITONSERVER_MemoryType memory_type 
)

Get the memory type field of the buffer attributes.

Parameters
buffer_attributesThe buffer attributes object.
memory_typeReturns the memory type associated with the buffer attributes object.
Returns
a TRITONSERVER_Error indicating success or failure.

◆ TRITONSERVER_BufferAttributesMemoryTypeId()

TRITONSERVER_DECLSPEC TRITONSERVER_Error* TRITONSERVER_BufferAttributesMemoryTypeId ( TRITONSERVER_BufferAttributes *  buffer_attributes,
int64_t *  memory_type_id 
)

Get the memory type id field of the buffer attributes.

Parameters
buffer_attributesThe buffer attributes object.
memory_type_idReturns the memory type id associated with the buffer attributes object.
Returns
a TRITONSERVER_Error indicating success or failure.

◆ TRITONSERVER_BufferAttributesNew()

TRITONSERVER_DECLSPEC TRITONSERVER_Error* TRITONSERVER_BufferAttributesNew ( TRITONSERVER_BufferAttributes **  buffer_attributes)

TRITONSERVER_BufferAttributes.

API to create, modify, or retrieve attributes associated with a buffer. Create a new buffer attributes object. The caller takes ownership of the TRITONSERVER_BufferAttributes object and must call TRITONSERVER_BufferAttributesDelete to release the object.

Parameters
buffer_attributesReturns the new buffer attributes object.
Returns
a TRITONSERVER_Error indicating success or failure.

◆ TRITONSERVER_BufferAttributesSetByteSize()

TRITONSERVER_DECLSPEC TRITONSERVER_Error* TRITONSERVER_BufferAttributesSetByteSize ( TRITONSERVER_BufferAttributes *  buffer_attributes,
size_t  byte_size 
)

Set the byte size field of the buffer attributes.

Parameters
buffer_attributesThe buffer attributes object.
byte_sizeByte size to assign to the buffer attributes object.
Returns
a TRITONSERVER_Error indicating success or failure.

◆ TRITONSERVER_BufferAttributesSetCudaIpcHandle()

TRITONSERVER_DECLSPEC TRITONSERVER_Error* TRITONSERVER_BufferAttributesSetCudaIpcHandle ( TRITONSERVER_BufferAttributes *  buffer_attributes,
void *  cuda_ipc_handle 
)

Set the CudaIpcHandle field of the buffer attributes.

Parameters
buffer_attributesThe buffer attributes object.
cuda_ipc_handleThe CudaIpcHandle to assign to the buffer attributes object.
Returns
a TRITONSERVER_Error indicating success or failure.

◆ TRITONSERVER_BufferAttributesSetMemoryType()

TRITONSERVER_DECLSPEC TRITONSERVER_Error* TRITONSERVER_BufferAttributesSetMemoryType ( TRITONSERVER_BufferAttributes *  buffer_attributes,
TRITONSERVER_MemoryType  memory_type 
)

Set the memory type field of the buffer attributes.

Parameters
buffer_attributesThe buffer attributes object.
memory_typeMemory type to assign to the buffer attributes object.
Returns
a TRITONSERVER_Error indicating success or failure.

◆ TRITONSERVER_BufferAttributesSetMemoryTypeId()

TRITONSERVER_DECLSPEC TRITONSERVER_Error* TRITONSERVER_BufferAttributesSetMemoryTypeId ( TRITONSERVER_BufferAttributes *  buffer_attributes,
int64_t  memory_type_id 
)

Set the memory type id field of the buffer attributes.

Parameters
buffer_attributesThe buffer attributes object.
memory_type_idMemory type id to assign to the buffer attributes object.
Returns
a TRITONSERVER_Error indicating success or failure.

◆ TRITONSERVER_DataTypeByteSize()

TRITONSERVER_DECLSPEC uint32_t TRITONSERVER_DataTypeByteSize ( TRITONSERVER_DataType  datatype)

Get the size of a Triton datatype in bytes.

Zero is returned for TRITONSERVER_TYPE_BYTES because it has a variable size. Zero is returned for TRITONSERVER_TYPE_INVALID.

Parameters
datatypeThe datatype.
Returns
The size of the datatype.

◆ TRITONSERVER_DataTypeString()

const TRITONSERVER_DECLSPEC char* TRITONSERVER_DataTypeString ( TRITONSERVER_DataType  datatype)

Get the string representation of a data type.

The returned string is not owned by the caller and so should not be modified or freed.

Parameters
datatypeThe data type.
Returns
The string representation of the data type.

◆ TRITONSERVER_ErrorCode()

TRITONSERVER_DECLSPEC TRITONSERVER_Error_Code TRITONSERVER_ErrorCode ( TRITONSERVER_Error *  error)

Get the error code.

Parameters
errorThe error object.
Returns
The error code.

◆ TRITONSERVER_ErrorCodeString()

const TRITONSERVER_DECLSPEC char* TRITONSERVER_ErrorCodeString ( TRITONSERVER_Error *  error)

Get the string representation of an error code.

The returned string is not owned by the caller and so should not be modified or freed. The lifetime of the returned string extends only as long as 'error' and must not be accessed once 'error' is deleted.

Parameters
errorThe error object.
Returns
The string representation of the error code.

◆ TRITONSERVER_ErrorDelete()

TRITONSERVER_DECLSPEC void TRITONSERVER_ErrorDelete ( TRITONSERVER_Error *  error)

Delete an error object.

Parameters
errorThe error object.

◆ TRITONSERVER_ErrorMessage()

const TRITONSERVER_DECLSPEC char* TRITONSERVER_ErrorMessage ( TRITONSERVER_Error *  error)

Get the error message.

The returned string is not owned by the caller and so should not be modified or freed. The lifetime of the returned string extends only as long as 'error' and must not be accessed once 'error' is deleted.

Parameters
errorThe error object.
Returns
The error message.

◆ TRITONSERVER_ErrorNew()

TRITONSERVER_DECLSPEC TRITONSERVER_Error* TRITONSERVER_ErrorNew ( TRITONSERVER_Error_Code  code,
const char *  msg 
)

Create a new error object.

The caller takes ownership of the TRITONSERVER_Error object and must call TRITONSERVER_ErrorDelete to release the object.

Parameters
codeThe error code.
msgThe error message.
Returns
A new TRITONSERVER_Error object.

◆ TRITONSERVER_GetMetricKind()

TRITONSERVER_DECLSPEC TRITONSERVER_Error* TRITONSERVER_GetMetricKind ( TRITONSERVER_Metric *  metric,
TRITONSERVER_MetricKind kind 
)

Get the TRITONSERVER_MetricKind of metric and its corresponding family.

Parameters
metricThe metric object to query.
kindReturns the TRITONSERVER_MetricKind of metric.
Returns
a TRITONSERVER_Error indicating success or failure.

◆ TRITONSERVER_InferenceRequestAddInput()

TRITONSERVER_DECLSPEC TRITONSERVER_Error* TRITONSERVER_InferenceRequestAddInput ( TRITONSERVER_InferenceRequest *  inference_request,
const char *  name,
const TRITONSERVER_DataType  datatype,
const int64_t *  shape,
uint64_t  dim_count 
)

Add an input to a request.

Parameters
inference_requestThe request object.
nameThe name of the input.
datatypeThe type of the input. Valid type names are BOOL, UINT8, UINT16, UINT32, UINT64, INT8, INT16, INT32, INT64, FP16, FP32, FP64, and BYTES.
shapeThe shape of the input.
dim_countThe number of dimensions of 'shape'.
Returns
a TRITONSERVER_Error indicating success or failure.

◆ TRITONSERVER_InferenceRequestAddRawInput()

TRITONSERVER_DECLSPEC TRITONSERVER_Error* TRITONSERVER_InferenceRequestAddRawInput ( TRITONSERVER_InferenceRequest *  inference_request,
const char *  name 
)

Add a raw input to a request.

The name recognized by the model, data type and shape of the input will be deduced from the model configuration. This function must be called at most once on a request with no other input to ensure the deduction is accurate.

Parameters
inference_requestThe request object.
nameThe name of the input. This name is only used as a reference of the raw input in other Tritonserver APIs. It doesn't associate with the name used in the model.
Returns
a TRITONSERVER_Error indicating success or failure.

◆ TRITONSERVER_InferenceRequestAddRequestedOutput()

TRITONSERVER_DECLSPEC TRITONSERVER_Error* TRITONSERVER_InferenceRequestAddRequestedOutput ( TRITONSERVER_InferenceRequest *  inference_request,
const char *  name 
)

Add an output request to an inference request.

Parameters
inference_requestThe request object.
nameThe name of the output.
Returns
a TRITONSERVER_Error indicating success or failure.

◆ TRITONSERVER_InferenceRequestAppendInputData()

TRITONSERVER_DECLSPEC TRITONSERVER_Error* TRITONSERVER_InferenceRequestAppendInputData ( TRITONSERVER_InferenceRequest *  inference_request,
const char *  name,
const void *  base,
size_t  byte_size,
TRITONSERVER_MemoryType  memory_type,
int64_t  memory_type_id 
)

Assign a buffer of data to an input.

The buffer will be appended to any existing buffers for that input. The 'inference_request' object takes ownership of the buffer and so the caller should not modify or free the buffer until that ownership is released by 'inference_request' being deleted or by the input being removed from 'inference_request'.

Parameters
inference_requestThe request object.
nameThe name of the input.
baseThe base address of the input data.
byte_sizeThe size, in bytes, of the input data.
memory_typeThe memory type of the input data.
memory_type_idThe memory type id of the input data.
Returns
a TRITONSERVER_Error indicating success or failure.

◆ TRITONSERVER_InferenceRequestAppendInputDataWithBufferAttributes()

TRITONSERVER_DECLSPEC TRITONSERVER_Error* TRITONSERVER_InferenceRequestAppendInputDataWithBufferAttributes ( TRITONSERVER_InferenceRequest *  inference_request,
const char *  name,
const void *  base,
TRITONSERVER_BufferAttributes *  buffer_attributes 
)

Assign a buffer of data to an input.

The buffer will be appended to any existing buffers for that input. The 'inference_request' object takes ownership of the buffer and so the caller should not modify or free the buffer until that ownership is released by 'inference_request' being deleted or by the input being removed from 'inference_request'.

Parameters
inference_requestThe request object.
nameThe name of the input.
baseThe base address of the input data.
buffer_attributesThe buffer attributes of the input.
Returns
a TRITONSERVER_Error indicating success or failure.

◆ TRITONSERVER_InferenceRequestAppendInputDataWithHostPolicy()

TRITONSERVER_DECLSPEC TRITONSERVER_Error* TRITONSERVER_InferenceRequestAppendInputDataWithHostPolicy ( TRITONSERVER_InferenceRequest *  inference_request,
const char *  name,
const void *  base,
size_t  byte_size,
TRITONSERVER_MemoryType  memory_type,
int64_t  memory_type_id,
const char *  host_policy_name 
)

Assign a buffer of data to an input for execution on all model instances with the specified host policy.

The buffer will be appended to any existing buffers for that input on all devices with this host policy. The 'inference_request' object takes ownership of the buffer and so the caller should not modify or free the buffer until that ownership is released by 'inference_request' being deleted or by the input being removed from 'inference_request'. If the execution is scheduled on a device that does not have an input buffer specified using this function, then the input buffer specified with TRITONSERVER_InferenceRequestAppendInputData will be used, so a non-host-policy-specific version of the data must be added using that API.

Parameters
inference_requestThe request object.
nameThe name of the input.
baseThe base address of the input data.
byte_sizeThe size, in bytes, of the input data.
memory_typeThe memory type of the input data.
memory_type_idThe memory type id of the input data.
host_policy_nameAll model instances executing with this host_policy will use this input buffer for execution.
Returns
a TRITONSERVER_Error indicating success or failure.

◆ TRITONSERVER_InferenceRequestCorrelationId()

TRITONSERVER_DECLSPEC TRITONSERVER_Error* TRITONSERVER_InferenceRequestCorrelationId ( TRITONSERVER_InferenceRequest *  inference_request,
uint64_t *  correlation_id 
)

Get the correlation ID of the inference request as an unsigned integer.

Default is 0, which indicates that the request has no correlation ID. If the correlation id associated with the inference request is a string, this function will return a failure. The correlation ID is used to indicate that two or more inference requests are related to each other. How this relationship is handled by the inference server is determined by the model's scheduling policy.

Parameters
inference_requestThe request object.
correlation_idReturns the correlation ID.
Returns
a TRITONSERVER_Error indicating success or failure.

◆ TRITONSERVER_InferenceRequestCorrelationIdString()

TRITONSERVER_DECLSPEC TRITONSERVER_Error* TRITONSERVER_InferenceRequestCorrelationIdString ( TRITONSERVER_InferenceRequest *  inference_request,
const char **  correlation_id 
)

Get the correlation ID of the inference request as a string.

Default is empty "", which indicates that the request has no correlation ID. If the correlation id associated with the inference request is an unsigned integer, then this function will return a failure. The correlation ID is used to indicate that two or more inference requests are related to each other. How this relationship is handled by the inference server is determined by the model's scheduling policy.

Parameters
inference_requestThe request object.
correlation_idReturns the correlation ID.
Returns
a TRITONSERVER_Error indicating success or failure.

◆ TRITONSERVER_InferenceRequestDelete()

TRITONSERVER_DECLSPEC TRITONSERVER_Error* TRITONSERVER_InferenceRequestDelete ( TRITONSERVER_InferenceRequest *  inference_request)

Delete an inference request object.

Parameters
inference_requestThe request object.
Returns
a TRITONSERVER_Error indicating success or failure.

◆ TRITONSERVER_InferenceRequestFlags()

TRITONSERVER_DECLSPEC TRITONSERVER_Error* TRITONSERVER_InferenceRequestFlags ( TRITONSERVER_InferenceRequest *  inference_request,
uint32_t *  flags 
)

Get the flag(s) associated with a request.

On return 'flags' holds a bitwise-or of all flag values, see TRITONSERVER_RequestFlag for available flags.

Parameters
inference_requestThe request object.
flagsReturns the flags.
Returns
a TRITONSERVER_Error indicating success or failure.

◆ TRITONSERVER_InferenceRequestId()

TRITONSERVER_DECLSPEC TRITONSERVER_Error* TRITONSERVER_InferenceRequestId ( TRITONSERVER_InferenceRequest *  inference_request,
const char **  id 
)

Get the ID for a request.

The returned ID is owned by 'inference_request' and must not be modified or freed by the caller.

Parameters
inference_requestThe request object.
idReturns the ID.
Returns
a TRITONSERVER_Error indicating success or failure.

◆ TRITONSERVER_InferenceRequestNew()

TRITONSERVER_DECLSPEC TRITONSERVER_Error* TRITONSERVER_InferenceRequestNew ( TRITONSERVER_InferenceRequest **  inference_request,
TRITONSERVER_Server *  server,
const char *  model_name,
const int64_t  model_version 
)

Create a new inference request object.

Parameters
inference_requestReturns the new request object.
serverThe inference server object.
model_nameThe name of the model to use for the request.
model_versionThe version of the model to use for the request. If -1 then the server will choose a version based on the model's policy.
Returns
a TRITONSERVER_Error indicating success or failure.

◆ TRITONSERVER_InferenceRequestPriority()

TRITONSERVER_DECLSPEC TRITONSERVER_Error* TRITONSERVER_InferenceRequestPriority ( TRITONSERVER_InferenceRequest *  inference_request,
uint32_t *  priority 
)

Get the priority for a request.

The default is 0 indicating that the request does not specify a priority and so will use the model's default priority.

Parameters
inference_requestThe request object.
priorityReturns the priority level.
Returns
a TRITONSERVER_Error indicating success or failure.

◆ TRITONSERVER_InferenceRequestRemoveAllInputData()

TRITONSERVER_DECLSPEC TRITONSERVER_Error* TRITONSERVER_InferenceRequestRemoveAllInputData ( TRITONSERVER_InferenceRequest *  inference_request,
const char *  name 
)

Clear all input data from an input, releasing ownership of the buffer(s) that were appended to the input with TRITONSERVER_InferenceRequestAppendInputData or TRITONSERVER_InferenceRequestAppendInputDataWithHostPolicy.

Parameters
inference_requestThe request object.
nameThe name of the input.
Returns
a TRITONSERVER_Error indicating success or failure.

◆ TRITONSERVER_InferenceRequestRemoveAllInputs()

TRITONSERVER_DECLSPEC TRITONSERVER_Error* TRITONSERVER_InferenceRequestRemoveAllInputs ( TRITONSERVER_InferenceRequest *  inference_request)

Remove all inputs from a request.

Parameters
inference_requestThe request object.
Returns
a TRITONSERVER_Error indicating success or failure.

◆ TRITONSERVER_InferenceRequestRemoveAllRequestedOutputs()

TRITONSERVER_DECLSPEC TRITONSERVER_Error* TRITONSERVER_InferenceRequestRemoveAllRequestedOutputs ( TRITONSERVER_InferenceRequest *  inference_request)

Remove all output requests from an inference request.

Parameters
inference_requestThe request object.
Returns
a TRITONSERVER_Error indicating success or failure.

◆ TRITONSERVER_InferenceRequestRemoveInput()

TRITONSERVER_DECLSPEC TRITONSERVER_Error* TRITONSERVER_InferenceRequestRemoveInput ( TRITONSERVER_InferenceRequest *  inference_request,
const char *  name 
)

Remove an input from a request.

Parameters
inference_requestThe request object.
nameThe name of the input.
Returns
a TRITONSERVER_Error indicating success or failure.

◆ TRITONSERVER_InferenceRequestRemoveRequestedOutput()

TRITONSERVER_DECLSPEC TRITONSERVER_Error* TRITONSERVER_InferenceRequestRemoveRequestedOutput ( TRITONSERVER_InferenceRequest *  inference_request,
const char *  name 
)

Remove an output request from an inference request.

Parameters
inference_requestThe request object.
nameThe name of the output.
Returns
a TRITONSERVER_Error indicating success or failure.

◆ TRITONSERVER_InferenceRequestSetCorrelationId()

TRITONSERVER_DECLSPEC TRITONSERVER_Error* TRITONSERVER_InferenceRequestSetCorrelationId ( TRITONSERVER_InferenceRequest *  inference_request,
uint64_t  correlation_id 
)

Set the correlation ID of the inference request to be an unsigned integer.

Default is 0, which indicates that the request has no correlation ID. The correlation ID is used to indicate that two or more inference requests are related to each other. How this relationship is handled by the inference server is determined by the model's scheduling policy.

Parameters
inference_requestThe request object.
correlation_idThe correlation ID.
Returns
a TRITONSERVER_Error indicating success or failure.

◆ TRITONSERVER_InferenceRequestSetCorrelationIdString()

TRITONSERVER_DECLSPEC TRITONSERVER_Error* TRITONSERVER_InferenceRequestSetCorrelationIdString ( TRITONSERVER_InferenceRequest *  inference_request,
const char *  correlation_id 
)

Set the correlation ID of the inference request to be a string.

The correlation ID is used to indicate that two or more inference requests are related to each other. How this relationship is handled by the inference server is determined by the model's scheduling policy.

Parameters
inference_requestThe request object.
correlation_idThe correlation ID.
Returns
a TRITONSERVER_Error indicating success or failure.

◆ TRITONSERVER_InferenceRequestSetFlags()

TRITONSERVER_DECLSPEC TRITONSERVER_Error* TRITONSERVER_InferenceRequestSetFlags ( TRITONSERVER_InferenceRequest *  inference_request,
uint32_t  flags 
)

Set the flag(s) associated with a request.

'flags' should hold a bitwise-or of all flag values, see TRITONSERVER_RequestFlag for available flags.

Parameters
inference_requestThe request object.
flagsThe flags.
Returns
a TRITONSERVER_Error indicating success or failure.

◆ TRITONSERVER_InferenceRequestSetId()

TRITONSERVER_DECLSPEC TRITONSERVER_Error* TRITONSERVER_InferenceRequestSetId ( TRITONSERVER_InferenceRequest *  inference_request,
const char *  id 
)

Set the ID for a request.

Parameters
inference_requestThe request object.
idThe ID.
Returns
a TRITONSERVER_Error indicating success or failure.

◆ TRITONSERVER_InferenceRequestSetPriority()

TRITONSERVER_DECLSPEC TRITONSERVER_Error* TRITONSERVER_InferenceRequestSetPriority ( TRITONSERVER_InferenceRequest *  inference_request,
uint32_t  priority 
)

Set the priority for a request.

The default is 0 indicating that the request does not specify a priority and so will use the model's default priority.

Parameters
inference_requestThe request object.
priorityThe priority level.
Returns
a TRITONSERVER_Error indicating success or failure.

◆ TRITONSERVER_InferenceRequestSetReleaseCallback()

TRITONSERVER_DECLSPEC TRITONSERVER_Error* TRITONSERVER_InferenceRequestSetReleaseCallback ( TRITONSERVER_InferenceRequest *  inference_request,
TRITONSERVER_InferenceRequestReleaseFn_t  request_release_fn,
void *  request_release_userp 
)

Set the release callback for an inference request.

The release callback is called by Triton to return ownership of the request object.

Parameters
inference_requestThe request object.
request_release_fnThe function called to return ownership of the 'inference_request' object.
request_release_userpUser-provided pointer that is delivered to the 'request_release_fn' callback.
Returns
a TRITONSERVER_Error indicating success or failure.

◆ TRITONSERVER_InferenceRequestSetResponseCallback()

TRITONSERVER_DECLSPEC TRITONSERVER_Error* TRITONSERVER_InferenceRequestSetResponseCallback ( TRITONSERVER_InferenceRequest *  inference_request,
TRITONSERVER_ResponseAllocator *  response_allocator,
void *  response_allocator_userp,
TRITONSERVER_InferenceResponseCompleteFn_t  response_fn,
void *  response_userp 
)

Set the allocator and response callback for an inference request.

The allocator is used to allocate buffers for any output tensors included in responses that are produced for this request. The response callback is called to return response objects representing responses produced for this request.

Parameters
inference_requestThe request object.
response_allocatorThe TRITONSERVER_ResponseAllocator to use to allocate buffers to hold inference results.
response_allocator_userpUser-provided pointer that is delivered to the response allocator's start and allocation functions.
response_fnThe function called to deliver an inference response for this request.
response_userpUser-provided pointer that is delivered to the 'response_fn' callback.
Returns
a TRITONSERVER_Error indicating success or failure.

◆ TRITONSERVER_InferenceRequestSetTimeoutMicroseconds()

TRITONSERVER_DECLSPEC TRITONSERVER_Error* TRITONSERVER_InferenceRequestSetTimeoutMicroseconds ( TRITONSERVER_InferenceRequest *  inference_request,
uint64_t  timeout_us 
)

Set the timeout for a request, in microseconds.

The default is 0 which indicates that the request has no timeout.

Parameters
inference_requestThe request object.
timeout_usThe timeout, in microseconds.
Returns
a TRITONSERVER_Error indicating success or failure.

◆ TRITONSERVER_InferenceRequestTimeoutMicroseconds()

TRITONSERVER_DECLSPEC TRITONSERVER_Error* TRITONSERVER_InferenceRequestTimeoutMicroseconds ( TRITONSERVER_InferenceRequest *  inference_request,
uint64_t *  timeout_us 
)

Get the timeout for a request, in microseconds.

The default is 0 which indicates that the request has no timeout.

Parameters
inference_requestThe request object.
timeout_usReturns the timeout, in microseconds.
Returns
a TRITONSERVER_Error indicating success or failure.

◆ TRITONSERVER_InferenceResponseDelete()

TRITONSERVER_DECLSPEC TRITONSERVER_Error* TRITONSERVER_InferenceResponseDelete ( TRITONSERVER_InferenceResponse *  inference_response)

TRITONSERVER_InferenceResponse.

Object representing an inference response. The inference response provides the meta-data and output tensor values calculated by the inference. Delete an inference response object.

Parameters
inference_responseThe response object.
Returns
a TRITONSERVER_Error indicating success or failure.

◆ TRITONSERVER_InferenceResponseError()

TRITONSERVER_DECLSPEC TRITONSERVER_Error* TRITONSERVER_InferenceResponseError ( TRITONSERVER_InferenceResponse *  inference_response)

Return the error status of an inference response.

Return a TRITONSERVER_Error object on failure, return nullptr on success. The returned error object is owned by 'inference_response' and so should not be deleted by the caller.

Parameters
inference_responseThe response object.
Returns
a TRITONSERVER_Error indicating the success or failure status of the response.

◆ TRITONSERVER_InferenceResponseId()

TRITONSERVER_DECLSPEC TRITONSERVER_Error* TRITONSERVER_InferenceResponseId ( TRITONSERVER_InferenceResponse *  inference_response,
const char **  request_id 
)

Get the ID of the request corresponding to a response.

The caller does not own the returned ID and must not modify or delete it. The lifetime of all returned values extends until 'inference_response' is deleted.

Parameters
inference_responseThe response object.
request_idReturns the ID of the request corresponding to this response.
Returns
a TRITONSERVER_Error indicating success or failure.

◆ TRITONSERVER_InferenceResponseModel()

TRITONSERVER_DECLSPEC TRITONSERVER_Error* TRITONSERVER_InferenceResponseModel ( TRITONSERVER_InferenceResponse *  inference_response,
const char **  model_name,
int64_t *  model_version 
)

Get model used to produce a response.

The caller does not own the returned model name value and must not modify or delete it. The lifetime of all returned values extends until 'inference_response' is deleted.

Parameters
inference_responseThe response object.
model_nameReturns the name of the model.
model_versionReturns the version of the model used to produce this response.
Returns
a TRITONSERVER_Error indicating success or failure.

◆ TRITONSERVER_InferenceResponseOutput()

TRITONSERVER_DECLSPEC TRITONSERVER_Error* TRITONSERVER_InferenceResponseOutput ( TRITONSERVER_InferenceResponse *  inference_response,
const uint32_t  index,
const char **  name,
TRITONSERVER_DataType datatype,
const int64_t **  shape,
uint64_t *  dim_count,
const void **  base,
size_t *  byte_size,
TRITONSERVER_MemoryType memory_type,
int64_t *  memory_type_id,
void **  userp 
)

Get all information about an output tensor.

The tensor data is returned as the base pointer to the data and the size, in bytes, of the data. The caller does not own any of the returned values and must not modify or delete them. The lifetime of all returned values extends until 'inference_response' is deleted.

Parameters
inference_responseThe response object.
indexThe index of the output tensor, must be 0 <= index < count, where 'count' is the value returned by TRITONSERVER_InferenceResponseOutputCount.
nameReturns the name of the output.
datatypeReturns the type of the output.
shapeReturns the shape of the output.
dim_countReturns the number of dimensions of the returned shape.
baseReturns the tensor data for the output.
byte_sizeReturns the size, in bytes, of the data.
memory_typeReturns the memory type of the data.
memory_type_idReturns the memory type id of the data.
userpThe user-specified value associated with the buffer in TRITONSERVER_ResponseAllocatorAllocFn_t.
Returns
a TRITONSERVER_Error indicating success or failure.

◆ TRITONSERVER_InferenceResponseOutputClassificationLabel()

TRITONSERVER_DECLSPEC TRITONSERVER_Error* TRITONSERVER_InferenceResponseOutputClassificationLabel ( TRITONSERVER_InferenceResponse *  inference_response,
const uint32_t  index,
const size_t  class_index,
const char **  label 
)

Get a classification label associated with an output for a given index.

The caller does not own the returned label and must not modify or delete it. The lifetime of all returned label extends until 'inference_response' is deleted.

Parameters
inference_responseThe response object.
indexThe index of the output tensor, must be 0 <= index < count, where 'count' is the value returned by TRITONSERVER_InferenceResponseOutputCount.
class_indexThe index of the class.
labelReturns the label corresponding to 'class_index' or nullptr if no label.
Returns
a TRITONSERVER_Error indicating success or failure.

◆ TRITONSERVER_InferenceResponseOutputCount()

TRITONSERVER_DECLSPEC TRITONSERVER_Error* TRITONSERVER_InferenceResponseOutputCount ( TRITONSERVER_InferenceResponse *  inference_response,
uint32_t *  count 
)

Get the number of outputs available in the response.

Parameters
inference_responseThe response object.
countReturns the number of output tensors.
Returns
a TRITONSERVER_Error indicating success or failure.

◆ TRITONSERVER_InferenceResponseParameter()

TRITONSERVER_DECLSPEC TRITONSERVER_Error* TRITONSERVER_InferenceResponseParameter ( TRITONSERVER_InferenceResponse *  inference_response,
const uint32_t  index,
const char **  name,
TRITONSERVER_ParameterType type,
const void **  vvalue 
)

Get all information about a parameter.

The caller does not own any of the returned values and must not modify or delete them. The lifetime of all returned values extends until 'inference_response' is deleted.

The 'vvalue' returns a void* pointer that must be cast appropriately based on 'type'. For example:

void* vvalue;
TRITONSERVER_ParameterType type;
TRITONSERVER_InferenceResponseParameter(
    response, index, &name, &type, &vvalue);
switch (type) {
  case TRITONSERVER_PARAMETER_BOOL:
    bool value = *(reinterpret_cast<bool*>(vvalue));
    ...
  case TRITONSERVER_PARAMETER_INT:
    int64_t value = *(reinterpret_cast<int64_t*>(vvalue));
    ...
  case TRITONSERVER_PARAMETER_STRING:
    const char* value = reinterpret_cast<const char*>(vvalue);
    ...
}

Parameters
inference_responseThe response object.
indexThe index of the parameter, must be 0 <= index < count, where 'count' is the value returned by TRITONSERVER_InferenceResponseParameterCount.
nameReturns the name of the parameter.
typeReturns the type of the parameter.
vvalueReturns a pointer to the parameter value.
Returns
a TRITONSERVER_Error indicating success or failure.

◆ TRITONSERVER_InferenceResponseParameterCount()

TRITONSERVER_DECLSPEC TRITONSERVER_Error* TRITONSERVER_InferenceResponseParameterCount ( TRITONSERVER_InferenceResponse *  inference_response,
uint32_t *  count 
)

Get the number of parameters available in the response.

Parameters
inference_responseThe response object.
countReturns the number of parameters.
Returns
a TRITONSERVER_Error indicating success or failure.

◆ TRITONSERVER_InferenceTraceActivityString()

const TRITONSERVER_DECLSPEC char* TRITONSERVER_InferenceTraceActivityString ( TRITONSERVER_InferenceTraceActivity  activity)

Get the string representation of a trace activity.

The returned string is not owned by the caller and so should not be modified or freed.

Parameters
activityThe trace activity.
Returns
The string representation of the trace activity.

◆ TRITONSERVER_InferenceTraceDelete()

TRITONSERVER_DECLSPEC TRITONSERVER_Error* TRITONSERVER_InferenceTraceDelete ( TRITONSERVER_InferenceTrace *  trace)

Delete a trace object.

Parameters
traceThe trace object.
Returns
a TRITONSERVER_Error indicating success or failure.

◆ TRITONSERVER_InferenceTraceId()

TRITONSERVER_DECLSPEC TRITONSERVER_Error* TRITONSERVER_InferenceTraceId ( TRITONSERVER_InferenceTrace *  trace,
uint64_t *  id 
)

Get the id associated with a trace.

Every trace is assigned an id that is unique across all traces created for a Triton server.

Parameters
traceThe trace.
idReturns the id associated with the trace.
Returns
a TRITONSERVER_Error indicating success or failure.

◆ TRITONSERVER_InferenceTraceLevelString()

const TRITONSERVER_DECLSPEC char* TRITONSERVER_InferenceTraceLevelString ( TRITONSERVER_InferenceTraceLevel  level)

Get the string representation of a trace level.

The returned string is not owned by the caller and so should not be modified or freed.

Parameters
levelThe trace level.
Returns
The string representation of the trace level.

◆ TRITONSERVER_InferenceTraceModelName()

TRITONSERVER_DECLSPEC TRITONSERVER_Error* TRITONSERVER_InferenceTraceModelName ( TRITONSERVER_InferenceTrace *  trace,
const char **  model_name 
)

Get the name of the model associated with a trace.

The caller does not own the returned string and must not modify or delete it. The lifetime of the returned string extends only as long as 'trace'.

Parameters
traceThe trace.
model_nameReturns the name of the model associated with the trace.
Returns
a TRITONSERVER_Error indicating success or failure.

◆ TRITONSERVER_InferenceTraceModelVersion()

TRITONSERVER_DECLSPEC TRITONSERVER_Error* TRITONSERVER_InferenceTraceModelVersion ( TRITONSERVER_InferenceTrace *  trace,
int64_t *  model_version 
)

Get the version of the model associated with a trace.

Parameters
traceThe trace.
model_versionReturns the version of the model associated with the trace.
Returns
a TRITONSERVER_Error indicating success or failure.

◆ TRITONSERVER_InferenceTraceNew()

TRITONSERVER_DECLSPEC TRITONSERVER_Error* TRITONSERVER_InferenceTraceNew ( TRITONSERVER_InferenceTrace **  trace,
TRITONSERVER_InferenceTraceLevel  level,
uint64_t  parent_id,
TRITONSERVER_InferenceTraceActivityFn_t  activity_fn,
TRITONSERVER_InferenceTraceReleaseFn_t  release_fn,
void *  trace_userp 
)

Create a new inference trace object.

The caller takes ownership of the TRITONSERVER_InferenceTrace object and must call TRITONSERVER_InferenceTraceDelete to release the object.

The activity callback function will be called to report activity for 'trace' as well as for any child traces that are spawned by 'trace', and so the activity callback must check the trace object to determine specifically what activity is being reported.

The release callback is called for both 'trace' and for any child traces spawned by 'trace'.

Parameters
traceReturns the new inference trace object.
levelThe tracing level.
parent_idThe parent trace id for this trace. A value of 0 indicates that there is no parent trace.
activity_fnThe callback function where activity for the trace is reported.
release_fnThe callback function called when all activity is complete for the trace.
trace_userpUser-provided pointer that is delivered to the activity and release callback functions.
Returns
a TRITONSERVER_Error indicating success or failure.

◆ TRITONSERVER_InferenceTraceParentId()

TRITONSERVER_DECLSPEC TRITONSERVER_Error* TRITONSERVER_InferenceTraceParentId ( TRITONSERVER_InferenceTrace *  trace,
uint64_t *  parent_id 
)

Get the parent id associated with a trace.

The parent id indicates a parent-child relationship between two traces. A parent id value of 0 indicates that there is no parent trace.

Parameters
traceThe trace.
parent_idReturns the parent id associated with the trace.
Returns
a TRITONSERVER_Error indicating success or failure.

◆ TRITONSERVER_InferenceTraceTensorNew()

TRITONSERVER_DECLSPEC TRITONSERVER_Error* TRITONSERVER_InferenceTraceTensorNew ( TRITONSERVER_InferenceTrace **  trace,
TRITONSERVER_InferenceTraceLevel  level,
uint64_t  parent_id,
TRITONSERVER_InferenceTraceActivityFn_t  activity_fn,
TRITONSERVER_InferenceTraceTensorActivityFn_t  tensor_activity_fn,
TRITONSERVER_InferenceTraceReleaseFn_t  release_fn,
void *  trace_userp 
)

Create a new inference trace object.

The caller takes ownership of the TRITONSERVER_InferenceTrace object and must call TRITONSERVER_InferenceTraceDelete to release the object.

The timeline and tensor activity callback function will be called to report activity for 'trace' as well as for any child traces that are spawned by 'trace', and so the activity callback must check the trace object to determine specifically what activity is being reported.

The release callback is called for both 'trace' and for any child traces spawned by 'trace'.

Parameters
traceReturns the new inference trace object.
levelThe tracing level.
parent_idThe parent trace id for this trace. A value of 0 indicates that there is no parent trace.
activity_fnThe callback function where timeline activity for the trace is reported.
tensor_activity_fnThe callback function where tensor activity for the trace is reported.
release_fnThe callback function called when all activity is complete for the trace.
trace_userpUser-provided pointer that is delivered to the activity and release callback functions.
Returns
a TRITONSERVER_Error indicating success or failure.

◆ TRITONSERVER_InstanceGroupKindString()

const TRITONSERVER_DECLSPEC char* TRITONSERVER_InstanceGroupKindString ( TRITONSERVER_InstanceGroupKind  kind)

Get the string representation of an instance-group kind.

The returned string is not owned by the caller and so should not be modified or freed.

Parameters
kindThe instance-group kind.
Returns
The string representation of the kind.

◆ TRITONSERVER_LogIsEnabled()

TRITONSERVER_DECLSPEC bool TRITONSERVER_LogIsEnabled ( TRITONSERVER_LogLevel  level)

Is a log level enabled?

Parameters
levelThe log level.
Returns
True if the log level is enabled, false if not enabled.

◆ TRITONSERVER_LogMessage()

TRITONSERVER_DECLSPEC TRITONSERVER_Error* TRITONSERVER_LogMessage ( TRITONSERVER_LogLevel  level,
const char *  filename,
const int  line,
const char *  msg 
)

Log a message at a given log level if that level is enabled.

Parameters
levelThe log level.
filenameThe file name of the location of the log message.
lineThe line number of the log message.
msgThe log message.
Returns
a TRITONSERVER_Error indicating success or failure.

◆ TRITONSERVER_MemoryTypeString()

const TRITONSERVER_DECLSPEC char* TRITONSERVER_MemoryTypeString ( TRITONSERVER_MemoryType  memtype)

Get the string representation of a memory type.

The returned string is not owned by the caller and so should not be modified or freed.

Parameters
memtypeThe memory type.
Returns
The string representation of the memory type.

◆ TRITONSERVER_MessageDelete()

TRITONSERVER_DECLSPEC TRITONSERVER_Error* TRITONSERVER_MessageDelete ( TRITONSERVER_Message *  message)

Delete a message object.

Parameters
messageThe message object.
Returns
a TRITONSERVER_Error indicating success or failure.

◆ TRITONSERVER_MessageNewFromSerializedJson()

TRITONSERVER_DECLSPEC TRITONSERVER_Error* TRITONSERVER_MessageNewFromSerializedJson ( TRITONSERVER_Message **  message,
const char *  base,
size_t  byte_size 
)

TRITONSERVER_Message.

Object representing a Triton Server message. Create a new message object from serialized JSON string.

Parameters
messageThe message object.
baseThe base of the serialized JSON.
byte_sizeThe size, in bytes, of the serialized message.
Returns
a TRITONSERVER_Error indicating success or failure.

◆ TRITONSERVER_MessageSerializeToJson()

TRITONSERVER_DECLSPEC TRITONSERVER_Error* TRITONSERVER_MessageSerializeToJson ( TRITONSERVER_Message *  message,
const char **  base,
size_t *  byte_size 
)

Get the base and size of the buffer containing the serialized message in JSON format.

The buffer is owned by the TRITONSERVER_Message object and should not be modified or freed by the caller. The lifetime of the buffer extends only as long as 'message' and must not be accessed once 'message' is deleted.

Parameters
messageThe message object.
baseReturns the base of the serialized message.
byte_sizeReturns the size, in bytes, of the serialized message.
Returns
a TRITONSERVER_Error indicating success or failure.

◆ TRITONSERVER_MetricDelete()

TRITONSERVER_DECLSPEC TRITONSERVER_Error* TRITONSERVER_MetricDelete ( TRITONSERVER_Metric *  metric)

Delete a metric object.

All TRITONSERVER_Metric* objects should be deleted BEFORE their corresponding TRITONSERVER_MetricFamily* objects have been deleted. If a family is deleted before its metrics, an error will be returned.

Parameters
metricThe metric object to delete.
Returns
a TRITONSERVER_Error indicating success or failure.

◆ TRITONSERVER_MetricFamilyDelete()

TRITONSERVER_DECLSPEC TRITONSERVER_Error* TRITONSERVER_MetricFamilyDelete ( TRITONSERVER_MetricFamily *  family)

Delete a metric family object.

A TRITONSERVER_MetricFamily* object should be deleted AFTER its corresponding TRITONSERVER_Metric* objects have been deleted. Attempting to delete a family before its metrics will return an error.

Parameters
familyThe metric family object to delete.
Returns
a TRITONSERVER_Error indicating success or failure.

◆ TRITONSERVER_MetricFamilyNew()

TRITONSERVER_DECLSPEC TRITONSERVER_Error* TRITONSERVER_MetricFamilyNew ( TRITONSERVER_MetricFamily **  family,
const TRITONSERVER_MetricKind  kind,
const char *  name,
const char *  description 
)

Create a new metric family object.

The caller takes ownership of the TRITONSERVER_MetricFamily object and must call TRITONSERVER_MetricFamilyDelete to release the object.

Parameters
familyReturns the new metric family object.
kindThe type of metric family to create.
nameThe name of the metric family seen when calling the metrics endpoint.
descriptionThe description of the metric family seen when calling the metrics endpoint.
Returns
a TRITONSERVER_Error indicating success or failure.

◆ TRITONSERVER_MetricIncrement()

TRITONSERVER_DECLSPEC TRITONSERVER_Error* TRITONSERVER_MetricIncrement ( TRITONSERVER_Metric *  metric,
double  value 
)

Increment the current value of metric by value.

Supports metrics of kind TRITONSERVER_METRIC_KIND_GAUGE for any value, and TRITONSERVER_METRIC_KIND_COUNTER for non-negative values. Returns TRITONSERVER_ERROR_UNSUPPORTED for unsupported TRITONSERVER_MetricKind and TRITONSERVER_ERROR_INVALID_ARG for negative values on a TRITONSERVER_METRIC_KIND_COUNTER metric.

Parameters
metricThe metric object to update.
valueThe amount to increment the metric's value by.
Returns
a TRITONSERVER_Error indicating success or failure.

◆ TRITONSERVER_MetricNew()

TRITONSERVER_DECLSPEC TRITONSERVER_Error* TRITONSERVER_MetricNew ( TRITONSERVER_Metric **  metric,
TRITONSERVER_MetricFamily *  family,
const TRITONSERVER_Parameter **  labels,
const uint64_t  label_count 
)

Create a new metric object.

The caller takes ownership of the TRITONSERVER_Metric object and must call TRITONSERVER_MetricDelete to release the object. The caller is also responsible for ownership of the labels passed in. Each label can be deleted immediately after creating the metric with TRITONSERVER_ParameterDelete if not re-using the labels.

Parameters
metricReturns the new metric object.
familyThe metric family to add this new metric to.
labelsThe array of labels to associate with this new metric.
label_countThe number of labels.
Returns
a TRITONSERVER_Error indicating success or failure.

◆ TRITONSERVER_MetricsDelete()

TRITONSERVER_DECLSPEC TRITONSERVER_Error* TRITONSERVER_MetricsDelete ( TRITONSERVER_Metrics *  metrics)

Delete a metrics object.

Parameters
metricsThe metrics object.
Returns
a TRITONSERVER_Error indicating success or failure.

◆ TRITONSERVER_MetricSet()

TRITONSERVER_DECLSPEC TRITONSERVER_Error* TRITONSERVER_MetricSet ( TRITONSERVER_Metric *  metric,
double  value 
)

Set the current value of metric to value.

Supports metrics of kind TRITONSERVER_METRIC_KIND_GAUGE and returns TRITONSERVER_ERROR_UNSUPPORTED for unsupported TRITONSERVER_MetricKind.

Parameters
metricThe metric object to update.
valueThe amount to set metric's value to.
Returns
a TRITONSERVER_Error indicating success or failure.

◆ TRITONSERVER_MetricsFormatted()

TRITONSERVER_DECLSPEC TRITONSERVER_Error* TRITONSERVER_MetricsFormatted ( TRITONSERVER_Metrics *  metrics,
TRITONSERVER_MetricFormat  format,
const char **  base,
size_t *  byte_size 
)

Get a buffer containing the metrics in the specified format.

For each format the buffer contains the following:

TRITONSERVER_METRIC_PROMETHEUS: 'base' points to a single multiline string (char*) that gives a text representation of the metrics in prometheus format. 'byte_size' returns the length of the string in bytes.

The buffer is owned by the 'metrics' object and should not be modified or freed by the caller. The lifetime of the buffer extends only as long as 'metrics' and must not be accessed once 'metrics' is deleted.

Parameters
metricsThe metrics object.
formatThe format to use for the returned metrics.
baseReturns a pointer to the base of the formatted metrics, as described above.
byte_sizeReturns the size, in bytes, of the formatted metrics.
Returns
a TRITONSERVER_Error indicating success or failure.

◆ TRITONSERVER_MetricValue()

TRITONSERVER_DECLSPEC TRITONSERVER_Error* TRITONSERVER_MetricValue ( TRITONSERVER_Metric *  metric,
double *  value 
)

Get the current value of a metric object.

Supports metrics of kind TRITONSERVER_METRIC_KIND_COUNTER and TRITONSERVER_METRIC_KIND_GAUGE, and returns TRITONSERVER_ERROR_UNSUPPORTED for unsupported TRITONSERVER_MetricKind.

Parameters
metricThe metric object to query.
valueReturns the current value of the metric object.
Returns
a TRITONSERVER_Error indicating success or failure.

◆ TRITONSERVER_ParameterBytesNew()

TRITONSERVER_DECLSPEC TRITONSERVER_Parameter* TRITONSERVER_ParameterBytesNew ( const char *  name,
const void *  byte_ptr,
const uint64_t  size 
)

Create a new parameter object with type TRITONSERVER_PARAMETER_BYTES.

The caller takes ownership of the TRITONSERVER_Parameter object and must call TRITONSERVER_ParameterDelete to release the object. The object only maintains a shallow copy of the 'byte_ptr' so the data content must be valid until the parameter object is deleted.

Parameters
nameThe parameter name.
byte_ptrThe pointer to the data content.
sizeThe size of the data content.
Returns
A new TRITONSERVER_Parameter object.

◆ TRITONSERVER_ParameterDelete()

TRITONSERVER_DECLSPEC void TRITONSERVER_ParameterDelete ( TRITONSERVER_Parameter *  parameter)

Delete a parameter object.

Parameters
parameterThe parameter object.

◆ TRITONSERVER_ParameterNew()

TRITONSERVER_DECLSPEC TRITONSERVER_Parameter* TRITONSERVER_ParameterNew ( const char *  name,
const TRITONSERVER_ParameterType  type,
const void *  value 
)

Create a new parameter object.

The caller takes ownership of the TRITONSERVER_Parameter object and must call TRITONSERVER_ParameterDelete to release the object. The object will maintain its own copy of the 'value'

Parameters
nameThe parameter name.
typeThe parameter type.
valueThe pointer to the value.
Returns
A new TRITONSERVER_Parameter object. 'nullptr' will be returned if 'type' is 'TRITONSERVER_PARAMETER_BYTES'. The caller should use TRITONSERVER_ParameterBytesNew to create parameter with bytes type.

◆ TRITONSERVER_ParameterTypeString()

const TRITONSERVER_DECLSPEC char* TRITONSERVER_ParameterTypeString ( TRITONSERVER_ParameterType  paramtype)

Get the string representation of a parameter type.

The returned string is not owned by the caller and so should not be modified or freed.

Parameters
paramtypeThe parameter type.
Returns
The string representation of the parameter type.

◆ TRITONSERVER_ResponseAllocatorDelete()

TRITONSERVER_DECLSPEC TRITONSERVER_Error* TRITONSERVER_ResponseAllocatorDelete ( TRITONSERVER_ResponseAllocator *  allocator)

Delete a response allocator.

Parameters
allocatorThe response allocator object.
Returns
a TRITONSERVER_Error indicating success or failure.

◆ TRITONSERVER_ResponseAllocatorNew()

TRITONSERVER_DECLSPEC TRITONSERVER_Error* TRITONSERVER_ResponseAllocatorNew ( TRITONSERVER_ResponseAllocator **  allocator,
TRITONSERVER_ResponseAllocatorAllocFn_t  alloc_fn,
TRITONSERVER_ResponseAllocatorReleaseFn_t  release_fn,
TRITONSERVER_ResponseAllocatorStartFn_t  start_fn 
)

Create a new response allocator object.

The response allocator object is used by Triton to allocate buffers to hold the output tensors in inference responses. Most models generate a single response for each inference request (TRITONSERVER_TXN_ONE_TO_ONE). For these models the order of callbacks will be:

TRITONSERVER_ServerInferAsync called

  • start_fn : optional (and typically not required)
  • alloc_fn : called once for each output tensor in response

TRITONSERVER_InferenceResponseDelete called

  • release_fn: called once for each output tensor in response

For models that generate multiple responses for each inference request (TRITONSERVER_TXN_DECOUPLED), the start_fn callback can be used to determine sets of alloc_fn callbacks that belong to the same response:

TRITONSERVER_ServerInferAsync called

  • start_fn
  • alloc_fn : called once for each output tensor in response
  • start_fn
  • alloc_fn : called once for each output tensor in response
  • ...

For each response, TRITONSERVER_InferenceResponseDelete called
  • release_fn: called once for each output tensor in the response

In all cases the start_fn, alloc_fn and release_fn callback functions must be thread-safe. Typically making these functions thread-safe does not require explicit locking. The recommended way to implement these functions is to have each inference request provide a 'response_allocator_userp' object that is unique to that request with TRITONSERVER_InferenceRequestSetResponseCallback. The callback functions then operate only on this unique state. Locking is required only when the callback function needs to access state that is shared across inference requests (for example, a common allocation pool).

Parameters
allocatorReturns the new response allocator object.
alloc_fnThe function to call to allocate buffers for result tensors.
release_fnThe function to call when the server no longer holds a reference to an allocated buffer.
start_fnThe function to call to indicate that the subsequent 'alloc_fn' calls are for a new response. This callback is optional (use nullptr to indicate that it should not be invoked).
Returns
a TRITONSERVER_Error indicating success or failure.

◆ TRITONSERVER_ResponseAllocatorSetBufferAttributesFunction()

TRITONSERVER_DECLSPEC TRITONSERVER_Error* TRITONSERVER_ResponseAllocatorSetBufferAttributesFunction ( TRITONSERVER_ResponseAllocator *  allocator,
TRITONSERVER_ResponseAllocatorBufferAttributesFn_t  buffer_attributes_fn 
)

Set the buffer attributes function for a response allocator object.

The function will be called after alloc_fn to set the buffer attributes associated with the output buffer.

The thread-safety requirement for buffer_attributes_fn is the same as for the other allocator callbacks.

Parameters
allocatorThe response allocator object.
buffer_attributes_fnThe function to call to get the buffer attributes information for an allocated buffer.
Returns
a TRITONSERVER_Error indicating success or failure.

◆ TRITONSERVER_ResponseAllocatorSetQueryFunction()

TRITONSERVER_DECLSPEC TRITONSERVER_Error* TRITONSERVER_ResponseAllocatorSetQueryFunction ( TRITONSERVER_ResponseAllocator *  allocator,
TRITONSERVER_ResponseAllocatorQueryFn_t  query_fn 
)

Set the query function to a response allocator object.

Usually the function will be called before alloc_fn to determine the allocator's preferred memory type and memory type ID in the current situation, so that different execution decisions can be made.

The thread-safety requirement for query_fn is the same as for the other allocator callbacks.

Parameters
allocatorThe response allocator object.
query_fnThe function to call to query allocator's preferred memory type and memory type ID.
Returns
a TRITONSERVER_Error indicating success or failure.

◆ TRITONSERVER_ServerDelete()

TRITONSERVER_DECLSPEC TRITONSERVER_Error* TRITONSERVER_ServerDelete ( TRITONSERVER_Server *  server)

Delete a server object.

If server is not already stopped it is stopped before being deleted.

Parameters
serverThe inference server object.
Returns
a TRITONSERVER_Error indicating success or failure.

◆ TRITONSERVER_ServerInferAsync()

TRITONSERVER_DECLSPEC TRITONSERVER_Error* TRITONSERVER_ServerInferAsync ( TRITONSERVER_Server *  server,
TRITONSERVER_InferenceRequest *  inference_request,
TRITONSERVER_InferenceTrace *  trace 
)

Perform inference using the meta-data and inputs supplied by the 'inference_request'.

If the function returns success, then the caller releases ownership of 'inference_request' and must not access it in any way after this call, until ownership is returned via the 'request_release_fn' callback registered in the request object with TRITONSERVER_InferenceRequestSetReleaseCallback.

The function unconditionally takes ownership of 'trace' and so the caller must not access it in any way after this call (except in the trace activity callbacks) until ownership is returned via the trace's release_fn callback.

Responses produced for this request are returned using the allocator and callback registered with the request by TRITONSERVER_InferenceRequestSetResponseCallback.

Parameters
serverThe inference server object.
inference_requestThe request object.
traceThe trace object for this request, or nullptr if no tracing.
Returns
a TRITONSERVER_Error indicating success or failure.

◆ TRITONSERVER_ServerIsLive()

TRITONSERVER_DECLSPEC TRITONSERVER_Error* TRITONSERVER_ServerIsLive ( TRITONSERVER_Server *  server,
bool *  live 
)

Is the server live?

Parameters
serverThe inference server object.
liveReturns true if server is live, false otherwise.
Returns
a TRITONSERVER_Error indicating success or failure.

◆ TRITONSERVER_ServerIsReady()

TRITONSERVER_DECLSPEC TRITONSERVER_Error* TRITONSERVER_ServerIsReady ( TRITONSERVER_Server *  server,
bool *  ready 
)

Is the server ready?

Parameters
serverThe inference server object.
readyReturns true if server is ready, false otherwise.
Returns
a TRITONSERVER_Error indicating success or failure.

◆ TRITONSERVER_ServerLoadModel()

TRITONSERVER_DECLSPEC TRITONSERVER_Error* TRITONSERVER_ServerLoadModel ( TRITONSERVER_Server *  server,
const char *  model_name 
)

Load the requested model or reload the model if it is already loaded.

The function does not return until the model is loaded or fails to load. Returned error indicates if model loaded successfully or not.

Parameters
serverThe inference server object.
model_nameThe name of the model.
Returns
a TRITONSERVER_Error indicating success or failure.

◆ TRITONSERVER_ServerLoadModelWithParameters()

TRITONSERVER_DECLSPEC TRITONSERVER_Error* TRITONSERVER_ServerLoadModelWithParameters ( TRITONSERVER_Server *  server,
const char *  model_name,
const TRITONSERVER_Parameter **  parameters,
const uint64_t  parameter_count 
)

Load the requested model or reload the model if it is already loaded, with load parameters provided.

The function does not return until the model is loaded or fails to load. Returned error indicates if model loaded successfully or not. Currently the below parameter names are recognized:

  • "config" : string parameter that contains a JSON representation of the model configuration. This config will be used for loading the model instead of the one in the model directory.
Parameters
serverThe inference server object.
model_nameThe name of the model.
parametersThe array of load parameters.
parameter_countThe number of parameters.
Returns
a TRITONSERVER_Error indicating success or failure.

◆ TRITONSERVER_ServerMetadata()

TRITONSERVER_DECLSPEC TRITONSERVER_Error* TRITONSERVER_ServerMetadata ( TRITONSERVER_Server *  server,
TRITONSERVER_Message **  server_metadata 
)

Get the metadata of the server as a TRITONSERVER_Message object.

The caller takes ownership of the message object and must call TRITONSERVER_MessageDelete to release the object.

Parameters
serverThe inference server object.
server_metadataReturns the server metadata message.
Returns
a TRITONSERVER_Error indicating success or failure.

◆ TRITONSERVER_ServerMetrics()

TRITONSERVER_DECLSPEC TRITONSERVER_Error* TRITONSERVER_ServerMetrics ( TRITONSERVER_Server *  server,
TRITONSERVER_Metrics **  metrics 
)

Get the current metrics for the server.

The caller takes ownership of the metrics object and must call TRITONSERVER_MetricsDelete to release the object.

Parameters
serverThe inference server object.
metricsReturns the metrics.
Returns
a TRITONSERVER_Error indicating success or failure.

◆ TRITONSERVER_ServerModelBatchProperties()

TRITONSERVER_DECLSPEC TRITONSERVER_Error* TRITONSERVER_ServerModelBatchProperties ( TRITONSERVER_Server *  server,
const char *  model_name,
const int64_t  model_version,
uint32_t *  flags,
void **  voidp 
)

Get the batch properties of the model.

The properties are communicated by a flags value and an (optional) object returned by 'voidp'.

  • TRITONSERVER_BATCH_UNKNOWN: Triton cannot determine the batching properties of the model. This means that the model does not support batching in any way that is useable by Triton. The returned 'voidp' value is nullptr.
  • TRITONSERVER_BATCH_FIRST_DIM: The model supports batching along the first dimension of every input and output tensor. Triton schedulers that perform batching can automatically batch inference requests along this dimension. The returned 'voidp' value is nullptr.
Parameters
serverThe inference server object.
model_nameThe name of the model.
model_versionThe version of the model. If -1 then the server will choose a version based on the model's policy.
flagsReturns flags indicating the batch properties of the model.
voidpIf non-nullptr, returns a pointer specific to the 'flags' value.
Returns
a TRITONSERVER_Error indicating success or failure.

◆ TRITONSERVER_ServerModelConfig()

TRITONSERVER_DECLSPEC TRITONSERVER_Error* TRITONSERVER_ServerModelConfig ( TRITONSERVER_Server *  server,
const char *  model_name,
const int64_t  model_version,
const uint32_t  config_version,
TRITONSERVER_Message **  model_config 
)

Get the configuration of a model as a TRITONSERVER_Message object.

The caller takes ownership of the message object and must call TRITONSERVER_MessageDelete to release the object.

Parameters
serverThe inference server object.
model_nameThe name of the model.
model_versionThe version of the model. If -1 then the server will choose a version based on the model's policy.
config_versionThe model configuration will be returned in a format matching this version. If the configuration cannot be represented in the requested version's format then an error will be returned. Currently only version 1 is supported.
model_configReturns the model config message.
Returns
a TRITONSERVER_Error indicating success or failure.

◆ TRITONSERVER_ServerModelIndex()

TRITONSERVER_DECLSPEC TRITONSERVER_Error* TRITONSERVER_ServerModelIndex ( TRITONSERVER_Server *  server,
uint32_t  flags,
TRITONSERVER_Message **  model_index 
)

Get the index of all unique models in the model repositories as a TRITONSERVER_Message object.

The caller takes ownership of the message object and must call TRITONSERVER_MessageDelete to release the object.

If TRITONSERVER_INDEX_FLAG_READY is set in 'flags' only the models that are loaded into the server and ready for inferencing are returned.

Parameters
serverThe inference server object.
flagsTRITONSERVER_ModelIndexFlag flags that control how to collect the index.
model_indexReturn the model index message that holds the index of all models contained in the server's model repository(s).
Returns
a TRITONSERVER_Error indicating success or failure.

◆ TRITONSERVER_ServerModelIsReady()

TRITONSERVER_DECLSPEC TRITONSERVER_Error* TRITONSERVER_ServerModelIsReady ( TRITONSERVER_Server *  server,
const char *  model_name,
const int64_t  model_version,
bool *  ready 
)

Is the model ready?

Parameters
serverThe inference server object.
model_nameThe name of the model to get readiness for.
model_versionThe version of the model to get readiness for. If -1 then the server will choose a version based on the model's policy.
readyReturns true if the model is ready, false otherwise.
Returns
a TRITONSERVER_Error indicating success or failure.

◆ TRITONSERVER_ServerModelMetadata()

TRITONSERVER_DECLSPEC TRITONSERVER_Error* TRITONSERVER_ServerModelMetadata ( TRITONSERVER_Server *  server,
const char *  model_name,
const int64_t  model_version,
TRITONSERVER_Message **  model_metadata 
)

Get the metadata of a model as a TRITONSERVER_Message object.

The caller takes ownership of the message object and must call TRITONSERVER_MessageDelete to release the object.

Parameters
serverThe inference server object.
model_nameThe name of the model.
model_versionThe version of the model. If -1 then the server will choose a version based on the model's policy.
model_metadataReturns the model metadata message.
Returns
a TRITONSERVER_Error indicating success or failure.

◆ TRITONSERVER_ServerModelStatistics()

TRITONSERVER_DECLSPEC TRITONSERVER_Error* TRITONSERVER_ServerModelStatistics ( TRITONSERVER_Server *  server,
const char *  model_name,
const int64_t  model_version,
TRITONSERVER_Message **  model_stats 
)

Get the statistics of a model as a TRITONSERVER_Message object.

The caller takes ownership of the object and must call TRITONSERVER_MessageDelete to release the object.

Parameters
serverThe inference server object.
model_nameThe name of the model. If empty, then statistics for all available models will be returned, and the server will choose a version based on those models' policies.
model_versionThe version of the model. If -1 then the server will choose a version based on the model's policy.
model_statsReturns the model statistics message.
Returns
a TRITONSERVER_Error indicating success or failure.

◆ TRITONSERVER_ServerModelTransactionProperties()

TRITONSERVER_DECLSPEC TRITONSERVER_Error* TRITONSERVER_ServerModelTransactionProperties ( TRITONSERVER_Server *  server,
const char *  model_name,
const int64_t  model_version,
uint32_t *  txn_flags,
void **  voidp 
)

Get the transaction policy of the model.

The policy is communicated by a flags value.

  • TRITONSERVER_TXN_ONE_TO_ONE: The model generates exactly one response per request.
  • TRITONSERVER_TXN_DECOUPLED: The model may generate zero to many responses per request.
Parameters
serverThe inference server object.
model_nameThe name of the model.
model_versionThe version of the model. If -1 then the server will choose a version based on the model's policy.
txn_flagsReturns flags indicating the transaction policy of the model.
voidpIf non-nullptr, returns a pointer specific to the 'flags' value.
Returns
a TRITONSERVER_Error indicating success or failure.

◆ TRITONSERVER_ServerNew()

TRITONSERVER_DECLSPEC TRITONSERVER_Error* TRITONSERVER_ServerNew ( TRITONSERVER_Server **  server,
TRITONSERVER_ServerOptions *  options 
)

Create a new server object.

The caller takes ownership of the TRITONSERVER_Server object and must call TRITONSERVER_ServerDelete to release the object.

Parameters
serverReturns the new inference server object.
optionsThe inference server options object.
Returns
a TRITONSERVER_Error indicating success or failure.

◆ TRITONSERVER_ServerOptionsAddRateLimiterResource()

TRITONSERVER_DECLSPEC TRITONSERVER_Error* TRITONSERVER_ServerOptionsAddRateLimiterResource ( TRITONSERVER_ServerOptions *  options,
const char *  resource_name,
const size_t  resource_count,
const int  device 
)

Add resource count for rate limiting.

Parameters
optionsThe server options object.
resource_nameThe name of the resource.
resource_countThe count of the resource.
deviceThe device identifier for the resource. A value of -1 indicates that the specified number of resources are available on every device. The device value is ignored for a global resource. The server will use the rate limiter configuration specified for instance groups in model config to determine whether resource is global. In case of conflicting resource type in different model configurations, server will raise an appropriate error while loading model.
Returns
a TRITONSERVER_Error indicating success or failure.

◆ TRITONSERVER_ServerOptionsDelete()

TRITONSERVER_DECLSPEC TRITONSERVER_Error* TRITONSERVER_ServerOptionsDelete ( TRITONSERVER_ServerOptions *  options)

Delete a server options object.

Parameters
optionsThe server options object.
Returns
a TRITONSERVER_Error indicating success or failure.

◆ TRITONSERVER_ServerOptionsNew()

TRITONSERVER_DECLSPEC TRITONSERVER_Error* TRITONSERVER_ServerOptionsNew ( TRITONSERVER_ServerOptions **  options)

Create a new server options object.

The caller takes ownership of the TRITONSERVER_ServerOptions object and must call TRITONSERVER_ServerOptionsDelete to release the object.

Parameters
optionsReturns the new server options object.
Returns
a TRITONSERVER_Error indicating success or failure.

◆ TRITONSERVER_ServerOptionsSetBackendConfig()

TRITONSERVER_DECLSPEC TRITONSERVER_Error* TRITONSERVER_ServerOptionsSetBackendConfig ( TRITONSERVER_ServerOptions *  options,
const char *  backend_name,
const char *  setting,
const char *  value 
)

Set a configuration setting for a named backend in a server options.

Parameters
optionsThe server options object.
backend_nameThe name of the backend.
settingThe name of the setting.
valueThe setting value.
Returns
a TRITONSERVER_Error indicating success or failure.

◆ TRITONSERVER_ServerOptionsSetBackendDirectory()

TRITONSERVER_DECLSPEC TRITONSERVER_Error* TRITONSERVER_ServerOptionsSetBackendDirectory ( TRITONSERVER_ServerOptions *  options,
const char *  backend_dir 
)

Set the directory containing backend shared libraries.

This directory is searched last after the version and model directory in the model repository when looking for the backend shared library for a model. If the backend is named 'be' the directory searched is 'backend_dir'/be/libtriton_be.so.

Parameters
optionsThe server options object.
backend_dirThe full path of the backend directory.
Returns
a TRITONSERVER_Error indicating success or failure.

◆ TRITONSERVER_ServerOptionsSetBufferManagerThreadCount()

TRITONSERVER_DECLSPEC TRITONSERVER_Error* TRITONSERVER_ServerOptionsSetBufferManagerThreadCount ( TRITONSERVER_ServerOptions *  options,
unsigned int  thread_count 
)

Set the number of threads used in buffer manager in a server options.

Parameters
optionsThe server options object.
thread_countThe number of threads.
Returns
a TRITONSERVER_Error indicating success or failure.

◆ TRITONSERVER_ServerOptionsSetCpuMetrics()

TRITONSERVER_DECLSPEC TRITONSERVER_Error* TRITONSERVER_ServerOptionsSetCpuMetrics ( TRITONSERVER_ServerOptions *  options,
bool  cpu_metrics 
)

Enable or disable CPU metrics collection in a server options.

CPU metrics are collected if both this option and TRITONSERVER_ServerOptionsSetMetrics are true.

Parameters
optionsThe server options object.
cpu_metricsTrue to enable CPU metrics, false to disable.
Returns
a TRITONSERVER_Error indicating success or failure.

◆ TRITONSERVER_ServerOptionsSetCudaMemoryPoolByteSize()

TRITONSERVER_DECLSPEC TRITONSERVER_Error* TRITONSERVER_ServerOptionsSetCudaMemoryPoolByteSize ( TRITONSERVER_ServerOptions *  options,
int  gpu_device,
uint64_t  size 
)

Set the total CUDA memory byte size that the server can allocate on given GPU device in a server options.

The CUDA memory pool will be shared across Triton itself and the backends that use TRITONBACKEND_MemoryManager to allocate memory.

Parameters
optionsThe server options object.
gpu_deviceThe GPU device to allocate the memory pool.
sizeThe CUDA memory pool byte size.
Returns
a TRITONSERVER_Error indicating success or failure.

◆ TRITONSERVER_ServerOptionsSetExitOnError()

TRITONSERVER_DECLSPEC TRITONSERVER_Error* TRITONSERVER_ServerOptionsSetExitOnError ( TRITONSERVER_ServerOptions *  options,
bool  exit 
)

Enable or disable exit-on-error in a server options.

Parameters
optionsThe server options object.
exitTrue to enable exiting on initialization error, false to continue.
Returns
a TRITONSERVER_Error indicating success or failure.

◆ TRITONSERVER_ServerOptionsSetExitTimeout()

TRITONSERVER_DECLSPEC TRITONSERVER_Error* TRITONSERVER_ServerOptionsSetExitTimeout ( TRITONSERVER_ServerOptions *  options,
unsigned int  timeout 
)

Set the exit timeout, in seconds, for the server in a server options.

Parameters
optionsThe server options object.
timeoutThe exit timeout, in seconds.
Returns
a TRITONSERVER_Error indicating success or failure.

◆ TRITONSERVER_ServerOptionsSetGpuMetrics()

TRITONSERVER_DECLSPEC TRITONSERVER_Error* TRITONSERVER_ServerOptionsSetGpuMetrics ( TRITONSERVER_ServerOptions *  options,
bool  gpu_metrics 
)

Enable or disable GPU metrics collection in a server options.

GPU metrics are collected if both this option and TRITONSERVER_ServerOptionsSetMetrics are true.

Parameters
optionsThe server options object.
gpu_metricsTrue to enable GPU metrics, false to disable.
Returns
a TRITONSERVER_Error indicating success or failure.

◆ TRITONSERVER_ServerOptionsSetHostPolicy()

TRITONSERVER_DECLSPEC TRITONSERVER_Error* TRITONSERVER_ServerOptionsSetHostPolicy ( TRITONSERVER_ServerOptions *  options,
const char *  policy_name,
const char *  setting,
const char *  value 
)

Set a host policy setting for a given policy name in a server options.

Parameters
optionsThe server options object.
policy_nameThe name of the policy.
settingThe name of the setting.
valueThe setting value.
Returns
a TRITONSERVER_Error indicating success or failure.

◆ TRITONSERVER_ServerOptionsSetLogError()

TRITONSERVER_DECLSPEC TRITONSERVER_Error* TRITONSERVER_ServerOptionsSetLogError ( TRITONSERVER_ServerOptions *  options,
bool  log 
)

Enable or disable error level logging.

Parameters
optionsThe server options object.
logTrue to enable error logging, false to disable.
Returns
a TRITONSERVER_Error indicating success or failure.

◆ TRITONSERVER_ServerOptionsSetLogFile()

TRITONSERVER_DECLSPEC TRITONSERVER_Error* TRITONSERVER_ServerOptionsSetLogFile ( TRITONSERVER_ServerOptions *  options,
const char *  file 
)

Provide a log output file.

Parameters
optionsThe server options object.
filea string defining the file where the log outputs will be saved. An empty string for the file name will cause triton to direct logging facilities to the console
Returns
a TRITONSERVER_Error indicating success or failure.

◆ TRITONSERVER_ServerOptionsSetLogFormat()

TRITONSERVER_DECLSPEC TRITONSERVER_Error* TRITONSERVER_ServerOptionsSetLogFormat ( TRITONSERVER_ServerOptions *  options,
const TRITONSERVER_LogFormat  format 
)

Set the logging format.

Parameters
optionsThe server options object.
formatThe logging format.
Returns
a TRITONSERVER_Error indicating success or failure.

◆ TRITONSERVER_ServerOptionsSetLogInfo()

TRITONSERVER_DECLSPEC TRITONSERVER_Error* TRITONSERVER_ServerOptionsSetLogInfo ( TRITONSERVER_ServerOptions *  options,
bool  log 
)

Enable or disable info level logging.

Parameters
optionsThe server options object.
logTrue to enable info logging, false to disable.
Returns
a TRITONSERVER_Error indicating success or failure.

◆ TRITONSERVER_ServerOptionsSetLogVerbose()

TRITONSERVER_DECLSPEC TRITONSERVER_Error* TRITONSERVER_ServerOptionsSetLogVerbose ( TRITONSERVER_ServerOptions *  options,
int  level 
)

Set verbose logging level.

Level zero disables verbose logging.

Parameters
optionsThe server options object.
levelThe verbose logging level.
Returns
a TRITONSERVER_Error indicating success or failure.

◆ TRITONSERVER_ServerOptionsSetLogWarn()

TRITONSERVER_DECLSPEC TRITONSERVER_Error* TRITONSERVER_ServerOptionsSetLogWarn ( TRITONSERVER_ServerOptions *  options,
bool  log 
)

Enable or disable warning level logging.

Parameters
optionsThe server options object.
logTrue to enable warning logging, false to disable.
Returns
a TRITONSERVER_Error indicating success or failure.

◆ TRITONSERVER_ServerOptionsSetMetrics()

TRITONSERVER_DECLSPEC TRITONSERVER_Error* TRITONSERVER_ServerOptionsSetMetrics ( TRITONSERVER_ServerOptions *  options,
bool  metrics 
)

Enable or disable metrics collection in a server options.

Parameters
optionsThe server options object.
metricsTrue to enable metrics, false to disable.
Returns
a TRITONSERVER_Error indicating success or failure.

◆ TRITONSERVER_ServerOptionsSetMetricsInterval()

TRITONSERVER_DECLSPEC TRITONSERVER_Error* TRITONSERVER_ServerOptionsSetMetricsInterval ( TRITONSERVER_ServerOptions *  options,
uint64_t  metrics_interval_ms 
)

Set the interval for metrics collection in a server options.

This is 2000 milliseconds by default.

Parameters
optionsThe server options object.
metrics_interval_msThe time interval in ms between successive metrics updates.
Returns
a TRITONSERVER_Error indicating success or failure.

◆ TRITONSERVER_ServerOptionsSetMinSupportedComputeCapability()

TRITONSERVER_DECLSPEC TRITONSERVER_Error* TRITONSERVER_ServerOptionsSetMinSupportedComputeCapability ( TRITONSERVER_ServerOptions *  options,
double  cc 
)

Set the minimum supported CUDA compute capability in a server options.

Parameters
optionsThe server options object.
ccThe minimum CUDA compute capability.
Returns
a TRITONSERVER_Error indicating success or failure.

◆ TRITONSERVER_ServerOptionsSetModelControlMode()

TRITONSERVER_DECLSPEC TRITONSERVER_Error* TRITONSERVER_ServerOptionsSetModelControlMode ( TRITONSERVER_ServerOptions *  options,
TRITONSERVER_ModelControlMode  mode 
)

Set the model control mode in a server options.

For each mode the models will be managed as the following:

TRITONSERVER_MODEL_CONTROL_NONE: the models in model repository will be loaded on startup. After startup any changes to the model repository will be ignored. Calling TRITONSERVER_ServerPollModelRepository will result in an error.

TRITONSERVER_MODEL_CONTROL_POLL: the models in model repository will be loaded on startup. The model repository can be polled periodically using TRITONSERVER_ServerPollModelRepository and the server will load, unload, and update models according to changes in the model repository.

TRITONSERVER_MODEL_CONTROL_EXPLICIT: the models in model repository will not be loaded on startup. The corresponding model control APIs must be called to load / unload a model in the model repository.

Parameters
optionsThe server options object.
modeThe mode to use for the model control.
Returns
a TRITONSERVER_Error indicating success or failure.

◆ TRITONSERVER_ServerOptionsSetModelLoadDeviceLimit()

TRITONSERVER_DECLSPEC TRITONSERVER_Error* TRITONSERVER_ServerOptionsSetModelLoadDeviceLimit ( TRITONSERVER_ServerOptions *  options,
const TRITONSERVER_InstanceGroupKind  kind,
const int  device_id,
const double  fraction 
)

Specify the limit on memory usage as a fraction on the device identified by 'kind' and 'device_id'.

If model loading on the device is requested and the current memory usage exceeds the limit, the load will be rejected. If not specified, the limit will not be set.

Currently supports TRITONSERVER_INSTANCEGROUPKIND_GPU.

Parameters
optionsThe server options object.
kindThe kind of the device.
device_idThe id of the device.
fractionThe limit on memory usage as a fraction
Returns
a TRITONSERVER_Error indicating success or failure.

◆ TRITONSERVER_ServerOptionsSetModelLoadThreadCount()

TRITONSERVER_DECLSPEC TRITONSERVER_Error* TRITONSERVER_ServerOptionsSetModelLoadThreadCount ( TRITONSERVER_ServerOptions *  options,
unsigned int  thread_count 
)

Set the number of threads to concurrently load models in a server options.

Parameters
optionsThe server options object.
thread_countThe number of threads.
Returns
a TRITONSERVER_Error indicating success or failure.

◆ TRITONSERVER_ServerOptionsSetModelRepositoryPath()

TRITONSERVER_DECLSPEC TRITONSERVER_Error* TRITONSERVER_ServerOptionsSetModelRepositoryPath ( TRITONSERVER_ServerOptions *  options,
const char *  model_repository_path 
)

Set the model repository path in a server options.

The path must be the full absolute path to the model repository. This function can be called multiple times with different paths to set multiple model repositories. Note that if a model is not unique across all model repositories at any time, the model will not be available.

Parameters
optionsThe server options object.
model_repository_pathThe full path to the model repository.
Returns
a TRITONSERVER_Error indicating success or failure.

◆ TRITONSERVER_ServerOptionsSetPinnedMemoryPoolByteSize()

TRITONSERVER_DECLSPEC TRITONSERVER_Error* TRITONSERVER_ServerOptionsSetPinnedMemoryPoolByteSize ( TRITONSERVER_ServerOptions *  options,
uint64_t  size 
)

Set the total pinned memory byte size that the server can allocate in a server options.

The pinned memory pool will be shared across Triton itself and the backends that use TRITONBACKEND_MemoryManager to allocate memory.

Parameters
optionsThe server options object.
sizeThe pinned memory pool byte size.
Returns
a TRITONSERVER_Error indicating success or failure.

◆ TRITONSERVER_ServerOptionsSetRateLimiterMode()

TRITONSERVER_DECLSPEC TRITONSERVER_Error* TRITONSERVER_ServerOptionsSetRateLimiterMode ( TRITONSERVER_ServerOptions *  options,
TRITONSERVER_RateLimitMode  mode 
)

Set the rate limit mode in a server options.

TRITONSERVER_RATE_LIMIT_EXEC_COUNT: The rate limiting prioritizes the inference execution using the number of times each instance has got a chance to run. The execution gets to run only when its resource constraints are satisfied.

TRITONSERVER_RATE_LIMIT_OFF: The rate limiting is turned off and the inference gets executed whenever an instance is available.

Parameters
optionsThe server options object.
modeThe mode to use for the rate limiting. By default, execution count is used to determine the priorities.
Returns
a TRITONSERVER_Error indicating success or failure.

◆ TRITONSERVER_ServerOptionsSetRepoAgentDirectory()

TRITONSERVER_DECLSPEC TRITONSERVER_Error* TRITONSERVER_ServerOptionsSetRepoAgentDirectory ( TRITONSERVER_ServerOptions *  options,
const char *  repoagent_dir 
)

Set the directory containing repository agent shared libraries.

This directory is searched when looking for the repository agent shared library for a model. If the repository agent is named 'ra' the directory searched is 'repoagent_dir'/ra/libtritonrepoagent_ra.so.

Parameters
optionsThe server options object.
repoagent_dirThe full path of the repository agent directory.
Returns
a TRITONSERVER_Error indicating success or failure.

◆ TRITONSERVER_ServerOptionsSetResponseCacheByteSize()

TRITONSERVER_DECLSPEC TRITONSERVER_Error* TRITONSERVER_ServerOptionsSetResponseCacheByteSize ( TRITONSERVER_ServerOptions *  options,
uint64_t  size 
)

Set the total response cache byte size that the server can allocate in CPU memory.

The response cache will be shared across all inference requests and across all models.

Parameters
optionsThe server options object.
sizeThe total response cache byte size.
Returns
a TRITONSERVER_Error indicating success or failure.

◆ TRITONSERVER_ServerOptionsSetServerId()

TRITONSERVER_DECLSPEC TRITONSERVER_Error* TRITONSERVER_ServerOptionsSetServerId ( TRITONSERVER_ServerOptions *  options,
const char *  server_id 
)

Set the textual ID for the server in a server options.

The ID is a name that identifies the server.

Parameters
optionsThe server options object.
server_idThe server identifier.
Returns
a TRITONSERVER_Error indicating success or failure.

◆ TRITONSERVER_ServerOptionsSetStartupModel()

TRITONSERVER_DECLSPEC TRITONSERVER_Error* TRITONSERVER_ServerOptionsSetStartupModel ( TRITONSERVER_ServerOptions *  options,
const char *  model_name 
)

Set the model to be loaded at startup in a server options.

The model must be present in one, and only one, of the specified model repositories. This function can be called multiple times with different model names to set multiple startup models. Note that it only takes effect in TRITONSERVER_MODEL_CONTROL_EXPLICIT mode.

Parameters
optionsThe server options object.
model_nameThe name of the model to load on startup.
Returns
a TRITONSERVER_Error indicating success or failure.

◆ TRITONSERVER_ServerOptionsSetStrictModelConfig()

TRITONSERVER_DECLSPEC TRITONSERVER_Error* TRITONSERVER_ServerOptionsSetStrictModelConfig ( TRITONSERVER_ServerOptions *  options,
bool  strict 
)

Enable or disable strict model configuration handling in a server options.

Parameters
optionsThe server options object.
strictTrue to enable strict model configuration handling, false to disable.
Returns
a TRITONSERVER_Error indicating success or failure.

◆ TRITONSERVER_ServerOptionsSetStrictReadiness()

TRITONSERVER_DECLSPEC TRITONSERVER_Error* TRITONSERVER_ServerOptionsSetStrictReadiness ( TRITONSERVER_ServerOptions *  options,
bool  strict 
)

Enable or disable strict readiness handling in a server options.

Parameters
optionsThe server options object.
strictTrue to enable strict readiness handling, false to disable.
Returns
a TRITONSERVER_Error indicating success or failure.

◆ TRITONSERVER_ServerPollModelRepository()

TRITONSERVER_DECLSPEC TRITONSERVER_Error* TRITONSERVER_ServerPollModelRepository ( TRITONSERVER_Server *  server)

Check the model repository for changes and update server state based on those changes.

Parameters
serverThe inference server object.
Returns
a TRITONSERVER_Error indicating success or failure.

◆ TRITONSERVER_ServerRegisterModelRepository()

TRITONSERVER_DECLSPEC TRITONSERVER_Error* TRITONSERVER_ServerRegisterModelRepository ( TRITONSERVER_Server *  server,
const char *  repository_path,
const TRITONSERVER_Parameter **  name_mapping,
const uint32_t  mapping_count 
)

Register a new model repository.

Not available in polling mode.

Parameters
serverThe inference server object.
repository_pathThe full path to the model repository.
name_mappingList of name_mapping parameters. Each mapping has the model directory name as its key, overriden model name as its value.
mapping_countNumber of mappings provided.
Returns
a TRITONSERVER_Error indicating success or failure.

◆ TRITONSERVER_ServerStop()

TRITONSERVER_DECLSPEC TRITONSERVER_Error* TRITONSERVER_ServerStop ( TRITONSERVER_Server *  server)

Stop a server object.

A server can't be restarted once it is stopped.

Parameters
serverThe inference server object.
Returns
a TRITONSERVER_Error indicating success or failure.

◆ TRITONSERVER_ServerUnloadModel()

TRITONSERVER_DECLSPEC TRITONSERVER_Error* TRITONSERVER_ServerUnloadModel ( TRITONSERVER_Server *  server,
const char *  model_name 
)

Unload the requested model.

Unloading a model that is not loaded on the server has no effect and a success code will be returned. The function does not wait for the requested model to be fully unloaded and a success code will be returned. The returned error indicates whether the model unload was initiated successfully or not.

Parameters
serverThe inference server object.
model_nameThe name of the model.
Returns
a TRITONSERVER_Error indicating success or failure.

◆ TRITONSERVER_ServerUnloadModelAndDependents()

TRITONSERVER_DECLSPEC TRITONSERVER_Error* TRITONSERVER_ServerUnloadModelAndDependents ( TRITONSERVER_Server *  server,
const char *  model_name 
)

Unload the requested model, and also unload any dependent model that was loaded along with the requested model (for example, the models composing an ensemble).

Unloading a model that is not loaded on the server has no effect and a success code will be returned. The function does not wait for the requested model and all dependent models to be fully unloaded and a success code will be returned. The returned error indicates whether the model unload was initiated successfully or not.

Parameters
serverThe inference server object.
model_nameThe name of the model.
Returns
a TRITONSERVER_Error indicating success or failure.

◆ TRITONSERVER_ServerUnregisterModelRepository()

TRITONSERVER_DECLSPEC TRITONSERVER_Error* TRITONSERVER_ServerUnregisterModelRepository ( TRITONSERVER_Server *  server,
const char *  repository_path 
)

Unregister a model repository.

Not available in polling mode.

Parameters
serverThe inference server object.
repository_pathThe full path to the model repository.
Returns
a TRITONSERVER_Error indicating success or failure.

◆ TRITONSERVER_StringToDataType()

TRITONSERVER_DECLSPEC TRITONSERVER_DataType TRITONSERVER_StringToDataType ( const char *  dtype)

Get the Triton datatype corresponding to a string representation of a datatype.

Parameters
dtypeThe datatype string representation.
Returns
The Triton data type or TRITONSERVER_TYPE_INVALID if the string does not represent a data type.